Back to index...
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.sun.java.util.jar.pack;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import static com.sun.java.util.jar.pack.Constants.*;
/**
 * Population-based coding.
 * See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
 * @author John Rose
 */
// This tactic alone reduces the final zipped rt.jar by about a percent.
class PopulationCoding implements CodingMethod {
    Histogram vHist;   // histogram of all values
    int[]     fValues; // list of favored values
    int       fVlen;   // inclusive max index
    long[]    symtab;  // int map of favored value -> token [1..#fValues]
    CodingMethod favoredCoding;
    CodingMethod tokenCoding;
    CodingMethod unfavoredCoding;
    int L = -1;  //preferred L value for tokenCoding
    public void setFavoredValues(int[] fValues, int fVlen) {
        // Note:  {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
        // This is because zero is an exceptional favored value index.
        assert(fValues[0] == 0);  // must be empty
        assert(this.fValues == null);  // do not do this twice
        this.fValues = fValues;
        this.fVlen   = fVlen;
        if (L >= 0) {
            setL(L);  // reassert
        }
    }
    public void setFavoredValues(int[] fValues) {
        int lfVlen = fValues.length-1;
        setFavoredValues(fValues, lfVlen);
    }
    public void setHistogram(Histogram vHist) {
        this.vHist = vHist;
    }
    public void setL(int L) {
        this.L = L;
        if (L >= 0 && fValues != null && tokenCoding == null) {
            tokenCoding = fitTokenCoding(fVlen, L);
            assert(tokenCoding != null);
        }
    }
    public static Coding fitTokenCoding(int fVlen, int L) {
        // Find the smallest B s.t. (B,H,0) covers fVlen.
        if (fVlen < 256)
            // H/L do not matter when B==1
            return BandStructure.BYTE1;
        Coding longest = BandStructure.UNSIGNED5.setL(L);
        if (!longest.canRepresentUnsigned(fVlen))
            return null;  // failure; L is too sharp and fVlen too large
        Coding tc = longest;
        for (Coding shorter = longest; ; ) {
            shorter = shorter.setB(shorter.B()-1);
            if (shorter.umax() < fVlen)
                break;
            tc = shorter;  // shorten it by reducing B
        }
        return tc;
    }
    public void setFavoredCoding(CodingMethod favoredCoding) {
        this.favoredCoding = favoredCoding;
    }
    public void setTokenCoding(CodingMethod tokenCoding) {
        this.tokenCoding = tokenCoding;
        this.L = -1;
        if (tokenCoding instanceof Coding && fValues != null) {
            Coding tc = (Coding) tokenCoding;
            if (tc == fitTokenCoding(fVlen, tc.L()))
                this.L = tc.L();
            // Otherwise, it's a non-default coding.
        }
    }
    public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
        this.unfavoredCoding = unfavoredCoding;
    }
    public int favoredValueMaxLength() {
        if (L == 0)
            return Integer.MAX_VALUE;
        else
            return BandStructure.UNSIGNED5.setL(L).umax();
    }
    public void resortFavoredValues() {
        Coding tc = (Coding) tokenCoding;
        // Make a local copy before reordering.
        fValues = BandStructure.realloc(fValues, 1+fVlen);
        // Resort favoredValues within each byte-size cadre.
        int fillp = 1;  // skip initial zero
        for (int n = 1; n <= tc.B(); n++) {
            int nmax = tc.byteMax(n);
            if (nmax > fVlen)
                nmax = fVlen;
            if (nmax < tc.byteMin(n))
                break;
            int low = fillp;
            int high = nmax+1;
            if (high == low)  continue;
            assert(high > low)
                : high+"!>"+low;
            assert(tc.getLength(low) == n)
                : n+" != len("+(low)+") == "+
                  tc.getLength(low);
            assert(tc.getLength(high-1) == n)
                : n+" != len("+(high-1)+") == "+
                  tc.getLength(high-1);
            int midTarget = low + (high-low)/2;
            int mid = low;
            // Divide the values into cadres, and sort within each.
            int prevCount = -1;
            int prevLimit = low;
            for (int i = low; i < high; i++) {
                int val = fValues[i];
                int count = vHist.getFrequency(val);
                if (prevCount != count) {
                    if (n == 1) {
                        // For the single-byte encoding, keep strict order
                        // among frequency groups.
                        Arrays.sort(fValues, prevLimit, i);
                    } else if (Math.abs(mid - midTarget) >
                               Math.abs(i   - midTarget)) {
                        // Find a single inflection point
                        // close to the middle of the byte-size cadre.
                        mid = i;
                    }
                    prevCount = count;
                    prevLimit = i;
                }
            }
            if (n == 1) {
                Arrays.sort(fValues, prevLimit, high);
            } else {
                // Sort up to the midpoint, if any.
                Arrays.sort(fValues, low, mid);
                Arrays.sort(fValues, mid, high);
            }
            assert(tc.getLength(low) == tc.getLength(mid));
            assert(tc.getLength(low) == tc.getLength(high-1));
            fillp = nmax+1;
        }
        assert(fillp == fValues.length);
        // Reset symtab.
        symtab = null;
    }
    public int getToken(int value) {
        if (symtab == null)
            symtab = makeSymtab();
        int pos = Arrays.binarySearch(symtab, (long)value << 32);
        if (pos < 0)  pos = -pos-1;
        if (pos < symtab.length && value == (int)(symtab[pos] >>> 32))
            return (int)symtab[pos];
        else
            return 0;
    }
    public int[][] encodeValues(int[] values, int start, int end) {
        // Compute token sequence.
        int[] tokens = new int[end-start];
        int nuv = 0;
        for (int i = 0; i < tokens.length; i++) {
            int val = values[start+i];
            int tok = getToken(val);
            if (tok != 0)
                tokens[i] = tok;
            else
                nuv += 1;
        }
        // Compute unfavored value sequence.
        int[] unfavoredValues = new int[nuv];
        nuv = 0;  // reset
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i] != 0)  continue;  // already covered
            int val = values[start+i];
            unfavoredValues[nuv++] = val;
        }
        assert(nuv == unfavoredValues.length);
        return new int[][]{ tokens, unfavoredValues };
    }
    private long[] makeSymtab() {
        long[] lsymtab = new long[fVlen];
        for (int token = 1; token <= fVlen; token++) {
            lsymtab[token-1] = ((long)fValues[token] << 32) | token;
        }
        // Index by value:
        Arrays.sort(lsymtab);
        return lsymtab;
    }
    private Coding getTailCoding(CodingMethod c) {
        while (c instanceof AdaptiveCoding)
            c = ((AdaptiveCoding)c).tailCoding;
        return (Coding) c;
    }
    // CodingMethod methods.
    public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
        int[][] vals = encodeValues(a, start, end);
        writeSequencesTo(out, vals[0], vals[1]);
    }
    void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
        favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
        getTailCoding(favoredCoding).writeTo(out, computeSentinelValue());
        tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
        if (uValues.length > 0)
            unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
    }
   int computeSentinelValue() {
        Coding fc = getTailCoding(favoredCoding);
        if (fc.isDelta()) {
            // repeat the last favored value, using delta=0
            return 0;
        } else {
            // else repeat the shorter of the min or last value
            int min = fValues[1];
            int last = min;
            // (remember that fVlen is an inclusive limit in fValues)
            for (int i = 2; i <= fVlen; i++) {
                last = fValues[i];
                min = moreCentral(min, last);
            }
            int endVal;
            if (fc.getLength(min) <= fc.getLength(last))
                return min;
            else
                return last;
        }
   }
    public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
        // Parameters are fCode, L, uCode.
        setFavoredValues(readFavoredValuesFrom(in, end-start));
        // Read the tokens.  Read them into the final array, for the moment.
        tokenCoding.readArrayFrom(in, a, start, end);
        // Decode the favored tokens.
        int headp = 0, tailp = -1;
        int uVlen = 0;
        for (int i = start; i < end; i++) {
            int tok = a[i];
            if (tok == 0) {
                // Make a linked list, and decode in a second pass.
                if (tailp < 0) {
                    headp = i;
                } else {
                    a[tailp] = i;
                }
                tailp = i;
                uVlen += 1;
            } else {
                a[i] = fValues[tok];
            }
        }
        // Walk the linked list of "zero" locations, decoding unfavored vals.
        int[] uValues = new int[uVlen];
        if (uVlen > 0)
            unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
        for (int i = 0; i < uVlen; i++) {
            int nextp = a[headp];
            a[headp] = uValues[i];
            headp = nextp;
        }
    }
    int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
        int[] lfValues = new int[1000];  // realloc as needed
        // The set uniqueValuesForDebug records all favored values.
        // As each new value is added, we assert that the value
        // was not already in the set.
        Set<Integer> uniqueValuesForDebug = null;
        assert((uniqueValuesForDebug = new HashSet<>()) != null);
        int fillp = 1;
        maxForDebug += fillp;
        int min = Integer.MIN_VALUE;  // farthest from the center
        //int min2 = Integer.MIN_VALUE;  // emulate buggy 150.7 spec.
        int last = 0;
        CodingMethod fcm = favoredCoding;
        while (fcm instanceof AdaptiveCoding) {
            AdaptiveCoding ac = (AdaptiveCoding) fcm;
            int len = ac.headLength;
            while (fillp + len > lfValues.length) {
                lfValues = BandStructure.realloc(lfValues);
            }
            int newFillp = fillp + len;
            ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp);
            while (fillp < newFillp) {
                int val = lfValues[fillp++];
                assert(uniqueValuesForDebug.add(val));
                assert(fillp <= maxForDebug);
                last = val;
                min = moreCentral(min, val);
                //min2 = moreCentral2(min2, val, min);
            }
            fcm = ac.tailCoding;
        }
        Coding fc = (Coding) fcm;
        if (fc.isDelta()) {
            for (long state = 0;;) {
                // Read a new value:
                state += fc.readFrom(in);
                int val;
                if (fc.isSubrange())
                    val = fc.reduceToUnsignedRange(state);
                else
                    val = (int)state;
                state = val;
                if (fillp > 1 && (val == last || val == min)) //|| val == min2
                    break;
                if (fillp == lfValues.length)
                    lfValues = BandStructure.realloc(lfValues);
                lfValues[fillp++] = val;
                assert(uniqueValuesForDebug.add(val));
                assert(fillp <= maxForDebug);
                last = val;
                min = moreCentral(min, val);
                //min2 = moreCentral(min2, val);
            }
        } else {
            for (;;) {
                int val = fc.readFrom(in);
                if (fillp > 1 && (val == last || val == min)) //|| val == min2
                    break;
                if (fillp == lfValues.length)
                    lfValues = BandStructure.realloc(lfValues);
                lfValues[fillp++] = val;
                assert(uniqueValuesForDebug.add(val));
                assert(fillp <= maxForDebug);
                last = val;
                min = moreCentral(min, val);
                //min2 = moreCentral2(min2, val, min);
            }
        }
        return BandStructure.realloc(lfValues, fillp);
    }
    private static int moreCentral(int x, int y) {
        int kx = (x >> 31) ^ (x << 1);
        int ky = (y >> 31) ^ (y << 1);
        // bias kx/ky to get an unsigned comparison:
        kx -= Integer.MIN_VALUE;
        ky -= Integer.MIN_VALUE;
        int xy = (kx < ky? x: y);
        // assert that this ALU-ish version is the same:
        assert(xy == moreCentralSlow(x, y));
        return xy;
    }
//  private static int moreCentral2(int x, int y, int min) {
//      // Strict implementation of buggy 150.7 specification.
//      // The bug is that the spec. says absolute-value ties are broken
//      // in favor of positive numbers, but the suggested implementation
//      // (also mentioned in the spec.) breaks ties in favor of negatives.
//      if (x + y == 0)  return (x > y? x : y);
//      return min;
//  }
    private static int moreCentralSlow(int x, int y) {
        int ax = x;
        if (ax < 0)  ax = -ax;
        if (ax < 0)  return y;  //x is MIN_VALUE
        int ay = y;
        if (ay < 0)  ay = -ay;
        if (ay < 0)  return x;  //y is MIN_VALUE
        if (ax < ay)  return x;
        if (ax > ay)  return y;
        // At this point the absolute values agree, and the negative wins.
        return x < y ? x : y;
    }
    static final int[] LValuesCoded
        = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
    public byte[] getMetaCoding(Coding dflt) {
        int K = fVlen;
        int LCoded = 0;
        if (tokenCoding instanceof Coding) {
            Coding tc = (Coding) tokenCoding;
            if (tc.B() == 1) {
                LCoded = 1;
            } else if (L >= 0) {
                assert(L == tc.L());
                for (int i = 1; i < LValuesCoded.length; i++) {
                    if (LValuesCoded[i] == L) { LCoded = i; break; }
                }
            }
        }
        CodingMethod tokenDflt = null;
        if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
            // A simple L value is enough to recover the tokenCoding.
            tokenDflt = tokenCoding;
        }
        int FDef = (favoredCoding == dflt)?1:0;
        int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
        int TDef = (tokenCoding == tokenDflt)?1:0;
        int TDefL = (TDef == 1) ? LCoded : 0;
        assert(TDef == ((TDefL>0)?1:0));
        ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
        bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
        try {
            if (FDef == 0)  bytes.write(favoredCoding.getMetaCoding(dflt));
            if (TDef == 0)  bytes.write(tokenCoding.getMetaCoding(dflt));
            if (UDef == 0)  bytes.write(unfavoredCoding.getMetaCoding(dflt));
        } catch (IOException ee) {
            throw new RuntimeException(ee);
        }
        return bytes.toByteArray();
    }
    public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
        int op = bytes[pos++] & 0xFF;
        if (op < _meta_pop || op >= _meta_limit)  return pos-1; // backup
        op -= _meta_pop;
        int FDef = op % 2;
        int UDef = (op / 2) % 2;
        int TDefL = (op / 4);
        int TDef = (TDefL > 0)?1:0;
        int L = LValuesCoded[TDefL];
        CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt};
        if (FDef == 0)
            pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode);
        if (TDef == 0)
            pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode);
        if (UDef == 0)
            pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode);
        PopulationCoding pop = new PopulationCoding();
        pop.L = L;  // might be -1
        pop.favoredCoding   = FCode[0];
        pop.tokenCoding     = TCode[0];  // might be null!
        pop.unfavoredCoding = UCode[0];
        res[0] = pop;
        return pos;
    }
    private String keyString(CodingMethod m) {
        if (m instanceof Coding)
            return ((Coding)m).keyString();
        if (m == null)
            return "none";
        return m.toString();
    }
    public String toString() {
        PropMap p200 = Utils.currentPropMap();
        boolean verbose
            = (p200 != null &&
               p200.getBoolean(Utils.COM_PREFIX+"verbose.pop"));
        StringBuilder res = new StringBuilder(100);
        res.append("pop(").append("fVlen=").append(fVlen);
        if (verbose && fValues != null) {
            res.append(" fV=[");
            for (int i = 1; i <= fVlen; i++) {
                res.append(i==1?"":",").append(fValues[i]);
            }
            res.append(";").append(computeSentinelValue());
            res.append("]");
        }
        res.append(" fc=").append(keyString(favoredCoding));
        res.append(" tc=").append(keyString(tokenCoding));
        res.append(" uc=").append(keyString(unfavoredCoding));
        res.append(")");
        return res.toString();
    }
}
Back to index...