MutableCodePointTrie.java

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// created: 2018may04 Markus W. Scherer

package com.ibm.icu.util;

import java.util.Arrays;

/**
 * Mutable Unicode code point trie. Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit
 * integer values. For details see https://icu.unicode.org/design/struct/utrie
 *
 * <p>Setting values (especially ranges) and lookup is fast. The mutable trie is only somewhat
 * space-efficient. It builds a compacted, immutable {@link CodePointTrie}.
 *
 * <p>This trie can be modified while iterating over its contents. For example, it is possible to
 * merge its values with those from another set of ranges (e.g., another @{link CodePointMap}):
 * Iterate over those source ranges; for each of them iterate over this trie; add the source value
 * into the value of each trie range.
 *
 * @stable ICU 63
 */
public final class MutableCodePointTrie extends CodePointMap implements Cloneable {
    /**
     * Constructs a mutable trie that initially maps each Unicode code point to the same value. It
     * uses 32-bit data values until {@link #buildImmutable(com.ibm.icu.util.CodePointTrie.Type,
     * com.ibm.icu.util.CodePointTrie.ValueWidth)} is called. buildImmutable() takes a valueWidth
     * parameter which determines the number of bits in the data value in the resulting {@link
     * CodePointTrie}.
     *
     * @param initialValue the initial value that is set for all code points
     * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16
     * @stable ICU 63
     */
    public MutableCodePointTrie(int initialValue, int errorValue) {
        index = new int[BMP_I_LIMIT];
        index3NullOffset = -1;
        data = new int[INITIAL_DATA_LENGTH];
        dataNullOffset = -1;
        origInitialValue = initialValue;
        this.initialValue = initialValue;
        this.errorValue = errorValue;
        highValue = initialValue;
    }

    /**
     * Clones this mutable trie.
     *
     * @return the clone
     * @stable ICU 63
     */
    @Override
    public MutableCodePointTrie clone() {
        try {
            MutableCodePointTrie builder = (MutableCodePointTrie) super.clone();
            int iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
            builder.index = new int[iCapacity];
            builder.flags = new byte[UNICODE_LIMIT >> CodePointTrie.SHIFT_3];
            for (int i = 0, iLimit = highStart >> CodePointTrie.SHIFT_3; i < iLimit; ++i) {
                builder.index[i] = index[i];
                builder.flags[i] = flags[i];
            }
            builder.index3NullOffset = index3NullOffset;
            builder.data = data.clone();
            builder.dataLength = dataLength;
            builder.dataNullOffset = dataNullOffset;
            builder.origInitialValue = origInitialValue;
            builder.initialValue = initialValue;
            builder.errorValue = errorValue;
            builder.highStart = highStart;
            builder.highValue = highValue;
            assert index16 == null;
            return builder;
        } catch (CloneNotSupportedException ignored) {
            // Unreachable: Cloning *is* supported.
            return null;
        }
    }

    /**
     * Creates a mutable trie with the same contents as the {@link CodePointMap}.
     *
     * @param map the source map or trie
     * @return the mutable trie
     * @stable ICU 63
     */
    public static MutableCodePointTrie fromCodePointMap(CodePointMap map) {
        // TODO: Consider special code branch for map instanceof CodePointTrie?
        // Use the highValue as the initialValue to reduce the highStart.
        int errorValue = map.get(-1);
        int initialValue = map.get(MAX_UNICODE);
        MutableCodePointTrie mutableTrie = new MutableCodePointTrie(initialValue, errorValue);
        CodePointMap.Range range = new CodePointMap.Range();
        int start = 0;
        while (map.getRange(start, null, range)) {
            int end = range.getEnd();
            int value = range.getValue();
            if (value != initialValue) {
                if (start == end) {
                    mutableTrie.set(start, value);
                } else {
                    mutableTrie.setRange(start, end, value);
                }
            }
            start = end + 1;
        }
        return mutableTrie;
    }

    private void clear() {
        index3NullOffset = dataNullOffset = -1;
        dataLength = 0;
        highValue = initialValue = origInitialValue;
        highStart = 0;
        index16 = null;
    }

    /**
     * {@inheritDoc}
     *
     * @stable ICU 63
     */
    @Override
    public int get(int c) {
        if (c < 0 || MAX_UNICODE < c) {
            return errorValue;
        }
        if (c >= highStart) {
            return highValue;
        }
        int i = c >> CodePointTrie.SHIFT_3;
        if (flags[i] == ALL_SAME) {
            return index[i];
        } else {
            return data[index[i] + (c & CodePointTrie.SMALL_DATA_MASK)];
        }
    }

    private static final int maybeFilterValue(
            int value, int initialValue, int nullValue, ValueFilter filter) {
        if (value == initialValue) {
            value = nullValue;
        } else if (filter != null) {
            value = filter.apply(value);
        }
        return value;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The trie can be modified between calls to this function.
     *
     * @stable ICU 63
     */
    @Override
    public boolean getRange(
            int start, CodePointTrie.ValueFilter filter, CodePointTrie.Range range) {
        if (start < 0 || MAX_UNICODE < start) {
            return false;
        }
        if (start >= highStart) {
            int value = highValue;
            if (filter != null) {
                value = filter.apply(value);
            }
            range.set(start, MAX_UNICODE, value);
            return true;
        }
        int nullValue = initialValue;
        if (filter != null) {
            nullValue = filter.apply(nullValue);
        }
        int c = start;
        // Initialize to make compiler happy. Real value when haveValue is true.
        int trieValue = 0, value = 0;
        boolean haveValue = false;
        int i = c >> CodePointTrie.SHIFT_3;
        do {
            if (flags[i] == ALL_SAME) {
                int trieValue2 = index[i];
                if (haveValue) {
                    if (trieValue2 != trieValue) {
                        if (filter == null
                                || maybeFilterValue(trieValue2, initialValue, nullValue, filter)
                                        != value) {
                            range.set(start, c - 1, value);
                            return true;
                        }
                        trieValue = trieValue2; // may or may not help
                    }
                } else {
                    trieValue = trieValue2;
                    value = maybeFilterValue(trieValue2, initialValue, nullValue, filter);
                    haveValue = true;
                }
                c = (c + CodePointTrie.SMALL_DATA_BLOCK_LENGTH) & ~CodePointTrie.SMALL_DATA_MASK;
            } else /* MIXED */ {
                int di = index[i] + (c & CodePointTrie.SMALL_DATA_MASK);
                int trieValue2 = data[di];
                if (haveValue) {
                    if (trieValue2 != trieValue) {
                        if (filter == null
                                || maybeFilterValue(trieValue2, initialValue, nullValue, filter)
                                        != value) {
                            range.set(start, c - 1, value);
                            return true;
                        }
                        trieValue = trieValue2; // may or may not help
                    }
                } else {
                    trieValue = trieValue2;
                    value = maybeFilterValue(trieValue2, initialValue, nullValue, filter);
                    haveValue = true;
                }
                while ((++c & CodePointTrie.SMALL_DATA_MASK) != 0) {
                    trieValue2 = data[++di];
                    if (trieValue2 != trieValue) {
                        if (filter == null
                                || maybeFilterValue(trieValue2, initialValue, nullValue, filter)
                                        != value) {
                            range.set(start, c - 1, value);
                            return true;
                        }
                        trieValue = trieValue2; // may or may not help
                    }
                }
            }
            ++i;
        } while (c < highStart);
        assert (haveValue);
        if (maybeFilterValue(highValue, initialValue, nullValue, filter) != value) {
            range.set(start, c - 1, value);
        } else {
            range.set(start, MAX_UNICODE, value);
        }
        return true;
    }

    private void writeBlock(int block, int value) {
        int limit = block + CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
        Arrays.fill(data, block, limit, value);
    }

    /**
     * Sets a value for a code point.
     *
     * @param c the code point
     * @param value the value
     * @stable ICU 63
     */
    public void set(int c, int value) {
        if (c < 0 || MAX_UNICODE < c) {
            throw new IllegalArgumentException("invalid code point");
        }

        ensureHighStart(c);
        int block = getDataBlock(c >> CodePointTrie.SHIFT_3);
        data[block + (c & CodePointTrie.SMALL_DATA_MASK)] = value;
    }

    private void fillBlock(int block, int start, int limit, int value) {
        Arrays.fill(data, block + start, block + limit, value);
    }

    /**
     * Sets a value for each code point [start..end]. Faster and more space-efficient than setting
     * the value for each code point separately.
     *
     * @param start the first code point to get the value
     * @param end the last code point to get the value (inclusive)
     * @param value the value
     * @stable ICU 63
     */
    public void setRange(int start, int end, int value) {
        if (start < 0 || MAX_UNICODE < start || end < 0 || MAX_UNICODE < end || start > end) {
            throw new IllegalArgumentException("invalid code point range");
        }
        ensureHighStart(end);

        int limit = end + 1;
        if ((start & CodePointTrie.SMALL_DATA_MASK) != 0) {
            // Set partial block at [start..following block boundary[.
            int block = getDataBlock(start >> CodePointTrie.SHIFT_3);
            int nextStart =
                    (start + CodePointTrie.SMALL_DATA_MASK) & ~CodePointTrie.SMALL_DATA_MASK;
            if (nextStart <= limit) {
                fillBlock(
                        block,
                        start & CodePointTrie.SMALL_DATA_MASK,
                        CodePointTrie.SMALL_DATA_BLOCK_LENGTH,
                        value);
                start = nextStart;
            } else {
                fillBlock(
                        block,
                        start & CodePointTrie.SMALL_DATA_MASK,
                        limit & CodePointTrie.SMALL_DATA_MASK,
                        value);
                return;
            }
        }

        // Number of positions in the last, partial block.
        int rest = limit & CodePointTrie.SMALL_DATA_MASK;

        // Round down limit to a block boundary.
        limit &= ~CodePointTrie.SMALL_DATA_MASK;

        // Iterate over all-value blocks.
        while (start < limit) {
            int i = start >> CodePointTrie.SHIFT_3;
            if (flags[i] == ALL_SAME) {
                index[i] = value;
            } else /* MIXED */ {
                fillBlock(index[i], 0, CodePointTrie.SMALL_DATA_BLOCK_LENGTH, value);
            }
            start += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
        }

        if (rest > 0) {
            // Set partial block at [last block boundary..limit[.
            int block = getDataBlock(start >> CodePointTrie.SHIFT_3);
            fillBlock(block, 0, rest, value);
        }
    }

    /**
     * Compacts the data and builds an immutable {@link CodePointTrie} according to the parameters.
     * After this, the mutable trie will be empty.
     *
     * <p>The mutable trie stores 32-bit values until buildImmutable() is called. If values shorter
     * than 32 bits are to be stored in the immutable trie, then the upper bits are discarded. For
     * example, when the mutable trie contains values 0x81, -0x7f, and 0xa581, and the value width
     * is 8 bits, then each of these is stored as 0x81 and the immutable trie will return that as an
     * unsigned value. (Some implementations may want to make productive temporary use of the upper
     * bits until buildImmutable() discards them.)
     *
     * <p>Not every possible set of mappings can be built into a CodePointTrie, because of
     * limitations resulting from speed and space optimizations. Every Unicode assigned character
     * can be mapped to a unique value. Typical data yields data structures far smaller than the
     * limitations.
     *
     * <p>It is possible to construct extremely unusual mappings that exceed the data structure
     * limits. In such a case this function will throw an exception.
     *
     * @param type selects the trie type
     * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
     *     then the values stored in the trie will be truncated first
     * @see #fromCodePointMap(CodePointMap)
     * @stable ICU 63
     */
    public CodePointTrie buildImmutable(
            CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth) {
        if (type == null || valueWidth == null) {
            throw new IllegalArgumentException("The type and valueWidth must be specified.");
        }

        try {
            return build(type, valueWidth);
        } finally {
            clear();
        }
    }

    private static final int MAX_UNICODE = 0x10ffff;

    private static final int UNICODE_LIMIT = 0x110000;
    private static final int BMP_LIMIT = 0x10000;
    private static final int ASCII_LIMIT = 0x80;

    private static final int I_LIMIT = UNICODE_LIMIT >> CodePointTrie.SHIFT_3;
    private static final int BMP_I_LIMIT = BMP_LIMIT >> CodePointTrie.SHIFT_3;
    private static final int ASCII_I_LIMIT = ASCII_LIMIT >> CodePointTrie.SHIFT_3;

    private static final int SMALL_DATA_BLOCKS_PER_BMP_BLOCK =
            (1 << (CodePointTrie.FAST_SHIFT - CodePointTrie.SHIFT_3));

    // Flag values for data blocks.
    private static final byte ALL_SAME = 0;
    private static final byte MIXED = 1;
    private static final byte SAME_AS = 2;

    /** Start with allocation of 16k data entries. */
    private static final int INITIAL_DATA_LENGTH = (1 << 14);

    /** Grow about 8x each time. */
    private static final int MEDIUM_DATA_LENGTH = (1 << 17);

    /** Maximum length of the build-time data array. One entry per 0x110000 code points. */
    private static final int MAX_DATA_LENGTH = UNICODE_LIMIT;

    // Flag values for index-3 blocks while compacting/building.
    private static final byte I3_NULL = 0;
    private static final byte I3_BMP = 1;
    private static final byte I3_16 = 2;
    private static final byte I3_18 = 3;

    private static final int INDEX_3_18BIT_BLOCK_LENGTH =
            CodePointTrie.INDEX_3_BLOCK_LENGTH + CodePointTrie.INDEX_3_BLOCK_LENGTH / 8;

    private int[] index;
    private int index3NullOffset;
    private int[] data;
    private int dataLength;
    private int dataNullOffset;

    private int origInitialValue;
    private int initialValue;
    private int errorValue;
    private int highStart;
    private int highValue;

    /** Temporary array while building the final data. */
    private char[] index16;

    private byte[] flags = new byte[UNICODE_LIMIT >> CodePointTrie.SHIFT_3];

    private void ensureHighStart(int c) {
        if (c >= highStart) {
            // Round up to a CodePointTrie.CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
            c =
                    (c + CodePointTrie.CP_PER_INDEX_2_ENTRY)
                            & ~(CodePointTrie.CP_PER_INDEX_2_ENTRY - 1);
            int i = highStart >> CodePointTrie.SHIFT_3;
            int iLimit = c >> CodePointTrie.SHIFT_3;
            if (iLimit > index.length) {
                int[] newIndex = new int[I_LIMIT];
                for (int j = 0; j < i; ++j) {
                    newIndex[j] = index[j];
                }
                index = newIndex;
            }
            do {
                flags[i] = ALL_SAME;
                index[i] = initialValue;
            } while (++i < iLimit);
            highStart = c;
        }
    }

    private int allocDataBlock(int blockLength) {
        int newBlock = dataLength;
        int newTop = newBlock + blockLength;
        if (newTop > data.length) {
            int capacity;
            if (data.length < MEDIUM_DATA_LENGTH) {
                capacity = MEDIUM_DATA_LENGTH;
            } else if (data.length < MAX_DATA_LENGTH) {
                capacity = MAX_DATA_LENGTH;
            } else {
                // Should never occur.
                // Either MAX_DATA_LENGTH is incorrect,
                // or the code writes more values than should be possible.
                throw new AssertionError();
            }
            int[] newData = new int[capacity];
            for (int j = 0; j < dataLength; ++j) {
                newData[j] = data[j];
            }
            data = newData;
        }
        dataLength = newTop;
        return newBlock;
    }

    /**
     * No error checking for illegal arguments. The Java version always returns non-negative values.
     */
    private int getDataBlock(int i) {
        if (flags[i] == MIXED) {
            return index[i];
        }
        if (i < BMP_I_LIMIT) {
            int newBlock = allocDataBlock(CodePointTrie.FAST_DATA_BLOCK_LENGTH);
            int iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK - 1);
            int iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
            do {
                assert (flags[iStart] == ALL_SAME);
                writeBlock(newBlock, index[iStart]);
                flags[iStart] = MIXED;
                index[iStart++] = newBlock;
                newBlock += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
            } while (iStart < iLimit);
            return index[i];
        } else {
            int newBlock = allocDataBlock(CodePointTrie.SMALL_DATA_BLOCK_LENGTH);
            if (newBlock < 0) {
                return newBlock;
            }
            writeBlock(newBlock, index[i]);
            flags[i] = MIXED;
            index[i] = newBlock;
            return newBlock;
        }
    }

    // compaction --------------------------------------------------------------

    private void maskValues(int mask) {
        initialValue &= mask;
        errorValue &= mask;
        highValue &= mask;
        int iLimit = highStart >> CodePointTrie.SHIFT_3;
        for (int i = 0; i < iLimit; ++i) {
            if (flags[i] == ALL_SAME) {
                index[i] &= mask;
            }
        }
        for (int i = 0; i < dataLength; ++i) {
            data[i] &= mask;
        }
    }

    private static boolean equalBlocks(int[] s, int si, int[] t, int ti, int length) {
        while (length > 0 && s[si] == t[ti]) {
            ++si;
            ++ti;
            --length;
        }
        return length == 0;
    }

    private static boolean equalBlocks(char[] s, int si, int[] t, int ti, int length) {
        while (length > 0 && s[si] == t[ti]) {
            ++si;
            ++ti;
            --length;
        }
        return length == 0;
    }

    private static boolean equalBlocks(char[] s, int si, char[] t, int ti, int length) {
        while (length > 0 && s[si] == t[ti]) {
            ++si;
            ++ti;
            --length;
        }
        return length == 0;
    }

    private static boolean allValuesSameAs(int[] p, int pi, int length, int value) {
        int pLimit = pi + length;
        while (pi < pLimit && p[pi] == value) {
            ++pi;
        }
        return pi == pLimit;
    }

    /** Search for an identical block. */
    private static int findSameBlock(
            char[] p, int pStart, int length, char[] q, int qStart, int blockLength) {
        // Ensure that we do not even partially get past length.
        length -= blockLength;

        while (pStart <= length) {
            if (equalBlocks(p, pStart, q, qStart, blockLength)) {
                return pStart;
            }
            ++pStart;
        }
        return -1;
    }

    private static int findAllSameBlock(int[] p, int start, int limit, int value, int blockLength) {
        // Ensure that we do not even partially get past limit.
        limit -= blockLength;

        for (int block = start; block <= limit; ++block) {
            if (p[block] == value) {
                for (int i = 1; ; ++i) {
                    if (i == blockLength) {
                        return block;
                    }
                    if (p[block + i] != value) {
                        block += i;
                        break;
                    }
                }
            }
        }
        return -1;
    }

    /**
     * Look for maximum overlap of the beginning of the other block with the previous, adjacent
     * block.
     */
    private static int getOverlap(int[] p, int length, int[] q, int qStart, int blockLength) {
        int overlap = blockLength - 1;
        assert (overlap <= length);
        while (overlap > 0 && !equalBlocks(p, length - overlap, q, qStart, overlap)) {
            --overlap;
        }
        return overlap;
    }

    private static int getOverlap(char[] p, int length, int[] q, int qStart, int blockLength) {
        int overlap = blockLength - 1;
        assert (overlap <= length);
        while (overlap > 0 && !equalBlocks(p, length - overlap, q, qStart, overlap)) {
            --overlap;
        }
        return overlap;
    }

    private static int getOverlap(char[] p, int length, char[] q, int qStart, int blockLength) {
        int overlap = blockLength - 1;
        assert (overlap <= length);
        while (overlap > 0 && !equalBlocks(p, length - overlap, q, qStart, overlap)) {
            --overlap;
        }
        return overlap;
    }

    private static int getAllSameOverlap(int[] p, int length, int value, int blockLength) {
        int min = length - (blockLength - 1);
        int i = length;
        while (min < i && p[i - 1] == value) {
            --i;
        }
        return length - i;
    }

    private static boolean isStartOfSomeFastBlock(int dataOffset, int[] index, int fastILimit) {
        for (int i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
            if (index[i] == dataOffset) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the start of the last range in the trie by enumerating backward. Indexes for code
     * points higher than this will be omitted.
     */
    private int findHighStart() {
        int i = highStart >> CodePointTrie.SHIFT_3;
        while (i > 0) {
            boolean match;
            if (flags[--i] == ALL_SAME) {
                match = index[i] == highValue;
            } else /* MIXED */ {
                int p = index[i];
                for (int j = 0; ; ++j) {
                    if (j == CodePointTrie.SMALL_DATA_BLOCK_LENGTH) {
                        match = true;
                        break;
                    }
                    if (data[p + j] != highValue) {
                        match = false;
                        break;
                    }
                }
            }
            if (!match) {
                return (i + 1) << CodePointTrie.SHIFT_3;
            }
        }
        return 0;
    }

    private static final class AllSameBlocks {
        static final int NEW_UNIQUE = -1;
        static final int OVERFLOW = -2;

        AllSameBlocks() {
            mostRecent = -1;
        }

        int findOrAdd(int index, int count, int value) {
            if (mostRecent >= 0 && values[mostRecent] == value) {
                refCounts[mostRecent] += count;
                return indexes[mostRecent];
            }
            for (int i = 0; i < length; ++i) {
                if (values[i] == value) {
                    mostRecent = i;
                    refCounts[i] += count;
                    return indexes[i];
                }
            }
            if (length == CAPACITY) {
                return OVERFLOW;
            }
            mostRecent = length;
            indexes[length] = index;
            values[length] = value;
            refCounts[length++] = count;
            return NEW_UNIQUE;
        }

        /** Replaces the block which has the lowest reference count. */
        void add(int index, int count, int value) {
            assert (length == CAPACITY);
            int least = -1;
            int leastCount = I_LIMIT;
            for (int i = 0; i < length; ++i) {
                assert (values[i] != value);
                if (refCounts[i] < leastCount) {
                    least = i;
                    leastCount = refCounts[i];
                }
            }
            assert (least >= 0);
            mostRecent = least;
            indexes[least] = index;
            values[least] = value;
            refCounts[least] = count;
        }

        int findMostUsed() {
            if (length == 0) {
                return -1;
            }
            int max = -1;
            int maxCount = 0;
            for (int i = 0; i < length; ++i) {
                if (refCounts[i] > maxCount) {
                    max = i;
                    maxCount = refCounts[i];
                }
            }
            return indexes[max];
        }

        private static final int CAPACITY = 32;

        private int length;
        private int mostRecent;

        private int[] indexes = new int[CAPACITY];
        private int[] values = new int[CAPACITY];
        private int[] refCounts = new int[CAPACITY];
    }

    // Custom hash table for mixed-value blocks to be found anywhere in the
    // compacted data or index so far.
    private static final class MixedBlocks {
        void init(int maxLength, int newBlockLength) {
            // We store actual data indexes + 1 to reserve 0 for empty entries.
            int maxDataIndex = maxLength - newBlockLength + 1;
            int newLength;
            if (maxDataIndex <= 0xfff) { // 4k
                newLength = 6007;
                shift = 12;
                mask = 0xfff;
            } else if (maxDataIndex <= 0x7fff) { // 32k
                newLength = 50021;
                shift = 15;
                mask = 0x7fff;
            } else if (maxDataIndex <= 0x1ffff) { // 128k
                newLength = 200003;
                shift = 17;
                mask = 0x1ffff;
            } else {
                // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
                newLength = 1500007;
                shift = 21;
                mask = 0x1fffff;
            }
            if (table == null || newLength > table.length) {
                table = new int[newLength];
            } else {
                Arrays.fill(table, 0, newLength, 0);
            }
            length = newLength;

            blockLength = newBlockLength;
        }

        void extend(int[] data, int minStart, int prevDataLength, int newDataLength) {
            int start = prevDataLength - blockLength;
            if (start >= minStart) {
                ++start; // Skip the last block that we added last time.
            } else {
                start = minStart; // Begin with the first full block.
            }
            for (int end = newDataLength - blockLength; start <= end; ++start) {
                int hashCode = makeHashCode(data, start);
                addEntry(data, null, start, hashCode, start);
            }
        }

        void extend(char[] data, int minStart, int prevDataLength, int newDataLength) {
            int start = prevDataLength - blockLength;
            if (start >= minStart) {
                ++start; // Skip the last block that we added last time.
            } else {
                start = minStart; // Begin with the first full block.
            }
            for (int end = newDataLength - blockLength; start <= end; ++start) {
                int hashCode = makeHashCode(data, start);
                addEntry(null, data, start, hashCode, start);
            }
        }

        int findBlock(int[] data, int[] blockData, int blockStart) {
            int hashCode = makeHashCode(blockData, blockStart);
            int entryIndex = findEntry(data, null, blockData, null, blockStart, hashCode);
            if (entryIndex >= 0) {
                return (table[entryIndex] & mask) - 1;
            } else {
                return -1;
            }
        }

        int findBlock(char[] data, int[] blockData, int blockStart) {
            int hashCode = makeHashCode(blockData, blockStart);
            int entryIndex = findEntry(null, data, blockData, null, blockStart, hashCode);
            if (entryIndex >= 0) {
                return (table[entryIndex] & mask) - 1;
            } else {
                return -1;
            }
        }

        int findBlock(char[] data, char[] blockData, int blockStart) {
            int hashCode = makeHashCode(blockData, blockStart);
            int entryIndex = findEntry(null, data, null, blockData, blockStart, hashCode);
            if (entryIndex >= 0) {
                return (table[entryIndex] & mask) - 1;
            } else {
                return -1;
            }
        }

        int findAllSameBlock(int[] data, int blockValue) {
            int hashCode = makeHashCode(blockValue);
            int entryIndex = findEntry(data, blockValue, hashCode);
            if (entryIndex >= 0) {
                return (table[entryIndex] & mask) - 1;
            } else {
                return -1;
            }
        }

        private int makeHashCode(int[] blockData, int blockStart) {
            int blockLimit = blockStart + blockLength;
            int hashCode = blockData[blockStart++];
            do {
                hashCode = 37 * hashCode + blockData[blockStart++];
            } while (blockStart < blockLimit);
            return hashCode;
        }

        private int makeHashCode(char[] blockData, int blockStart) {
            int blockLimit = blockStart + blockLength;
            int hashCode = blockData[blockStart++];
            do {
                hashCode = 37 * hashCode + blockData[blockStart++];
            } while (blockStart < blockLimit);
            return hashCode;
        }

        private int makeHashCode(int blockValue) {
            int hashCode = blockValue;
            for (int i = 1; i < blockLength; ++i) {
                hashCode = 37 * hashCode + blockValue;
            }
            return hashCode;
        }

        private void addEntry(
                int[] data32, char[] data16, int blockStart, int hashCode, int dataIndex) {
            assert (0 <= dataIndex && dataIndex < mask);
            int entryIndex = findEntry(data32, data16, data32, data16, blockStart, hashCode);
            if (entryIndex < 0) {
                table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
            }
        }

        private int findEntry(
                int[] data32,
                char[] data16,
                int[] blockData32,
                char[] blockData16,
                int blockStart,
                int hashCode) {
            int shiftedHashCode = hashCode << shift;
            int initialEntryIndex = modulo(hashCode, length - 1) + 1; // 1..length-1
            for (int entryIndex = initialEntryIndex; ; ) {
                int entry = table[entryIndex];
                if (entry == 0) {
                    return ~entryIndex;
                }
                if ((entry & ~mask) == shiftedHashCode) {
                    int dataIndex = (entry & mask) - 1;
                    if (data32 != null
                            ? equalBlocks(data32, dataIndex, blockData32, blockStart, blockLength)
                            : blockData32 != null
                                    ? equalBlocks(
                                            data16, dataIndex, blockData32, blockStart, blockLength)
                                    : equalBlocks(
                                            data16,
                                            dataIndex,
                                            blockData16,
                                            blockStart,
                                            blockLength)) {
                        return entryIndex;
                    }
                }
                entryIndex = nextIndex(initialEntryIndex, entryIndex);
            }
        }

        private int findEntry(int[] data, int blockValue, int hashCode) {
            int shiftedHashCode = hashCode << shift;
            int initialEntryIndex = modulo(hashCode, length - 1) + 1; // 1..length-1
            for (int entryIndex = initialEntryIndex; ; ) {
                int entry = table[entryIndex];
                if (entry == 0) {
                    return ~entryIndex;
                }
                if ((entry & ~mask) == shiftedHashCode) {
                    int dataIndex = (entry & mask) - 1;
                    if (allValuesSameAs(data, dataIndex, blockLength, blockValue)) {
                        return entryIndex;
                    }
                }
                entryIndex = nextIndex(initialEntryIndex, entryIndex);
            }
        }

        private int nextIndex(int initialEntryIndex, int entryIndex) {
            // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
            return (entryIndex + initialEntryIndex) % length;
        }

        /** Ensures non-negative n % m (that is 0..m-1). */
        private int modulo(int n, int m) {
            int i = n % m;
            if (i < 0) {
                i += m;
            }
            return i;
        }

        // Hash table.
        // The length is a prime number, larger than the maximum data length.
        // The "shift" lower bits store a data index + 1.
        // The remaining upper bits store a partial hashCode of the block data values.
        private int[] table;
        private int length;
        private int shift;
        private int mask;

        private int blockLength;
    }

    private int compactWholeDataBlocks(int fastILimit, AllSameBlocks allSameBlocks) {
        // ASCII data will be stored as a linear table, even if the following code
        // does not yet count it that way.
        int newDataCapacity = ASCII_LIMIT;
        // Add room for a small data null block in case it would match the start of
        // a fast data block where dataNullOffset must not be set in that case.
        newDataCapacity += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
        // Add room for special values (errorValue, highValue) and padding.
        newDataCapacity += 4;
        int iLimit = highStart >> CodePointTrie.SHIFT_3;
        int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
        int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
        for (int i = 0; i < iLimit; i += inc) {
            if (i == fastILimit) {
                blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
                inc = 1;
            }
            int value = index[i];
            if (flags[i] == MIXED) {
                // Really mixed?
                int p = value;
                value = data[p];
                if (allValuesSameAs(data, p + 1, blockLength - 1, value)) {
                    flags[i] = ALL_SAME;
                    index[i] = value;
                    // Fall through to ALL_SAME handling.
                } else {
                    newDataCapacity += blockLength;
                    continue;
                }
            } else {
                assert (flags[i] == ALL_SAME);
                if (inc > 1) {
                    // Do all of the fast-range data block's ALL_SAME parts have the same value?
                    boolean allSame = true;
                    int next_i = i + inc;
                    for (int j = i + 1; j < next_i; ++j) {
                        assert (flags[j] == ALL_SAME);
                        if (index[j] != value) {
                            allSame = false;
                            break;
                        }
                    }
                    if (!allSame) {
                        // Turn it into a MIXED block.
                        if (getDataBlock(i) < 0) {
                            return -1;
                        }
                        newDataCapacity += blockLength;
                        continue;
                    }
                }
            }
            // Is there another ALL_SAME block with the same value?
            int other = allSameBlocks.findOrAdd(i, inc, value);
            if (other == AllSameBlocks.OVERFLOW) {
                // The fixed-size array overflowed. Slow check for a duplicate block.
                int jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
                for (int j = 0; ; j += jInc) {
                    if (j == i) {
                        allSameBlocks.add(i, inc, value);
                        break;
                    }
                    if (j == fastILimit) {
                        jInc = 1;
                    }
                    if (flags[j] == ALL_SAME && index[j] == value) {
                        allSameBlocks.add(j, jInc + inc, value);
                        other = j;
                        break;
                        // We could keep counting blocks with the same value
                        // before we add the first one, which may improve compaction in rare cases,
                        // but it would make it slower.
                    }
                }
            }
            if (other >= 0) {
                flags[i] = SAME_AS;
                index[i] = other;
            } else {
                // New unique same-value block.
                newDataCapacity += blockLength;
            }
        }
        return newDataCapacity;
    }

    /**
     * Compacts a build-time trie.
     *
     * <p>The compaction - removes blocks that are identical with earlier ones - overlaps each new
     * non-duplicate block as much as possible with the previously-written one - works with
     * fast-range data blocks whose length is a multiple of that of higher-code-point data blocks
     *
     * <p>It does not try to find an optimal order of writing, deduplicating, and overlapping
     * blocks.
     */
    private int compactData(
            int fastILimit, int[] newData, int dataNullIndex, MixedBlocks mixedBlocks) {
        // The linear ASCII data has been copied into newData already.
        int newDataLength = 0;
        for (int i = 0;
                newDataLength < ASCII_LIMIT;
                newDataLength += CodePointTrie.FAST_DATA_BLOCK_LENGTH,
                        i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
            index[i] = newDataLength;
        }

        int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
        mixedBlocks.init(newData.length, blockLength);
        mixedBlocks.extend(newData, 0, 0, newDataLength);

        int iLimit = highStart >> CodePointTrie.SHIFT_3;
        int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
        int fastLength = 0;
        for (int i = ASCII_I_LIMIT; i < iLimit; i += inc) {
            if (i == fastILimit) {
                blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
                inc = 1;
                fastLength = newDataLength;
                mixedBlocks.init(newData.length, blockLength);
                mixedBlocks.extend(newData, 0, 0, newDataLength);
            }
            if (flags[i] == ALL_SAME) {
                int value = index[i];
                // Find an earlier part of the data array of length blockLength
                // that is filled with this value.
                int n = mixedBlocks.findAllSameBlock(newData, value);
                // If we find a match, and the current block is the data null block,
                // and it is not a fast block but matches the start of a fast block,
                // then we need to continue looking.
                // This is because this small block is shorter than the fast block,
                // and not all of the rest of the fast block is filled with this value.
                // Otherwise trie.getRange() would detect that the fast block starts at
                // dataNullOffset and assume incorrectly that it is filled with the null value.
                while (n >= 0
                        && i == dataNullIndex
                        && i >= fastILimit
                        && n < fastLength
                        && isStartOfSomeFastBlock(n, index, fastILimit)) {
                    n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
                }
                if (n >= 0) {
                    index[i] = n;
                } else {
                    n = getAllSameOverlap(newData, newDataLength, value, blockLength);
                    index[i] = newDataLength - n;
                    int prevDataLength = newDataLength;
                    while (n < blockLength) {
                        newData[newDataLength++] = value;
                        ++n;
                    }
                    mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
                }
            } else if (flags[i] == MIXED) {
                int block = index[i];
                int n = mixedBlocks.findBlock(newData, data, block);
                if (n >= 0) {
                    index[i] = n;
                } else {
                    n = getOverlap(newData, newDataLength, data, block, blockLength);
                    index[i] = newDataLength - n;
                    int prevDataLength = newDataLength;
                    while (n < blockLength) {
                        newData[newDataLength++] = data[block + n++];
                    }
                    mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
                }
            } else /* SAME_AS */ {
                int j = index[i];
                index[i] = index[j];
            }
        }

        return newDataLength;
    }

    private int compactIndex(int fastILimit, MixedBlocks mixedBlocks) {
        int fastIndexLength = fastILimit >> (CodePointTrie.FAST_SHIFT - CodePointTrie.SHIFT_3);
        if ((highStart >> CodePointTrie.FAST_SHIFT) <= fastIndexLength) {
            // Only the linear fast index, no multi-stage index tables.
            index3NullOffset = CodePointTrie.NO_INDEX3_NULL_OFFSET;
            return fastIndexLength;
        }

        // Condense the fast index table.
        // Also, does it contain an index-3 block with all dataNullOffset?
        char[] fastIndex = new char[fastIndexLength];
        int i3FirstNull = -1;
        for (int i = 0, j = 0; i < fastILimit; ++j) {
            int i3 = index[i];
            fastIndex[j] = (char) i3;
            if (i3 == dataNullOffset) {
                if (i3FirstNull < 0) {
                    i3FirstNull = j;
                } else if (index3NullOffset < 0
                        && (j - i3FirstNull + 1) == CodePointTrie.INDEX_3_BLOCK_LENGTH) {
                    index3NullOffset = i3FirstNull;
                }
            } else {
                i3FirstNull = -1;
            }
            // Set the index entries that compactData() skipped.
            // Needed when the multi-stage index covers the fast index range as well.
            int iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
            while (++i < iNext) {
                i3 += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
                index[i] = i3;
            }
        }

        mixedBlocks.init(fastIndexLength, CodePointTrie.INDEX_3_BLOCK_LENGTH);
        mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);

        // Examine index-3 blocks. For each determine one of:
        // - same as the index-3 null block
        // - same as a fast-index block
        // - 16-bit indexes
        // - 18-bit indexes
        // We store this in the first flags entry for the index-3 block.
        //
        // Also determine an upper limit for the index-3 table length.
        int index3Capacity = 0;
        i3FirstNull = index3NullOffset;
        boolean hasLongI3Blocks = false;
        // If the fast index covers the whole BMP, then
        // the multi-stage index is only for supplementary code points.
        // Otherwise, the multi-stage index covers all of Unicode.
        int iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
        int iLimit = highStart >> CodePointTrie.SHIFT_3;
        for (int i = iStart; i < iLimit; ) {
            int j = i;
            int jLimit = i + CodePointTrie.INDEX_3_BLOCK_LENGTH;
            int oredI3 = 0;
            boolean isNull = true;
            do {
                int i3 = index[j];
                oredI3 |= i3;
                if (i3 != dataNullOffset) {
                    isNull = false;
                }
            } while (++j < jLimit);
            if (isNull) {
                flags[i] = I3_NULL;
                if (i3FirstNull < 0) {
                    if (oredI3 <= 0xffff) {
                        index3Capacity += CodePointTrie.INDEX_3_BLOCK_LENGTH;
                    } else {
                        index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
                        hasLongI3Blocks = true;
                    }
                    i3FirstNull = 0;
                }
            } else {
                if (oredI3 <= 0xffff) {
                    int n = mixedBlocks.findBlock(fastIndex, index, i);
                    if (n >= 0) {
                        flags[i] = I3_BMP;
                        index[i] = n;
                    } else {
                        flags[i] = I3_16;
                        index3Capacity += CodePointTrie.INDEX_3_BLOCK_LENGTH;
                    }
                } else {
                    flags[i] = I3_18;
                    index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
                    hasLongI3Blocks = true;
                }
            }
            i = j;
        }

        int index2Capacity = (iLimit - iStart) >> CodePointTrie.SHIFT_2_3;

        // Length of the index-1 table, rounded up.
        int index1Length = (index2Capacity + CodePointTrie.INDEX_2_MASK) >> CodePointTrie.SHIFT_1_2;

        // Index table: Fast index, index-1, index-3, index-2.
        // +1 for possible index table padding.
        int index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
        index16 = Arrays.copyOf(fastIndex, index16Capacity);

        mixedBlocks.init(index16Capacity, CodePointTrie.INDEX_3_BLOCK_LENGTH);
        MixedBlocks longI3Blocks = null;
        if (hasLongI3Blocks) {
            longI3Blocks = new MixedBlocks();
            longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH);
        }

        // Compact the index-3 table and write an uncompacted version of the index-2 table.
        char[] index2 = new char[index2Capacity];
        int i2Length = 0;
        i3FirstNull = index3NullOffset;
        int index3Start = fastIndexLength + index1Length;
        int indexLength = index3Start;
        for (int i = iStart; i < iLimit; i += CodePointTrie.INDEX_3_BLOCK_LENGTH) {
            int i3;
            byte f = flags[i];
            if (f == I3_NULL && i3FirstNull < 0) {
                // First index-3 null block. Write & overlap it like a normal block, then remember
                // it.
                f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
                i3FirstNull = 0;
            }
            if (f == I3_NULL) {
                i3 = index3NullOffset;
            } else if (f == I3_BMP) {
                i3 = index[i];
            } else if (f == I3_16) {
                int n = mixedBlocks.findBlock(index16, index, i);
                if (n >= 0) {
                    i3 = n;
                } else {
                    if (indexLength == index3Start) {
                        // No overlap at the boundary between the index-1 and index-3 tables.
                        n = 0;
                    } else {
                        n =
                                getOverlap(
                                        index16,
                                        indexLength,
                                        index,
                                        i,
                                        CodePointTrie.INDEX_3_BLOCK_LENGTH);
                    }
                    i3 = indexLength - n;
                    int prevIndexLength = indexLength;
                    while (n < CodePointTrie.INDEX_3_BLOCK_LENGTH) {
                        index16[indexLength++] = (char) index[i + n++];
                    }
                    mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
                    if (hasLongI3Blocks) {
                        longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
                    }
                }
            } else {
                assert (f == I3_18);
                assert (hasLongI3Blocks);
                // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
                int j = i;
                int jLimit = i + CodePointTrie.INDEX_3_BLOCK_LENGTH;
                int k = indexLength;
                do {
                    ++k;
                    int v = index[j++];
                    int upperBits = (v & 0x30000) >> 2;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 4;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 6;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 8;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 10;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 12;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 14;
                    index16[k++] = (char) v;
                    v = index[j++];
                    upperBits |= (v & 0x30000) >> 16;
                    index16[k++] = (char) v;
                    index16[k - 9] = (char) upperBits;
                } while (j < jLimit);
                int n = longI3Blocks.findBlock(index16, index16, indexLength);
                if (n >= 0) {
                    i3 = n | 0x8000;
                } else {
                    if (indexLength == index3Start) {
                        // No overlap at the boundary between the index-1 and index-3 tables.
                        n = 0;
                    } else {
                        n =
                                getOverlap(
                                        index16,
                                        indexLength,
                                        index16,
                                        indexLength,
                                        INDEX_3_18BIT_BLOCK_LENGTH);
                    }
                    i3 = (indexLength - n) | 0x8000;
                    int prevIndexLength = indexLength;
                    if (n > 0) {
                        int start = indexLength;
                        while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
                            index16[indexLength++] = index16[start + n++];
                        }
                    } else {
                        indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
                    }
                    mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
                    if (hasLongI3Blocks) {
                        longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
                    }
                }
            }
            if (index3NullOffset < 0 && i3FirstNull >= 0) {
                index3NullOffset = i3;
            }
            // Set the index-2 table entry.
            index2[i2Length++] = (char) i3;
        }
        assert (i2Length == index2Capacity);
        assert (indexLength <= index3Start + index3Capacity);

        if (index3NullOffset < 0) {
            index3NullOffset = CodePointTrie.NO_INDEX3_NULL_OFFSET;
        }
        if (indexLength
                >= (CodePointTrie.NO_INDEX3_NULL_OFFSET + CodePointTrie.INDEX_3_BLOCK_LENGTH)) {
            // The index-3 offsets exceed 15 bits, or
            // the last one cannot be distinguished from the no-null-block value.
            throw new IndexOutOfBoundsException(
                    "The trie data exceeds limitations of the data structure.");
        }

        // Compact the index-2 table and write the index-1 table.
        // assert(CodePointTrie.INDEX_2_BLOCK_LENGTH == CodePointTrie.INDEX_3_BLOCK_LENGTH) :
        //     "must re-init mixedBlocks";
        int blockLength = CodePointTrie.INDEX_2_BLOCK_LENGTH;
        int i1 = fastIndexLength;
        for (int i = 0; i < i2Length; i += blockLength) {
            int n;
            if ((i2Length - i) >= blockLength) {
                // normal block
                assert (blockLength == CodePointTrie.INDEX_2_BLOCK_LENGTH);
                n = mixedBlocks.findBlock(index16, index2, i);
            } else {
                // highStart is inside the last index-2 block. Shorten it.
                blockLength = i2Length - i;
                n = findSameBlock(index16, index3Start, indexLength, index2, i, blockLength);
            }
            int i2;
            if (n >= 0) {
                i2 = n;
            } else {
                if (indexLength == index3Start) {
                    // No overlap at the boundary between the index-1 and index-3/2 tables.
                    n = 0;
                } else {
                    n = getOverlap(index16, indexLength, index2, i, blockLength);
                }
                i2 = indexLength - n;
                int prevIndexLength = indexLength;
                while (n < blockLength) {
                    index16[indexLength++] = index2[i + n++];
                }
                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
            }
            // Set the index-1 table entry.
            index16[i1++] = (char) i2;
        }
        assert (i1 == index3Start);
        assert (indexLength <= index16Capacity);

        return indexLength;
    }

    private int compactTrie(int fastILimit) {
        // Find the real highStart and round it up.
        assert ((highStart & (CodePointTrie.CP_PER_INDEX_2_ENTRY - 1)) == 0);
        highValue = get(MAX_UNICODE);
        int realHighStart = findHighStart();
        realHighStart =
                (realHighStart + (CodePointTrie.CP_PER_INDEX_2_ENTRY - 1))
                        & ~(CodePointTrie.CP_PER_INDEX_2_ENTRY - 1);
        if (realHighStart == UNICODE_LIMIT) {
            highValue = initialValue;
        }

        // We always store indexes and data values for the fast range.
        // Pin highStart to the top of that range while building.
        int fastLimit = fastILimit << CodePointTrie.SHIFT_3;
        if (realHighStart < fastLimit) {
            for (int i = (realHighStart >> CodePointTrie.SHIFT_3); i < fastILimit; ++i) {
                flags[i] = ALL_SAME;
                index[i] = highValue;
            }
            highStart = fastLimit;
        } else {
            highStart = realHighStart;
        }

        int[] asciiData = new int[ASCII_LIMIT];
        for (int i = 0; i < ASCII_LIMIT; ++i) {
            asciiData[i] = get(i);
        }

        // First we look for which data blocks have the same value repeated over the whole block,
        // deduplicate such blocks, find a good null data block (for faster enumeration),
        // and get an upper bound for the necessary data array length.
        AllSameBlocks allSameBlocks = new AllSameBlocks();
        int newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
        int[] newData = Arrays.copyOf(asciiData, newDataCapacity);

        int dataNullIndex = allSameBlocks.findMostUsed();

        MixedBlocks mixedBlocks = new MixedBlocks();
        int newDataLength = compactData(fastILimit, newData, dataNullIndex, mixedBlocks);
        assert (newDataLength <= newDataCapacity);
        data = newData;
        dataLength = newDataLength;
        if (dataLength > (0x3ffff + CodePointTrie.SMALL_DATA_BLOCK_LENGTH)) {
            // The offset of the last data block is too high to be stored in the index table.
            throw new IndexOutOfBoundsException(
                    "The trie data exceeds limitations of the data structure.");
        }

        if (dataNullIndex >= 0) {
            dataNullOffset = index[dataNullIndex];
            initialValue = data[dataNullOffset];
        } else {
            dataNullOffset = CodePointTrie.NO_DATA_NULL_OFFSET;
        }

        int indexLength = compactIndex(fastILimit, mixedBlocks);
        highStart = realHighStart;
        return indexLength;
    }

    private CodePointTrie build(CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth) {
        // The mutable trie always stores 32-bit values.
        // When we build a UCPTrie for a smaller value width, we first mask off unused bits
        // before compacting the data.
        switch (valueWidth) {
            case BITS_32:
                break;
            case BITS_16:
                maskValues(0xffff);
                break;
            case BITS_8:
                maskValues(0xff);
                break;
            default:
                // Should be unreachable.
                throw new IllegalArgumentException();
        }

        int fastLimit = type == CodePointTrie.Type.FAST ? BMP_LIMIT : CodePointTrie.SMALL_LIMIT;
        int indexLength = compactTrie(fastLimit >> CodePointTrie.SHIFT_3);

        // Ensure data table alignment: The index length must be even for uint32_t data.
        if (valueWidth == CodePointTrie.ValueWidth.BITS_32 && (indexLength & 1) != 0) {
            index16[indexLength++] = 0xffee; // arbitrary value
        }

        // Make the total trie structure length a multiple of 4 bytes by padding the data table,
        // and store special values as the last two data values.
        int length = indexLength * 2;
        if (valueWidth == CodePointTrie.ValueWidth.BITS_16) {
            if (((indexLength ^ dataLength) & 1) != 0) {
                // padding
                data[dataLength++] = errorValue;
            }
            if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
                data[dataLength++] = highValue;
                data[dataLength++] = errorValue;
            }
            length += dataLength * 2;
        } else if (valueWidth == CodePointTrie.ValueWidth.BITS_32) {
            // 32-bit data words never need padding to a multiple of 4 bytes.
            if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
                if (data[dataLength - 1] != highValue) {
                    data[dataLength++] = highValue;
                }
                data[dataLength++] = errorValue;
            }
            length += dataLength * 4;
        } else {
            int and3 = (length + dataLength) & 3;
            if (and3 == 0
                    && data[dataLength - 1] == errorValue
                    && data[dataLength - 2] == highValue) {
                // all set
            } else if (and3 == 3 && data[dataLength - 1] == highValue) {
                data[dataLength++] = errorValue;
            } else {
                while (and3 != 2) {
                    data[dataLength++] = highValue;
                    and3 = (and3 + 1) & 3;
                }
                data[dataLength++] = highValue;
                data[dataLength++] = errorValue;
            }
            length += dataLength;
        }
        assert ((length & 3) == 0);

        // Fill the index and data arrays.
        char[] trieIndex;
        if (highStart <= fastLimit) {
            // Condense only the fast index from the mutable-trie index.
            trieIndex = new char[indexLength];
            for (int i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
                trieIndex[j] = (char) index[i];
            }
        } else {
            if (indexLength == index16.length) {
                trieIndex = index16;
                index16 = null;
            } else {
                trieIndex = Arrays.copyOf(index16, indexLength);
            }
        }

        // Write the data array.
        switch (valueWidth) {
            case BITS_16:
                {
                    // Write 16-bit data values.
                    char[] data16 = new char[dataLength];
                    for (int i = 0; i < dataLength; ++i) {
                        data16[i] = (char) data[i];
                    }
                    return type == CodePointTrie.Type.FAST
                            ? new CodePointTrie.Fast16(
                                    trieIndex, data16, highStart, index3NullOffset, dataNullOffset)
                            : new CodePointTrie.Small16(
                                    trieIndex, data16, highStart, index3NullOffset, dataNullOffset);
                }
            case BITS_32:
                {
                    // Write 32-bit data values.
                    int[] data32 = Arrays.copyOf(data, dataLength);
                    return type == CodePointTrie.Type.FAST
                            ? new CodePointTrie.Fast32(
                                    trieIndex, data32, highStart, index3NullOffset, dataNullOffset)
                            : new CodePointTrie.Small32(
                                    trieIndex, data32, highStart, index3NullOffset, dataNullOffset);
                }
            case BITS_8:
                {
                    // Write 8-bit data values.
                    byte[] data8 = new byte[dataLength];
                    for (int i = 0; i < dataLength; ++i) {
                        data8[i] = (byte) data[i];
                    }
                    return type == CodePointTrie.Type.FAST
                            ? new CodePointTrie.Fast8(
                                    trieIndex, data8, highStart, index3NullOffset, dataNullOffset)
                            : new CodePointTrie.Small8(
                                    trieIndex, data8, highStart, index3NullOffset, dataNullOffset);
                }
            default:
                // Should be unreachable.
                throw new IllegalArgumentException();
        }
    }
}