CodePointTrie.java

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// created: 2018may04 Markus W. Scherer

package com.ibm.icu.util;

import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Immutable Unicode code point trie. Fast, reasonably compact, map from Unicode code points
 * (U+0000..U+10FFFF) to integer values. For details see https://icu.unicode.org/design/struct/utrie
 *
 * <p>This class is not intended for public subclassing.
 *
 * @see MutableCodePointTrie
 * @stable ICU 63
 */
public abstract class CodePointTrie extends CodePointMap {
    /**
     * Selectors for the type of a CodePointTrie. Different trade-offs for size vs. speed.
     *
     * <p>Use null for {@link #fromBinary} to accept any type; {@link #getType} will return the
     * actual type.
     *
     * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
     * @see #fromBinary
     * @see #getType
     * @stable ICU 63
     */
    public enum Type {
        /**
         * Fast/simple/larger BMP data structure. The {@link Fast} subclasses have additional
         * functions for lookup for BMP and supplementary code points.
         *
         * @see Fast
         * @stable ICU 63
         */
        FAST,
        /**
         * Small/slower BMP data structure.
         *
         * @see Small
         * @stable ICU 63
         */
        SMALL
    }

    /**
     * Selectors for the number of bits in a CodePointTrie data value.
     *
     * <p>Use null for {@link #fromBinary} to accept any data value width; {@link #getValueWidth}
     * will return the actual data value width.
     *
     * @stable ICU 63
     */
    public enum ValueWidth {
        /**
         * The trie stores 16 bits per data value. It returns them as unsigned values
         * 0..0xffff=65535.
         *
         * @stable ICU 63
         */
        BITS_16,
        /**
         * The trie stores 32 bits per data value.
         *
         * @stable ICU 63
         */
        BITS_32,
        /**
         * The trie stores 8 bits per data value. It returns them as unsigned values 0..0xff=255.
         *
         * @stable ICU 63
         */
        BITS_8
    }

    private CodePointTrie(
            char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset) {
        this.ascii = new int[ASCII_LIMIT];
        this.index = index;
        this.data = data;
        this.dataLength = data.getDataLength();
        this.highStart = highStart;
        this.index3NullOffset = index3NullOffset;
        this.dataNullOffset = dataNullOffset;

        for (int c = 0; c < ASCII_LIMIT; ++c) {
            ascii[c] = data.getFromIndex(c);
        }

        int nullValueOffset = dataNullOffset;
        if (nullValueOffset >= dataLength) {
            nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
        }
        nullValue = data.getFromIndex(nullValueOffset);
    }

    /**
     * Creates a trie from its binary form, stored in the ByteBuffer starting at the current
     * position. Advances the buffer position to just after the trie data. Inverse of {@link
     * #toBinary(OutputStream)}.
     *
     * <p>The data is copied from the buffer; later modification of the buffer will not affect the
     * trie.
     *
     * @param type selects the trie type; this method throws an exception if the type does not match
     *     the binary data; use null to accept any type
     * @param valueWidth selects the number of bits in a data value; this method throws an exception
     *     if the valueWidth does not match the binary data; use null to accept any data value width
     * @param bytes a buffer containing the binary data of a CodePointTrie
     * @return the trie
     * @see MutableCodePointTrie#MutableCodePointTrie(int, int)
     * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
     * @see #toBinary(OutputStream)
     * @stable ICU 63
     */
    public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) {
        ByteOrder outerByteOrder = bytes.order();
        try {
            // Enough data for a trie header?
            if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) {
                throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header");
            }

            // struct UCPTrieHeader
            /** "Tri3" in big-endian US-ASCII (0x54726933) */
            int signature = bytes.getInt();

            // Check the signature.
            switch (signature) {
                case 0x54726933:
                    // The buffer is already set to the trie data byte order.
                    break;
                case 0x33697254:
                    // Temporarily reverse the byte order.
                    boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
                    bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
                    signature = 0x54726933;
                    break;
                default:
                    throw new ICUUncheckedIOException(
                            "Buffer does not contain a serialized CodePointTrie");
            }

            // struct UCPTrieHeader continued
            /**
             * Options bit field: Bits 15..12: Data length bits 19..16. Bits 11..8: Data null block
             * offset bits 19..16. Bits 7..6: UCPTrieType Bits 5..3: Reserved (0). Bits 2..0:
             * UCPTrieValueWidth
             */
            int options = bytes.getChar();

            /** Total length of the index tables. */
            int indexLength = bytes.getChar();

            /** Data length bits 15..0. */
            int dataLength = bytes.getChar();

            /** Index-3 null block offset, 0x7fff or 0xffff if none. */
            int index3NullOffset = bytes.getChar();

            /** Data null block offset bits 15..0, 0xfffff if none. */
            int dataNullOffset = bytes.getChar();

            /**
             * First code point of the single-value range ending with U+10ffff, rounded up and then
             * shifted right by SHIFT_2.
             */
            int shiftedHighStart = bytes.getChar();
            // struct UCPTrieHeader end

            int typeInt = (options >> 6) & 3;
            Type actualType;
            switch (typeInt) {
                case 0:
                    actualType = Type.FAST;
                    break;
                case 1:
                    actualType = Type.SMALL;
                    break;
                default:
                    throw new ICUUncheckedIOException(
                            "CodePointTrie data header has an unsupported type");
            }

            int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK;
            ValueWidth actualValueWidth;
            switch (valueWidthInt) {
                case 0:
                    actualValueWidth = ValueWidth.BITS_16;
                    break;
                case 1:
                    actualValueWidth = ValueWidth.BITS_32;
                    break;
                case 2:
                    actualValueWidth = ValueWidth.BITS_8;
                    break;
                default:
                    throw new ICUUncheckedIOException(
                            "CodePointTrie data header has an unsupported value width");
            }

            if ((options & OPTIONS_RESERVED_MASK) != 0) {
                throw new ICUUncheckedIOException(
                        "CodePointTrie data header has unsupported options");
            }

            if (type == null) {
                type = actualType;
            }
            if (valueWidth == null) {
                valueWidth = actualValueWidth;
            }
            if (type != actualType || valueWidth != actualValueWidth) {
                throw new ICUUncheckedIOException(
                        "CodePointTrie data header has a different type or value width than required");
            }

            // Get the length values and offsets.
            dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4);
            dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8);

            int highStart = shiftedHighStart << SHIFT_2;

            // Calculate the actual length, minus the header.
            int actualLength = indexLength * 2;
            if (valueWidth == ValueWidth.BITS_16) {
                actualLength += dataLength * 2;
            } else if (valueWidth == ValueWidth.BITS_32) {
                actualLength += dataLength * 4;
            } else {
                actualLength += dataLength;
            }
            if (bytes.remaining() < actualLength) {
                throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data");
            }

            char[] index = ICUBinary.getChars(bytes, indexLength, 0);
            switch (valueWidth) {
                case BITS_16:
                    {
                        char[] data16 = ICUBinary.getChars(bytes, dataLength, 0);
                        return type == Type.FAST
                                ? new Fast16(
                                        index, data16, highStart, index3NullOffset, dataNullOffset)
                                : new Small16(
                                        index, data16, highStart, index3NullOffset, dataNullOffset);
                    }
                case BITS_32:
                    {
                        int[] data32 = ICUBinary.getInts(bytes, dataLength, 0);
                        return type == Type.FAST
                                ? new Fast32(
                                        index, data32, highStart, index3NullOffset, dataNullOffset)
                                : new Small32(
                                        index, data32, highStart, index3NullOffset, dataNullOffset);
                    }
                case BITS_8:
                    {
                        byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0);
                        return type == Type.FAST
                                ? new Fast8(
                                        index, data8, highStart, index3NullOffset, dataNullOffset)
                                : new Small8(
                                        index, data8, highStart, index3NullOffset, dataNullOffset);
                    }
                default:
                    throw new AssertionError("should be unreachable");
            }
        } finally {
            bytes.order(outerByteOrder);
        }
    }

    /**
     * Returns the trie type.
     *
     * @return the trie type
     * @stable ICU 63
     */
    public abstract Type getType();

    /**
     * Returns the number of bits in a trie data value.
     *
     * @return the number of bits in a trie data value
     * @stable ICU 63
     */
    public final ValueWidth getValueWidth() {
        return data.getValueWidth();
    }

    /**
     * {@inheritDoc}
     *
     * @stable ICU 63
     */
    @Override
    public int get(int c) {
        return data.getFromIndex(cpIndex(c));
    }

    /**
     * Returns a trie value for an ASCII code point, without range checking.
     *
     * @param c the input code point; must be U+0000..U+007F
     * @return The ASCII code point's trie value.
     * @stable ICU 63
     */
    public final int asciiGet(int c) {
        return ascii[c];
    }

    private static final int MAX_UNICODE = 0x10ffff;

    private static final int ASCII_LIMIT = 0x80;

    private static final int maybeFilterValue(
            int value, int trieNullValue, int nullValue, ValueFilter filter) {
        if (value == trieNullValue) {
            value = nullValue;
        } else if (filter != null) {
            value = filter.apply(value);
        }
        return value;
    }

    /**
     * {@inheritDoc}
     *
     * @stable ICU 63
     */
    @Override
    public final boolean getRange(int start, ValueFilter filter, Range range) {
        if (start < 0 || MAX_UNICODE < start) {
            return false;
        }
        if (start >= highStart) {
            int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
            int value = data.getFromIndex(di);
            if (filter != null) {
                value = filter.apply(value);
            }
            range.set(start, MAX_UNICODE, value);
            return true;
        }

        int nullValue = this.nullValue;
        if (filter != null) {
            nullValue = filter.apply(nullValue);
        }
        Type type = getType();

        int prevI3Block = -1;
        int prevBlock = -1;
        int c = start;
        // Initialize to make compiler happy. Real value when haveValue is true.
        int trieValue = 0, value = 0;
        boolean haveValue = false;
        do {
            int i3Block;
            int i3;
            int i3BlockLength;
            int dataBlockLength;
            if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) {
                i3Block = 0;
                i3 = c >> FAST_SHIFT;
                i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH;
                dataBlockLength = FAST_DATA_BLOCK_LENGTH;
            } else {
                // Use the multi-stage index.
                int i1 = c >> SHIFT_1;
                if (type == Type.FAST) {
                    assert (0xffff < c && c < highStart);
                    i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
                } else {
                    assert (c < highStart && highStart > SMALL_LIMIT);
                    i1 += SMALL_INDEX_LENGTH;
                }
                i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
                if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) {
                    // The index-3 block is the same as the previous one, and filled with value.
                    assert ((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
                    c += CP_PER_INDEX_2_ENTRY;
                    continue;
                }
                prevI3Block = i3Block;
                if (i3Block == index3NullOffset) {
                    // This is the index-3 null block.
                    if (haveValue) {
                        if (nullValue != value) {
                            range.set(start, c - 1, value);
                            return true;
                        }
                    } else {
                        trieValue = this.nullValue;
                        value = nullValue;
                        haveValue = true;
                    }
                    prevBlock = dataNullOffset;
                    c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1);
                    continue;
                }
                i3 = (c >> SHIFT_3) & INDEX_3_MASK;
                i3BlockLength = INDEX_3_BLOCK_LENGTH;
                dataBlockLength = SMALL_DATA_BLOCK_LENGTH;
            }
            // Enumerate data blocks for one index-3 block.
            do {
                int block;
                if ((i3Block & 0x8000) == 0) {
                    block = index[i3Block + i3];
                } else {
                    // 18-bit indexes stored in groups of 9 entries per 8 indexes.
                    int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
                    int gi = i3 & 7;
                    block = (index[group++] << (2 + (2 * gi))) & 0x30000;
                    block |= index[group + gi];
                }
                if (block == prevBlock && (c - start) >= dataBlockLength) {
                    // The block is the same as the previous one, and filled with value.
                    assert ((c & (dataBlockLength - 1)) == 0);
                    c += dataBlockLength;
                } else {
                    int dataMask = dataBlockLength - 1;
                    prevBlock = block;
                    if (block == dataNullOffset) {
                        // This is the data null block.
                        if (haveValue) {
                            if (nullValue != value) {
                                range.set(start, c - 1, value);
                                return true;
                            }
                        } else {
                            trieValue = this.nullValue;
                            value = nullValue;
                            haveValue = true;
                        }
                        c = (c + dataBlockLength) & ~dataMask;
                    } else {
                        int di = block + (c & dataMask);
                        int trieValue2 = data.getFromIndex(di);
                        if (haveValue) {
                            if (trieValue2 != trieValue) {
                                if (filter == null
                                        || maybeFilterValue(
                                                        trieValue2,
                                                        this.nullValue,
                                                        nullValue,
                                                        filter)
                                                != value) {
                                    range.set(start, c - 1, value);
                                    return true;
                                }
                                trieValue = trieValue2; // may or may not help
                            }
                        } else {
                            trieValue = trieValue2;
                            value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter);
                            haveValue = true;
                        }
                        while ((++c & dataMask) != 0) {
                            trieValue2 = data.getFromIndex(++di);
                            if (trieValue2 != trieValue) {
                                if (filter == null
                                        || maybeFilterValue(
                                                        trieValue2,
                                                        this.nullValue,
                                                        nullValue,
                                                        filter)
                                                != value) {
                                    range.set(start, c - 1, value);
                                    return true;
                                }
                                trieValue = trieValue2; // may or may not help
                            }
                        }
                    }
                }
            } while (++i3 < i3BlockLength);
        } while (c < highStart);
        assert (haveValue);
        int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
        int highValue = data.getFromIndex(di);
        if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) {
            --c;
        } else {
            c = MAX_UNICODE;
        }
        range.set(start, c, value);
        return true;
    }

    /**
     * Writes a representation of the trie to the output stream. Inverse of {@link #fromBinary}.
     *
     * @param os the output stream
     * @return the number of bytes written
     * @stable ICU 63
     */
    public final int toBinary(OutputStream os) {
        try {
            DataOutputStream dos = new DataOutputStream(os);

            // Write the UCPTrieHeader
            dos.writeInt(0x54726933); // signature="Tri3"
            dos.writeChar( // options
                    ((dataLength & 0xf0000) >> 4)
                            | ((dataNullOffset & 0xf0000) >> 8)
                            | (getType().ordinal() << 6)
                            | getValueWidth().ordinal());
            dos.writeChar(index.length);
            dos.writeChar(dataLength);
            dos.writeChar(index3NullOffset);
            dos.writeChar(dataNullOffset);
            dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart
            int length = 16; // sizeof(UCPTrieHeader)

            for (char i : index) {
                dos.writeChar(i);
            }
            length += index.length * 2;
            length += data.write(dos);
            return length;
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * @internal
     */
    static final int FAST_SHIFT = 6;

    /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
    static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT;

    /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
    private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1;

    /**
     * @internal
     */
    private static final int SMALL_MAX = 0xfff;

    /**
     * Offset from dataLength (to be subtracted) for fetching the value returned for out-of-range
     * code points and ill-formed UTF-8/16.
     *
     * @internal
     */
    private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1;

    /**
     * Offset from dataLength (to be subtracted) for fetching the value returned for code points
     * highStart..U+10FFFF.
     *
     * @internal
     */
    private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2;

    // ucptrie_impl.h

    /** The length of the BMP index table. 1024=0x400 */
    private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT;

    static final int SMALL_LIMIT = 0x1000;
    private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT;

    /** Shift size for getting the index-3 table offset. */
    static final int SHIFT_3 = 4;

    /** Shift size for getting the index-2 table offset. */
    private static final int SHIFT_2 = 5 + SHIFT_3;

    /** Shift size for getting the index-1 table offset. */
    private static final int SHIFT_1 = 5 + SHIFT_2;

    /**
     * Difference between two shift sizes, for getting an index-2 offset from an index-3 offset.
     * 5=9-4
     */
    static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3;

    /**
     * Difference between two shift sizes, for getting an index-1 offset from an index-2 offset.
     * 5=14-9
     */
    static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2;

    /**
     * Number of index-1 entries for the BMP. (4) This part of the index-1 table is omitted from the
     * serialized form.
     */
    private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;

    /** Number of entries in an index-2 block. 32=0x20 */
    static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;

    /** Mask for getting the lower bits for the in-index-2-block offset. */
    static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;

    /** Number of code points per index-2 table entry. 512=0x200 */
    static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2;

    /** Number of entries in an index-3 block. 32=0x20 */
    static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3;

    /** Mask for getting the lower bits for the in-index-3-block offset. */
    private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1;

    /** Number of entries in a small data block. 16=0x10 */
    static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3;

    /** Mask for getting the lower bits for the in-small-data-block offset. */
    static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1;

    // ucptrie_impl.h: Constants for use with UCPTrieHeader.options.
    private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000;
    private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;
    private static final int OPTIONS_RESERVED_MASK = 0x38;
    private static final int OPTIONS_VALUE_BITS_MASK = 7;

    /**
     * Value for index3NullOffset which indicates that there is no index-3 null block. Bit 15 is
     * unused for this value because this bit is used if the index-3 contains 18-bit indexes.
     */
    static final int NO_INDEX3_NULL_OFFSET = 0x7fff;

    static final int NO_DATA_NULL_OFFSET = 0xfffff;

    private abstract static class Data {
        abstract ValueWidth getValueWidth();

        abstract int getDataLength();

        abstract int getFromIndex(int index);

        abstract int write(DataOutputStream dos) throws IOException;
    }

    private static final class Data16 extends Data {
        char[] array;

        Data16(char[] a) {
            array = a;
        }

        @Override
        ValueWidth getValueWidth() {
            return ValueWidth.BITS_16;
        }

        @Override
        int getDataLength() {
            return array.length;
        }

        @Override
        int getFromIndex(int index) {
            return array[index];
        }

        @Override
        int write(DataOutputStream dos) throws IOException {
            for (char v : array) {
                dos.writeChar(v);
            }
            return array.length * 2;
        }
    }

    private static final class Data32 extends Data {
        int[] array;

        Data32(int[] a) {
            array = a;
        }

        @Override
        ValueWidth getValueWidth() {
            return ValueWidth.BITS_32;
        }

        @Override
        int getDataLength() {
            return array.length;
        }

        @Override
        int getFromIndex(int index) {
            return array[index];
        }

        @Override
        int write(DataOutputStream dos) throws IOException {
            for (int v : array) {
                dos.writeInt(v);
            }
            return array.length * 4;
        }
    }

    private static final class Data8 extends Data {
        byte[] array;

        Data8(byte[] a) {
            array = a;
        }

        @Override
        ValueWidth getValueWidth() {
            return ValueWidth.BITS_8;
        }

        @Override
        int getDataLength() {
            return array.length;
        }

        @Override
        int getFromIndex(int index) {
            return array[index] & 0xff;
        }

        @Override
        int write(DataOutputStream dos) throws IOException {
            for (byte v : array) {
                dos.writeByte(v);
            }
            return array.length;
        }
    }

    /**
     * @internal
     */
    private final int[] ascii;

    /**
     * @internal
     */
    private final char[] index;

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated protected final Data data;

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated protected final int dataLength;

    /**
     * Start of the last range which ends at U+10FFFF.
     *
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated protected final int highStart;

    /**
     * Internal index-3 null block offset. Set to an impossibly high value (e.g., 0xffff) if there
     * is no dedicated index-3 null block.
     *
     * @internal
     */
    private final int index3NullOffset;

    /**
     * Internal data null block offset, not shifted. Set to an impossibly high value (e.g., 0xfffff)
     * if there is no dedicated data null block.
     *
     * @internal
     */
    private final int dataNullOffset;

    /**
     * @internal
     */
    private final int nullValue;

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected final int fastIndex(int c) {
        return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK);
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected final int smallIndex(Type type, int c) {
        // Split into two methods to make this part inline-friendly.
        // In C, this part is a macro.
        if (c >= highStart) {
            return dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
        }
        return internalSmallIndex(type, c);
    }

    private final int internalSmallIndex(Type type, int c) {
        int i1 = c >> SHIFT_1;
        if (type == Type.FAST) {
            assert (0xffff < c && c < highStart);
            i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
        } else {
            assert (0 <= c && c < highStart && highStart > SMALL_LIMIT);
            i1 += SMALL_INDEX_LENGTH;
        }
        int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
        int i3 = (c >> SHIFT_3) & INDEX_3_MASK;
        int dataBlock;
        if ((i3Block & 0x8000) == 0) {
            // 16-bit indexes
            dataBlock = index[i3Block + i3];
        } else {
            // 18-bit indexes stored in groups of 9 entries per 8 indexes.
            i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
            i3 &= 7;
            dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000;
            dataBlock |= index[i3Block + i3];
        }
        return dataBlock + (c & SMALL_DATA_MASK);
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected abstract int cpIndex(int c);

    /**
     * A CodePointTrie with {@link Type#FAST}.
     *
     * @stable ICU 63
     */
    public abstract static class Fast extends CodePointTrie {
        private Fast(
                char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset) {
            super(index, data, highStart, index3NullOffset, dataNullOffset);
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#FAST}.
         *
         * @param valueWidth selects the number of bits in a data value; this method throws an
         *     exception if the valueWidth does not match the binary data; use null to accept any
         *     data value width
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
            return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes);
        }

        /**
         * @return {@link Type#FAST}
         * @stable ICU 63
         */
        @Override
        public final Type getType() {
            return Type.FAST;
        }

        /**
         * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. Can
         * be used to look up a value for a UTF-16 code unit if other parts of the string processing
         * check for surrogates.
         *
         * @param c the input code point, must be U+0000..U+FFFF
         * @return The BMP code point's trie value.
         * @stable ICU 63
         */
        public abstract int bmpGet(int c);

        /**
         * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), without range
         * checking.
         *
         * @param c the input code point, must be U+10000..U+10FFFF
         * @return The supplementary code point's trie value.
         * @stable ICU 63
         */
        public abstract int suppGet(int c);

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        @Override
        protected final int cpIndex(int c) {
            if (c >= 0) {
                if (c <= 0xffff) {
                    return fastIndex(c);
                } else if (c <= 0x10ffff) {
                    return smallIndex(Type.FAST, c);
                }
            }
            return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final StringIterator stringIterator(CharSequence s, int sIndex) {
            return new FastStringIterator(s, sIndex);
        }

        private final class FastStringIterator extends StringIterator {
            private FastStringIterator(CharSequence s, int sIndex) {
                super(s, sIndex);
            }

            @Override
            public boolean next() {
                if (sIndex >= s.length()) {
                    return false;
                }
                char lead = s.charAt(sIndex++);
                c = lead;
                int dataIndex;
                if (!Character.isSurrogate(lead)) {
                    dataIndex = fastIndex(c);
                } else {
                    char trail;
                    if (UTF16Plus.isSurrogateLead(lead)
                            && sIndex < s.length()
                            && Character.isLowSurrogate(trail = s.charAt(sIndex))) {
                        ++sIndex;
                        c = Character.toCodePoint(lead, trail);
                        dataIndex = smallIndex(Type.FAST, c);
                    } else {
                        dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
                    }
                }
                value = data.getFromIndex(dataIndex);
                return true;
            }

            @Override
            public boolean previous() {
                if (sIndex <= 0) {
                    return false;
                }
                char trail = s.charAt(--sIndex);
                c = trail;
                int dataIndex;
                if (!Character.isSurrogate(trail)) {
                    dataIndex = fastIndex(c);
                } else {
                    char lead;
                    if (!UTF16Plus.isSurrogateLead(trail)
                            && sIndex > 0
                            && Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
                        --sIndex;
                        c = Character.toCodePoint(lead, trail);
                        dataIndex = smallIndex(Type.FAST, c);
                    } else {
                        dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
                    }
                }
                value = data.getFromIndex(dataIndex);
                return true;
            }
        }
    }

    /**
     * A CodePointTrie with {@link Type#SMALL}.
     *
     * @stable ICU 63
     */
    public abstract static class Small extends CodePointTrie {
        private Small(
                char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset) {
            super(index, data, highStart, index3NullOffset, dataNullOffset);
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#SMALL}.
         *
         * @param valueWidth selects the number of bits in a data value; this method throws an
         *     exception if the valueWidth does not match the binary data; use null to accept any
         *     data value width
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
            return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes);
        }

        /**
         * @return {@link Type#SMALL}
         * @stable ICU 63
         */
        @Override
        public final Type getType() {
            return Type.SMALL;
        }

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        @Override
        protected final int cpIndex(int c) {
            if (c >= 0) {
                if (c <= SMALL_MAX) {
                    return fastIndex(c);
                } else if (c <= 0x10ffff) {
                    return smallIndex(Type.SMALL, c);
                }
            }
            return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final StringIterator stringIterator(CharSequence s, int sIndex) {
            return new SmallStringIterator(s, sIndex);
        }

        private final class SmallStringIterator extends StringIterator {
            private SmallStringIterator(CharSequence s, int sIndex) {
                super(s, sIndex);
            }

            @Override
            public boolean next() {
                if (sIndex >= s.length()) {
                    return false;
                }
                char lead = s.charAt(sIndex++);
                c = lead;
                int dataIndex;
                if (!Character.isSurrogate(lead)) {
                    dataIndex = cpIndex(c);
                } else {
                    char trail;
                    if (UTF16Plus.isSurrogateLead(lead)
                            && sIndex < s.length()
                            && Character.isLowSurrogate(trail = s.charAt(sIndex))) {
                        ++sIndex;
                        c = Character.toCodePoint(lead, trail);
                        dataIndex = smallIndex(Type.SMALL, c);
                    } else {
                        dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
                    }
                }
                value = data.getFromIndex(dataIndex);
                return true;
            }

            @Override
            public boolean previous() {
                if (sIndex <= 0) {
                    return false;
                }
                char trail = s.charAt(--sIndex);
                c = trail;
                int dataIndex;
                if (!Character.isSurrogate(trail)) {
                    dataIndex = cpIndex(c);
                } else {
                    char lead;
                    if (!UTF16Plus.isSurrogateLead(trail)
                            && sIndex > 0
                            && Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
                        --sIndex;
                        c = Character.toCodePoint(lead, trail);
                        dataIndex = smallIndex(Type.SMALL, c);
                    } else {
                        dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
                    }
                }
                value = data.getFromIndex(dataIndex);
                return true;
            }
        }
    }

    /**
     * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
     *
     * @stable ICU 63
     */
    public static final class Fast16 extends Fast {
        private final char[] dataArray;

        Fast16(
                char[] index,
                char[] data16,
                int highStart,
                int index3NullOffset,
                int dataNullOffset) {
            super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
            this.dataArray = data16;
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Fast16 fromBinary(ByteBuffer bytes) {
            return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int get(int c) {
            return dataArray[cpIndex(c)];
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int bmpGet(int c) {
            assert 0 <= c && c <= 0xffff;
            return dataArray[fastIndex(c)];
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int suppGet(int c) {
            assert 0x10000 <= c && c <= 0x10ffff;
            return dataArray[smallIndex(Type.FAST, c)];
        }
    }

    /**
     * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
     *
     * @stable ICU 63
     */
    public static final class Fast32 extends Fast {
        private final int[] dataArray;

        Fast32(
                char[] index,
                int[] data32,
                int highStart,
                int index3NullOffset,
                int dataNullOffset) {
            super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
            this.dataArray = data32;
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Fast32 fromBinary(ByteBuffer bytes) {
            return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int get(int c) {
            return dataArray[cpIndex(c)];
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int bmpGet(int c) {
            assert 0 <= c && c <= 0xffff;
            return dataArray[fastIndex(c)];
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int suppGet(int c) {
            assert 0x10000 <= c && c <= 0x10ffff;
            return dataArray[smallIndex(Type.FAST, c)];
        }
    }

    /**
     * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
     *
     * @stable ICU 63
     */
    public static final class Fast8 extends Fast {
        private final byte[] dataArray;

        Fast8(char[] index, byte[] data8, int highStart, int index3NullOffset, int dataNullOffset) {
            super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
            this.dataArray = data8;
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Fast8 fromBinary(ByteBuffer bytes) {
            return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int get(int c) {
            return dataArray[cpIndex(c)] & 0xff;
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int bmpGet(int c) {
            assert 0 <= c && c <= 0xffff;
            return dataArray[fastIndex(c)] & 0xff;
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 63
         */
        @Override
        public final int suppGet(int c) {
            assert 0x10000 <= c && c <= 0x10ffff;
            return dataArray[smallIndex(Type.FAST, c)] & 0xff;
        }
    }

    /**
     * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
     *
     * @stable ICU 63
     */
    public static final class Small16 extends Small {
        Small16(
                char[] index,
                char[] data16,
                int highStart,
                int index3NullOffset,
                int dataNullOffset) {
            super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Small16 fromBinary(ByteBuffer bytes) {
            return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes);
        }
    }

    /**
     * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
     *
     * @stable ICU 63
     */
    public static final class Small32 extends Small {
        Small32(
                char[] index,
                int[] data32,
                int highStart,
                int index3NullOffset,
                int dataNullOffset) {
            super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Small32 fromBinary(ByteBuffer bytes) {
            return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes);
        }
    }

    /**
     * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
     *
     * @stable ICU 63
     */
    public static final class Small8 extends Small {
        Small8(
                char[] index,
                byte[] data8,
                int highStart,
                int index3NullOffset,
                int dataNullOffset) {
            super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
        }

        /**
         * Creates a trie from its binary form. Same as {@link CodePointTrie#fromBinary(Type,
         * ValueWidth, ByteBuffer)} with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         * @stable ICU 63
         */
        public static Small8 fromBinary(ByteBuffer bytes) {
            return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);
        }
    }
}