CharsTrie.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *   Copyright (C) 2011-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   created on: 2011jan06
 *   created by: Markus W. Scherer
 *   ported from ICU4C ucharstrie.h/.cpp
 */

package com.ibm.icu.util;

import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.BytesTrie.Result;
import java.io.IOException;
import java.util.ArrayList;
import java.util.NoSuchElementException;

/**
 * Light-weight, non-const reader class for a CharsTrie. Traverses a char-serialized data structure
 * with minimal state, for mapping strings (16-bit-unit sequences) to non-negative integer values.
 *
 * <p>This class is not intended for public subclassing.
 *
 * @stable ICU 4.8
 * @author Markus W. Scherer
 */
public final class CharsTrie implements Cloneable, Iterable<CharsTrie.Entry> {
    /**
     * Constructs a CharsTrie reader instance.
     *
     * <p>The CharSequence must contain a copy of a char sequence from the CharsTrieBuilder, with
     * the offset indicating the first char of that sequence. The CharsTrie object will not read
     * more chars than the CharsTrieBuilder generated in the corresponding build() call.
     *
     * <p>The CharSequence is not copied/cloned and must not be modified while the CharsTrie object
     * is in use.
     *
     * @param trieChars CharSequence that contains the serialized trie.
     * @param offset Root offset of the trie in the CharSequence.
     * @stable ICU 4.8
     */
    public CharsTrie(CharSequence trieChars, int offset) {
        chars_ = trieChars;
        pos_ = root_ = offset;
        remainingMatchLength_ = -1;
    }

    /**
     * Copy constructor. Makes a shallow copy of the other trie reader object and its state. Does
     * not copy the char array which will be shared. Same as clone() but without the throws clause.
     *
     * @stable ICU 64
     */
    public CharsTrie(CharsTrie other) {
        chars_ = other.chars_;
        root_ = other.root_;
        pos_ = other.pos_;
        remainingMatchLength_ = other.remainingMatchLength_;
    }

    /**
     * Clones this trie reader object and its state, but not the char array which will be shared.
     *
     * @return A shallow clone of this trie.
     * @stable ICU 4.8
     */
    @Override
    public CharsTrie clone() throws CloneNotSupportedException {
        return (CharsTrie) super.clone(); // A shallow copy is just what we need.
    }

    /**
     * Resets this trie to its initial state.
     *
     * @return this
     * @stable ICU 4.8
     */
    public CharsTrie reset() {
        pos_ = root_;
        remainingMatchLength_ = -1;
        return this;
    }

    /**
     * Returns the state of this trie as a 64-bit integer. The state value is never 0.
     *
     * @return opaque state value
     * @see #resetToState64
     * @stable ICU 64
     */
    public long getState64() {
        return ((long) remainingMatchLength_ << 32) | pos_;
    }

    /**
     * Resets this trie to the saved state. Unlike {@link #resetToState(State)}, the 64-bit state
     * value must be from {@link #getState64()} from the same trie object or from one initialized
     * the exact same way. Because of no validation, this method is faster.
     *
     * @param state The opaque trie state value from getState64().
     * @return this
     * @see #getState64
     * @see #resetToState
     * @see #reset
     * @stable ICU 64
     */
    public CharsTrie resetToState64(long state) {
        remainingMatchLength_ = (int) (state >> 32);
        pos_ = (int) state;
        return this;
    }

    /**
     * CharsTrie state object, for saving a trie's current state and resetting the trie back to this
     * state later.
     *
     * @stable ICU 4.8
     */
    public static final class State {
        /**
         * Constructs an empty State.
         *
         * @stable ICU 4.8
         */
        public State() {}

        private CharSequence chars;
        private int root;
        private int pos;
        private int remainingMatchLength;
    }

    /**
     * Saves the state of this trie.
     *
     * @param state The State object to hold the trie's state.
     * @return this
     * @see #resetToState
     * @stable ICU 4.8
     */
    public CharsTrie saveState(State state) /*const*/ {
        state.chars = chars_;
        state.root = root_;
        state.pos = pos_;
        state.remainingMatchLength = remainingMatchLength_;
        return this;
    }

    /**
     * Resets this trie to the saved state. Slower than {@link #resetToState64(long)} which does not
     * validate the state value.
     *
     * @param state The State object which holds a saved trie state.
     * @return this
     * @throws IllegalArgumentException if the state object contains no state, or the state of a
     *     different trie
     * @see #saveState
     * @see #reset
     * @stable ICU 4.8
     */
    public CharsTrie resetToState(State state) {
        if (chars_ == state.chars && chars_ != null && root_ == state.root) {
            pos_ = state.pos;
            remainingMatchLength_ = state.remainingMatchLength;
        } else {
            throw new IllegalArgumentException("incompatible trie state");
        }
        return this;
    }

    /**
     * Determines whether the string so far matches, whether it has a value, and whether another
     * input char can continue a matching string.
     *
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    public Result current() /*const*/ {
        int pos = pos_;
        if (pos < 0) {
            return Result.NO_MATCH;
        } else {
            int node;
            return (remainingMatchLength_ < 0 && (node = chars_.charAt(pos)) >= kMinValueLead)
                    ? valueResults_[node >> 15]
                    : Result.NO_VALUE;
        }
    }

    /**
     * Traverses the trie from the initial state for this input char. Equivalent to
     * reset().next(inUnit).
     *
     * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    public Result first(int inUnit) {
        remainingMatchLength_ = -1;
        return nextImpl(root_, inUnit);
    }

    /**
     * Traverses the trie from the initial state for the one or two UTF-16 code units for this input
     * code point. Equivalent to reset().nextForCodePoint(cp).
     *
     * @param cp A Unicode code point 0..0x10ffff.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    public Result firstForCodePoint(int cp) {
        return cp <= 0xffff
                ? first(cp)
                : (first(UTF16.getLeadSurrogate(cp)).hasNext()
                        ? next(UTF16.getTrailSurrogate(cp))
                        : Result.NO_MATCH);
    }

    /**
     * Traverses the trie from the current state for this input char.
     *
     * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    public Result next(int inUnit) {
        int pos = pos_;
        if (pos < 0) {
            return Result.NO_MATCH;
        }
        int length = remainingMatchLength_; // Actual remaining match length minus 1.
        if (length >= 0) {
            // Remaining part of a linear-match node.
            if (inUnit == chars_.charAt(pos++)) {
                remainingMatchLength_ = --length;
                pos_ = pos;
                int node;
                return (length < 0 && (node = chars_.charAt(pos)) >= kMinValueLead)
                        ? valueResults_[node >> 15]
                        : Result.NO_VALUE;
            } else {
                stop();
                return Result.NO_MATCH;
            }
        }
        return nextImpl(pos, inUnit);
    }

    /**
     * Traverses the trie from the current state for the one or two UTF-16 code units for this input
     * code point.
     *
     * @param cp A Unicode code point 0..0x10ffff.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    public Result nextForCodePoint(int cp) {
        return cp <= 0xffff
                ? next(cp)
                : (next(UTF16.getLeadSurrogate(cp)).hasNext()
                        ? next(UTF16.getTrailSurrogate(cp))
                        : Result.NO_MATCH);
    }

    /**
     * Traverses the trie from the current state for this string. Equivalent to
     *
     * <pre>
     * Result result=current();
     * for(each c in s)
     *   if(!result.hasNext()) return Result.NO_MATCH;
     *   result=next(c);
     * return result;
     * </pre>
     *
     * @param s Contains a string.
     * @param sIndex The start index of the string in s.
     * @param sLimit The (exclusive) end index of the string in s.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    public Result next(CharSequence s, int sIndex, int sLimit) {
        if (sIndex >= sLimit) {
            // Empty input.
            return current();
        }
        int pos = pos_;
        if (pos < 0) {
            return Result.NO_MATCH;
        }
        int length = remainingMatchLength_; // Actual remaining match length minus 1.
        for (; ; ) {
            // Fetch the next input unit, if there is one.
            // Continue a linear-match node.
            char inUnit;
            for (; ; ) {
                if (sIndex == sLimit) {
                    remainingMatchLength_ = length;
                    pos_ = pos;
                    int node;
                    return (length < 0 && (node = chars_.charAt(pos)) >= kMinValueLead)
                            ? valueResults_[node >> 15]
                            : Result.NO_VALUE;
                }
                inUnit = s.charAt(sIndex++);
                if (length < 0) {
                    remainingMatchLength_ = length;
                    break;
                }
                if (inUnit != chars_.charAt(pos)) {
                    stop();
                    return Result.NO_MATCH;
                }
                ++pos;
                --length;
            }
            int node = chars_.charAt(pos++);
            for (; ; ) {
                if (node < kMinLinearMatch) {
                    Result result = branchNext(pos, node, inUnit);
                    if (result == Result.NO_MATCH) {
                        return Result.NO_MATCH;
                    }
                    // Fetch the next input unit, if there is one.
                    if (sIndex == sLimit) {
                        return result;
                    }
                    if (result == Result.FINAL_VALUE) {
                        // No further matching units.
                        stop();
                        return Result.NO_MATCH;
                    }
                    inUnit = s.charAt(sIndex++);
                    pos = pos_; // branchNext() advanced pos and wrote it to pos_ .
                    node = chars_.charAt(pos++);
                } else if (node < kMinValueLead) {
                    // Match length+1 units.
                    length = node - kMinLinearMatch; // Actual match length minus 1.
                    if (inUnit != chars_.charAt(pos)) {
                        stop();
                        return Result.NO_MATCH;
                    }
                    ++pos;
                    --length;
                    break;
                } else if ((node & kValueIsFinal) != 0) {
                    // No further matching units.
                    stop();
                    return Result.NO_MATCH;
                } else {
                    // Skip intermediate value.
                    pos = skipNodeValue(pos, node);
                    node &= kNodeTypeMask;
                }
            }
        }
    }

    /**
     * Returns a matching string's value if called immediately after current()/first()/next()
     * returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE. getValue() can be called multiple
     * times.
     *
     * <p>Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
     *
     * @return The value for the string so far.
     * @stable ICU 4.8
     */
    public int getValue() /*const*/ {
        int pos = pos_;
        int leadUnit = chars_.charAt(pos++);
        assert (leadUnit >= kMinValueLead);
        return (leadUnit & kValueIsFinal) != 0
                ? readValue(chars_, pos, leadUnit & 0x7fff)
                : readNodeValue(chars_, pos, leadUnit);
    }

    /**
     * Determines whether all strings reachable from the current state map to the same value, and if
     * so, returns that value.
     *
     * @return The unique value in bits 32..1 with bit 0 set, if all strings reachable from the
     *     current state map to the same value; otherwise returns 0.
     * @stable ICU 4.8
     */
    public long getUniqueValue() /*const*/ {
        int pos = pos_;
        if (pos < 0) {
            return 0;
        }
        // Skip the rest of a pending linear-match node.
        long uniqueValue = findUniqueValue(chars_, pos + remainingMatchLength_ + 1, 0);
        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
        return (uniqueValue << 31) >> 31;
    }

    /**
     * Finds each char which continues the string from the current state. That is, each char c for
     * which it would be next(c)!=Result.NO_MATCH now.
     *
     * @param out Each next char is appended to this object. (Only uses the out.append(c) method.)
     * @return The number of chars which continue the string from here.
     * @stable ICU 4.8
     */
    public int getNextChars(Appendable out) /*const*/ {
        int pos = pos_;
        if (pos < 0) {
            return 0;
        }
        if (remainingMatchLength_ >= 0) {
            append(out, chars_.charAt(pos)); // Next unit of a pending linear-match node.
            return 1;
        }
        int node = chars_.charAt(pos++);
        if (node >= kMinValueLead) {
            if ((node & kValueIsFinal) != 0) {
                return 0;
            } else {
                pos = skipNodeValue(pos, node);
                node &= kNodeTypeMask;
            }
        }
        if (node < kMinLinearMatch) {
            if (node == 0) {
                node = chars_.charAt(pos++);
            }
            getNextBranchChars(chars_, pos, ++node, out);
            return node;
        } else {
            // First unit of the linear-match node.
            append(out, chars_.charAt(pos));
            return 1;
        }
    }

    /**
     * Iterates from the current state of this trie.
     *
     * @return A new CharsTrie.Iterator.
     * @stable ICU 4.8
     */
    @Override
    public Iterator iterator() {
        return new Iterator(chars_, pos_, remainingMatchLength_, 0);
    }

    /**
     * Iterates from the current state of this trie.
     *
     * @param maxStringLength If 0, the iterator returns full strings. Otherwise, the iterator
     *     returns strings with this maximum length.
     * @return A new CharsTrie.Iterator.
     * @stable ICU 4.8
     */
    public Iterator iterator(int maxStringLength) {
        return new Iterator(chars_, pos_, remainingMatchLength_, maxStringLength);
    }

    /**
     * Iterates from the root of a char-serialized BytesTrie.
     *
     * @param trieChars CharSequence that contains the serialized trie.
     * @param offset Root offset of the trie in the CharSequence.
     * @param maxStringLength If 0, the iterator returns full strings. Otherwise, the iterator
     *     returns strings with this maximum length.
     * @return A new CharsTrie.Iterator.
     * @stable ICU 4.8
     */
    public static Iterator iterator(CharSequence trieChars, int offset, int maxStringLength) {
        return new Iterator(trieChars, offset, -1, maxStringLength);
    }

    /**
     * Return value type for the Iterator.
     *
     * @stable ICU 4.8
     */
    public static final class Entry {
        /**
         * The string.
         *
         * @stable ICU 4.8
         */
        public CharSequence chars;

        /**
         * The value associated with the string.
         *
         * @stable ICU 4.8
         */
        public int value;

        private Entry() {}
    }

    /**
     * Iterator for all of the (string, value) pairs in a CharsTrie.
     *
     * @stable ICU 4.8
     */
    public static final class Iterator implements java.util.Iterator<Entry> {
        private Iterator(
                CharSequence trieChars, int offset, int remainingMatchLength, int maxStringLength) {
            chars_ = trieChars;
            pos_ = initialPos_ = offset;
            remainingMatchLength_ = initialRemainingMatchLength_ = remainingMatchLength;
            maxLength_ = maxStringLength;
            int length = remainingMatchLength_; // Actual remaining match length minus 1.
            if (length >= 0) {
                // Pending linear-match node, append remaining bytes to str_.
                ++length;
                if (maxLength_ > 0 && length > maxLength_) {
                    length = maxLength_; // This will leave remainingMatchLength>=0 as a signal.
                }
                str_.append(chars_, pos_, pos_ + length);
                pos_ += length;
                remainingMatchLength_ -= length;
            }
        }

        /**
         * Resets this iterator to its initial state.
         *
         * @return this
         * @stable ICU 4.8
         */
        public Iterator reset() {
            pos_ = initialPos_;
            remainingMatchLength_ = initialRemainingMatchLength_;
            skipValue_ = false;
            int length = remainingMatchLength_ + 1; // Remaining match length.
            if (maxLength_ > 0 && length > maxLength_) {
                length = maxLength_;
            }
            str_.setLength(length);
            pos_ += length;
            remainingMatchLength_ -= length;
            stack_.clear();
            return this;
        }

        /**
         * @return true if there are more elements.
         * @stable ICU 4.8
         */
        @Override
        public boolean hasNext() /*const*/ {
            return pos_ >= 0 || !stack_.isEmpty();
        }

        /**
         * Finds the next (string, value) pair if there is one.
         *
         * <p>If the string is truncated to the maximum length and does not have a real value, then
         * the value is set to -1. In this case, this "not a real value" is indistinguishable from a
         * real value of -1.
         *
         * @return An Entry with the string and value of the next element.
         * @throws NoSuchElementException - iteration has no more elements.
         * @stable ICU 4.8
         */
        @Override
        public Entry next() {
            int pos = pos_;
            if (pos < 0) {
                if (stack_.isEmpty()) {
                    throw new NoSuchElementException();
                }
                // Pop the state off the stack and continue with the next outbound edge of
                // the branch node.
                long top = stack_.remove(stack_.size() - 1);
                int length = (int) top;
                pos = (int) (top >> 32);
                str_.setLength(length & 0xffff);
                length >>>= 16;
                if (length > 1) {
                    pos = branchNext(pos, length);
                    if (pos < 0) {
                        return entry_; // Reached a final value.
                    }
                } else {
                    str_.append(chars_.charAt(pos++));
                }
            }
            if (remainingMatchLength_ >= 0) {
                // We only get here if we started in a pending linear-match node
                // with more than maxLength remaining units.
                return truncateAndStop();
            }
            for (; ; ) {
                int node = chars_.charAt(pos++);
                if (node >= kMinValueLead) {
                    if (skipValue_) {
                        pos = skipNodeValue(pos, node);
                        node &= kNodeTypeMask;
                        skipValue_ = false;
                    } else {
                        // Deliver value for the string so far.
                        boolean isFinal = (node & kValueIsFinal) != 0;
                        if (isFinal) {
                            entry_.value = readValue(chars_, pos, node & 0x7fff);
                        } else {
                            entry_.value = readNodeValue(chars_, pos, node);
                        }
                        if (isFinal || (maxLength_ > 0 && str_.length() == maxLength_)) {
                            pos_ = -1;
                        } else {
                            // We cannot skip the value right here because it shares its
                            // lead unit with a match node which we have to evaluate
                            // next time.
                            // Instead, keep pos_ on the node lead unit itself.
                            pos_ = pos - 1;
                            skipValue_ = true;
                        }
                        entry_.chars = str_;
                        return entry_;
                    }
                }
                if (maxLength_ > 0 && str_.length() == maxLength_) {
                    return truncateAndStop();
                }
                if (node < kMinLinearMatch) {
                    if (node == 0) {
                        node = chars_.charAt(pos++);
                    }
                    pos = branchNext(pos, node + 1);
                    if (pos < 0) {
                        return entry_; // Reached a final value.
                    }
                } else {
                    // Linear-match node, append length units to str_.
                    int length = node - kMinLinearMatch + 1;
                    if (maxLength_ > 0 && str_.length() + length > maxLength_) {
                        str_.append(chars_, pos, pos + maxLength_ - str_.length());
                        return truncateAndStop();
                    }
                    str_.append(chars_, pos, pos + length);
                    pos += length;
                }
            }
        }

        /**
         * Iterator.remove() is not supported.
         *
         * @throws UnsupportedOperationException (always)
         * @stable ICU 4.8
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        private Entry truncateAndStop() {
            pos_ = -1;
            // We reset entry_.chars every time we return entry_
            // just because the caller might have modified the Entry.
            entry_.chars = str_;
            entry_.value = -1; // no real value for str
            return entry_;
        }

        private int branchNext(int pos, int length) {
            while (length > kMaxBranchLinearSubNodeLength) {
                ++pos; // ignore the comparison unit
                // Push state for the greater-or-equal edge.
                stack_.add(
                        ((long) skipDelta(chars_, pos) << 32)
                                | ((length - (length >> 1)) << 16)
                                | str_.length());
                // Follow the less-than edge.
                length >>= 1;
                pos = jumpByDelta(chars_, pos);
            }
            // List of key-value pairs where values are either final values or jump deltas.
            // Read the first (key, value) pair.
            char trieUnit = chars_.charAt(pos++);
            int node = chars_.charAt(pos++);
            boolean isFinal = (node & kValueIsFinal) != 0;
            int value = readValue(chars_, pos, node &= 0x7fff);
            pos = skipValue(pos, node);
            stack_.add(((long) pos << 32) | ((length - 1) << 16) | str_.length());
            str_.append(trieUnit);
            if (isFinal) {
                pos_ = -1;
                entry_.chars = str_;
                entry_.value = value;
                return -1;
            } else {
                return pos + value;
            }
        }

        private CharSequence chars_;
        private int pos_;
        private int initialPos_;
        private int remainingMatchLength_;
        private int initialRemainingMatchLength_;
        private boolean skipValue_; // Skip intermediate value which was already delivered.

        private StringBuilder str_ = new StringBuilder();
        private int maxLength_;
        private Entry entry_ = new Entry();

        // The stack stores longs for backtracking to another
        // outbound edge of a branch node.
        // Each long has the offset in chars_ in bits 62..32,
        // the str_.length() from before the node in bits 15..0,
        // and the remaining branch length in bits 31..16.
        // (We could store the remaining branch length minus 1 in bits 30..16 and not use bit 31,
        // but the code looks more confusing that way.)
        private ArrayList<Long> stack_ = new ArrayList<>();
    }

    private void stop() {
        pos_ = -1;
    }

    // Reads a compact 32-bit integer.
    // pos is already after the leadUnit, and the lead unit has bit 15 reset.
    private static int readValue(CharSequence chars, int pos, int leadUnit) {
        int value;
        if (leadUnit < kMinTwoUnitValueLead) {
            value = leadUnit;
        } else if (leadUnit < kThreeUnitValueLead) {
            value = ((leadUnit - kMinTwoUnitValueLead) << 16) | chars.charAt(pos);
        } else {
            value = (chars.charAt(pos) << 16) | chars.charAt(pos + 1);
        }
        return value;
    }

    private static int skipValue(int pos, int leadUnit) {
        if (leadUnit >= kMinTwoUnitValueLead) {
            if (leadUnit < kThreeUnitValueLead) {
                ++pos;
            } else {
                pos += 2;
            }
        }
        return pos;
    }

    private static int skipValue(CharSequence chars, int pos) {
        int leadUnit = chars.charAt(pos++);
        return skipValue(pos, leadUnit & 0x7fff);
    }

    private static int readNodeValue(CharSequence chars, int pos, int leadUnit) {
        assert (kMinValueLead <= leadUnit && leadUnit < kValueIsFinal);
        int value;
        if (leadUnit < kMinTwoUnitNodeValueLead) {
            value = (leadUnit >> 6) - 1;
        } else if (leadUnit < kThreeUnitNodeValueLead) {
            value = (((leadUnit & 0x7fc0) - kMinTwoUnitNodeValueLead) << 10) | chars.charAt(pos);
        } else {
            value = (chars.charAt(pos) << 16) | chars.charAt(pos + 1);
        }
        return value;
    }

    private static int skipNodeValue(int pos, int leadUnit) {
        assert (kMinValueLead <= leadUnit && leadUnit < kValueIsFinal);
        if (leadUnit >= kMinTwoUnitNodeValueLead) {
            if (leadUnit < kThreeUnitNodeValueLead) {
                ++pos;
            } else {
                pos += 2;
            }
        }
        return pos;
    }

    private static int jumpByDelta(CharSequence chars, int pos) {
        int delta = chars.charAt(pos++);
        if (delta >= kMinTwoUnitDeltaLead) {
            if (delta == kThreeUnitDeltaLead) {
                delta = (chars.charAt(pos) << 16) | chars.charAt(pos + 1);
                pos += 2;
            } else {
                delta = ((delta - kMinTwoUnitDeltaLead) << 16) | chars.charAt(pos++);
            }
        }
        return pos + delta;
    }

    private static int skipDelta(CharSequence chars, int pos) {
        int delta = chars.charAt(pos++);
        if (delta >= kMinTwoUnitDeltaLead) {
            if (delta == kThreeUnitDeltaLead) {
                pos += 2;
            } else {
                ++pos;
            }
        }
        return pos;
    }

    private static Result[] valueResults_ = {Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE};

    // Handles a branch node for both next(unit) and next(string).
    private Result branchNext(int pos, int length, int inUnit) {
        // Branch according to the current unit.
        if (length == 0) {
            length = chars_.charAt(pos++);
        }
        ++length;
        // The length of the branch is the number of units to select from.
        // The data structure encodes a binary search.
        while (length > kMaxBranchLinearSubNodeLength) {
            if (inUnit < chars_.charAt(pos++)) {
                length >>= 1;
                pos = jumpByDelta(chars_, pos);
            } else {
                length = length - (length >> 1);
                pos = skipDelta(chars_, pos);
            }
        }
        // Drop down to linear search for the last few units.
        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
        // and divides length by 2.
        do {
            if (inUnit == chars_.charAt(pos++)) {
                Result result;
                int node = chars_.charAt(pos);
                if ((node & kValueIsFinal) != 0) {
                    // Leave the final value for getValue() to read.
                    result = Result.FINAL_VALUE;
                } else {
                    // Use the non-final value as the jump delta.
                    ++pos;
                    // int delta=readValue(pos, node);
                    int delta;
                    if (node < kMinTwoUnitValueLead) {
                        delta = node;
                    } else if (node < kThreeUnitValueLead) {
                        delta = ((node - kMinTwoUnitValueLead) << 16) | chars_.charAt(pos++);
                    } else {
                        delta = (chars_.charAt(pos) << 16) | chars_.charAt(pos + 1);
                        pos += 2;
                    }
                    // end readValue()
                    pos += delta;
                    node = chars_.charAt(pos);
                    result = node >= kMinValueLead ? valueResults_[node >> 15] : Result.NO_VALUE;
                }
                pos_ = pos;
                return result;
            }
            --length;
            pos = skipValue(chars_, pos);
        } while (length > 1);
        if (inUnit == chars_.charAt(pos++)) {
            pos_ = pos;
            int node = chars_.charAt(pos);
            return node >= kMinValueLead ? valueResults_[node >> 15] : Result.NO_VALUE;
        } else {
            stop();
            return Result.NO_MATCH;
        }
    }

    // Requires remainingLength_<0.
    private Result nextImpl(int pos, int inUnit) {
        int node = chars_.charAt(pos++);
        for (; ; ) {
            if (node < kMinLinearMatch) {
                return branchNext(pos, node, inUnit);
            } else if (node < kMinValueLead) {
                // Match the first of length+1 units.
                int length = node - kMinLinearMatch; // Actual match length minus 1.
                if (inUnit == chars_.charAt(pos++)) {
                    remainingMatchLength_ = --length;
                    pos_ = pos;
                    return (length < 0 && (node = chars_.charAt(pos)) >= kMinValueLead)
                            ? valueResults_[node >> 15]
                            : Result.NO_VALUE;
                } else {
                    // No match.
                    break;
                }
            } else if ((node & kValueIsFinal) != 0) {
                // No further matching units.
                break;
            } else {
                // Skip intermediate value.
                pos = skipNodeValue(pos, node);
                node &= kNodeTypeMask;
            }
        }
        stop();
        return Result.NO_MATCH;
    }

    // Helper functions for getUniqueValue().
    // Recursively finds a unique value (or whether there is not a unique one)
    // from a branch.
    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
    private static long findUniqueValueFromBranch(
            CharSequence chars, int pos, int length, long uniqueValue) {
        while (length > kMaxBranchLinearSubNodeLength) {
            ++pos; // ignore the comparison unit
            uniqueValue =
                    findUniqueValueFromBranch(
                            chars, jumpByDelta(chars, pos), length >> 1, uniqueValue);
            if (uniqueValue == 0) {
                return 0;
            }
            length = length - (length >> 1);
            pos = skipDelta(chars, pos);
        }
        do {
            ++pos; // ignore a comparison unit
            // handle its value
            int node = chars.charAt(pos++);
            boolean isFinal = (node & kValueIsFinal) != 0;
            node &= 0x7fff;
            int value = readValue(chars, pos, node);
            pos = skipValue(pos, node);
            if (isFinal) {
                if (uniqueValue != 0) {
                    if (value != (int) (uniqueValue >> 1)) {
                        return 0;
                    }
                } else {
                    uniqueValue = ((long) value << 1) | 1;
                }
            } else {
                uniqueValue = findUniqueValue(chars, pos + value, uniqueValue);
                if (uniqueValue == 0) {
                    return 0;
                }
            }
        } while (--length > 1);
        // ignore the last comparison byte
        return ((long) (pos + 1) << 33) | (uniqueValue & 0x1ffffffffL);
    }

    // Recursively finds a unique value (or whether there is not a unique one)
    // starting from a position on a node lead unit.
    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
    private static long findUniqueValue(CharSequence chars, int pos, long uniqueValue) {
        int node = chars.charAt(pos++);
        for (; ; ) {
            if (node < kMinLinearMatch) {
                if (node == 0) {
                    node = chars.charAt(pos++);
                }
                uniqueValue = findUniqueValueFromBranch(chars, pos, node + 1, uniqueValue);
                if (uniqueValue == 0) {
                    return 0;
                }
                pos = (int) (uniqueValue >>> 33);
                node = chars.charAt(pos++);
            } else if (node < kMinValueLead) {
                // linear-match node
                pos += node - kMinLinearMatch + 1; // Ignore the match units.
                node = chars.charAt(pos++);
            } else {
                boolean isFinal = (node & kValueIsFinal) != 0;
                int value;
                if (isFinal) {
                    value = readValue(chars, pos, node & 0x7fff);
                } else {
                    value = readNodeValue(chars, pos, node);
                }
                if (uniqueValue != 0) {
                    if (value != (int) (uniqueValue >> 1)) {
                        return 0;
                    }
                } else {
                    uniqueValue = ((long) value << 1) | 1;
                }
                if (isFinal) {
                    return uniqueValue;
                }
                pos = skipNodeValue(pos, node);
                node &= kNodeTypeMask;
            }
        }
    }

    // Helper functions for getNextChars().
    // getNextChars() when pos is on a branch node.
    private static void getNextBranchChars(
            CharSequence chars, int pos, int length, Appendable out) {
        while (length > kMaxBranchLinearSubNodeLength) {
            ++pos; // ignore the comparison unit
            getNextBranchChars(chars, jumpByDelta(chars, pos), length >> 1, out);
            length = length - (length >> 1);
            pos = skipDelta(chars, pos);
        }
        do {
            append(out, chars.charAt(pos++));
            pos = skipValue(chars, pos);
        } while (--length > 1);
        append(out, chars.charAt(pos));
    }

    private static void append(Appendable out, int c) {
        try {
            out.append((char) c);
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    // CharsTrie data structure
    //
    // The trie consists of a series of char-serialized nodes for incremental
    // Unicode string/char sequence matching. (char=16-bit unsigned integer)
    // The root node is at the beginning of the trie data.
    //
    // Types of nodes are distinguished by their node lead unit ranges.
    // After each node, except a final-value node, another node follows to
    // encode match values or continue matching further units.
    //
    // Node types:
    //  - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
    //    The value is for the string/char sequence so far.
    //  - Match node, optionally with an intermediate value in a different compact format.
    //    The value, if present, is for the string/char sequence so far.
    //
    //  Aside from the value, which uses the node lead unit's high bits:
    //
    //  - Linear-match node: Matches a number of units.
    //  - Branch node: Branches to other nodes according to the current input unit.
    //    The node unit is the length of the branch (number of units to select from)
    //    minus 1. It is followed by a sub-node:
    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
    //      there are length-1 (key, value) pairs and then one more comparison unit.
    //      If one of the key units matches, then the value is either a final value for
    //      the string so far, or a "jump" delta to the next node.
    //      If the last unit matches, then matching continues with the next node.
    //      (Values have the same encoding as final-value nodes.)
    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
    //      there is one unit and one "jump" delta.
    //      If the input unit is less than the sub-node unit, then "jump" by delta to
    //      the next sub-node which will have a length of length/2.
    //      (The delta has its own compact encoding.)
    //      Otherwise, skip the "jump" delta to the next sub-node
    //      which will have a length of length-length/2.

    // Match-node lead unit values, after masking off intermediate-value bits:

    // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
    // the length is one more than the next unit.

    // For a branch sub-node with at most this many entries, we drop down
    // to a linear search.
    /*package*/ static final int kMaxBranchLinearSubNodeLength = 5;

    // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
    /*package*/ static final int kMinLinearMatch = 0x30;
    /*package*/ static final int kMaxLinearMatchLength = 0x10;

    // Match-node lead unit bits 14..6 for the optional intermediate value.
    // If these bits are 0, then there is no intermediate value.
    // Otherwise, see the *NodeValue* constants below.
    /*package*/ static final int kMinValueLead = kMinLinearMatch + kMaxLinearMatchLength; // 0x0040
    /*package*/ static final int kNodeTypeMask = kMinValueLead - 1; // 0x003f

    // A final-value node has bit 15 set.
    /*package*/ static final int kValueIsFinal = 0x8000;

    // Compact value: After testing and masking off bit 15, use the following thresholds.
    /*package*/ static final int kMaxOneUnitValue = 0x3fff;

    /*package*/ static final int kMinTwoUnitValueLead = kMaxOneUnitValue + 1; // 0x4000
    /*package*/ static final int kThreeUnitValueLead = 0x7fff;

    /*package*/ static final int kMaxTwoUnitValue =
            ((kThreeUnitValueLead - kMinTwoUnitValueLead) << 16) - 1; // 0x3ffeffff

    // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
    /*package*/ static final int kMaxOneUnitNodeValue = 0xff;
    /*package*/ static final int kMinTwoUnitNodeValueLead =
            kMinValueLead + ((kMaxOneUnitNodeValue + 1) << 6); // 0x4040
    /*package*/ static final int kThreeUnitNodeValueLead = 0x7fc0;

    /*package*/ static final int kMaxTwoUnitNodeValue =
            ((kThreeUnitNodeValueLead - kMinTwoUnitNodeValueLead) << 10) - 1; // 0xfdffff

    // Compact delta integers.
    /*package*/ static final int kMaxOneUnitDelta = 0xfbff;
    /*package*/ static final int kMinTwoUnitDeltaLead = kMaxOneUnitDelta + 1; // 0xfc00
    /*package*/ static final int kThreeUnitDeltaLead = 0xffff;

    /*package*/ static final int kMaxTwoUnitDelta =
            ((kThreeUnitDeltaLead - kMinTwoUnitDeltaLead) << 16) - 1; // 0x03feffff

    // Fixed value referencing the CharsTrie words.
    private CharSequence chars_;
    private int root_;

    // Iterator variables.

    // Pointer to next trie unit to read. NULL if no more matches.
    private int pos_;
    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
    private int remainingMatchLength_;
}