// BoundaryIteratorOfInts.java

// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html

package com.ibm.icu.segmenter;

import com.ibm.icu.segmenter.Segments.IterationDirection;
import com.ibm.icu.text.BreakIterator;

/**
 * An iterator of segmentation boundaries that can operate in either the forwards or reverse
 * direction.
 *
 * <p>When constructed to operate in the forwards direction, the iterator will return all boundaries
 * that are strictly after the input index value provided to the constructor. However, when
 * constructed to operate in the backwards direction, if the input index is already a segmentation
 * boundary, then it will be included as the first value that the iterator returns as it iterates
 * backwards.
 *
 * <p>Both construction and iteration move the position of the {@link BreakIterator} that is passed
 * in, so that instance should not be repositioned by other code while this iterator is in use.
 */
class BoundaryIteratorOfInts {
    private final BreakIterator breakIter;
    private final IterationDirection direction;

    /**
     * The boundary that the next call to {@link #next()} will return, or {@link
     * BreakIterator#DONE} once the boundaries in the chosen direction are exhausted.
     */
    private int currIdx;

    /**
     * Positions {@code breakIter} on the first boundary to report and remembers it.
     *
     * <p>This constructor never sets the text of {@code breakIter}; presumably the caller has
     * already configured it to iterate over {@code sourceSequence} (confirm against callers).
     *
     * @param breakIter the break iterator to drive; its position is mutated by this class
     * @param sourceSequence the text being segmented; only its length is read, and only when
     *     iterating backwards
     * @param direction whether boundaries are reported in increasing or decreasing order
     * @param startIdx the index from which to start looking for boundaries
     */
    BoundaryIteratorOfInts(
            BreakIterator breakIter,
            CharSequence sourceSequence,
            IterationDirection direction,
            int startIdx) {
        this.breakIter = breakIter;
        this.direction = direction;

        if (direction == IterationDirection.FORWARDS) {
            currIdx = breakIter.following(startIdx);
        } else {
            assert direction == IterationDirection.BACKWARDS;
            currIdx = initialBackwardsBoundary(breakIter, sourceSequence.length(), startIdx);
        }
    }

    /**
     * Moves {@code breakIter} to the boundary at {@code startIdx} or, if {@code startIdx} is not a
     * boundary, to the closest one before it, and returns that boundary.
     *
     * <p>The initial index has to be established indirectly because there is no way to
     * statelessly query whether an index is on a boundary: {@code BreakIterator.isBoundary()}
     * mutates the iterator's state when its argument is not on a boundary, before it returns the
     * value indicating whether it is a boundary.
     */
    private static int initialBackwardsBoundary(
            BreakIterator breakIter, int sourceLength, int startIdx) {
        // The two ends of the text are handled directly, without any boundary query.
        if (startIdx == 0) {
            return breakIter.first();
        }
        if (startIdx == sourceLength) {
            return breakIter.last();
        }

        // The range check short-circuits so that isBoundary() is never invoked with an index that
        // lies outside of the text.
        boolean isOnBoundary =
                0 <= startIdx && startIdx <= sourceLength && breakIter.isBoundary(startIdx);
        if (isOnBoundary) {
            return startIdx;
        }

        // The call to BreakIterator.isBoundary(startIdx) above will have advanced breakIter's
        // current position forwards to the next boundary if startIdx is not a boundary.
        // Therefore, in that case, we have to move back to the previous boundary.
        //
        // BreakIterator.isBoundary(startIdx) should have cached the surrounding 2 boundaries in
        // the BreakIterator, which means that BreakIterator.preceding(startIdx) shouldn't cost
        // significant extra time.
        //
        // BreakIterator.preceding(startIdx) is used instead of a simple call to
        // BreakIterator.previous() since .preceding() can accept arguments larger than the last
        // boundary and return the last boundary, whereas .previous() would return DONE. Thus,
        // .preceding() provides symmetrical behavior to .following(), which is used in the
        // forwards direction.
        return breakIter.preceding(startIdx);
    }

    /**
     * Reports whether another boundary is available.
     *
     * @return {@code true} unless the pending boundary is {@link BreakIterator#DONE}
     */
    public boolean hasNext() {
        return currIdx != BreakIterator.DONE;
    }

    /**
     * Returns the pending boundary and moves the underlying break iterator one boundary further in
     * this iterator's direction.
     *
     * <p>No exception is thrown when the iterator is exhausted: if {@link #hasNext()} is {@code
     * false}, the returned value is {@link BreakIterator#DONE}.
     *
     * @return the boundary index that was pending before this call
     */
    public Integer next() {
        int result = currIdx;

        if (direction == IterationDirection.FORWARDS) {
            currIdx = breakIter.next();
        } else {
            assert direction == IterationDirection.BACKWARDS;
            currIdx = breakIter.previous();
        }

        return result;
    }
}