// ComposedCharIter.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;

/**
 * This class has been deprecated since ICU 2.2. One problem is that this class is not designed to
 * return supplementary characters. Use the Normalizer2 and UCharacter classes instead.
 *
 * <p>{@code ComposedCharIter} is an iterator class that returns all of the precomposed characters
 * defined in the Unicode standard, along with their decomposed forms. This is often useful when
 * building data tables (<i>e.g.</i> collation tables) which need to treat composed and decomposed
 * characters equivalently.
 *
 * <p>For example, imagine that you have built a collation table with ordering rules for the {@link
 * Normalizer#DECOMP canonically decomposed} forms of all characters used in a particular language.
 * When you process input text using this table, the text must first be decomposed so that it
 * matches the form used in the table. This can impose a performance penalty that may be
 * unacceptable in some situations.
 *
 * <p>You can avoid this problem by ensuring that the collation table contains rules for both the
 * decomposed <i>and</i> composed versions of each character. To do so, use a {@code
 * ComposedCharIter} to iterate through all of the composed characters in Unicode. If the
 * decomposition for that character consists solely of characters that are listed in your ruleset,
 * you can add a new rule for the composed character that makes it equivalent to its decomposition
 * sequence.
 *
 * <p>Note that {@code ComposedCharIter} iterates over a <em>static</em> table of the composed
 * characters in Unicode. If you want to iterate over the composed characters in a particular
 * string, use {@link Normalizer} instead.
 *
 * <p>The only behavior that can be selected at construction time is whether compatibility
 * decompositions are considered in addition to canonical ones. The {@code options} argument of
 * {@link #ComposedCharIter(boolean, int)} is ignored, so option flags such as {@link
 * Normalizer#IGNORE_HANGUL} have no effect.
 *
 * <p>Only code points in the range U+0001..U+FFFE are examined; supplementary characters are never
 * returned. Which of those code points are reported is determined by the NFC (or, when
 * compatibility decompositions are requested, NFKC) normalization data in use.
 *
 * <p>Typical usage:
 *
 * <pre>
 * ComposedCharIter iter = new ComposedCharIter();
 * while (iter.hasNext()) {
 *     char composed = iter.next();
 *     String decomposed = iter.decomposition();
 *     ...
 * }
 * </pre>
 *
 * @deprecated ICU 2.2
 */
@Deprecated
public final class ComposedCharIter {
    /**
     * Constant that indicates the iteration has completed. {@link #next} returns this value when
     * there are no more composed characters over which to iterate.
     *
     * @deprecated ICU 2.2
     */
    @Deprecated public static final char DONE = (char) Normalizer.DONE;

    /**
     * Exclusive upper bound of the scanned code point range. U+FFFF itself is never examined; its
     * {@code char} value coincides with {@link #DONE}.
     */
    private static final int SCAN_LIMIT = 0xFFFF;

    /**
     * Construct a new {@code ComposedCharIter}. The iterator will return all Unicode characters
     * with canonical decompositions, including Korean Hangul characters.
     *
     * @deprecated ICU 2.2
     */
    @Deprecated
    public ComposedCharIter() {
        this(false, 0);
    }

    /**
     * Constructs a non-default {@code ComposedCharIter} with optional behavior.
     *
     * @param compat {@code false} for canonical decompositions only; {@code true} for both
     *     canonical and compatibility decompositions.
     * @param options Optional decomposition features. None are supported, so this is ignored.
     * @deprecated ICU 2.2
     */
    @Deprecated
    public ComposedCharIter(boolean compat, int options) {
        if (compat) {
            n2impl = Norm2AllModes.getNFKCInstance().impl;
        } else {
            n2impl = Norm2AllModes.getNFCInstance().impl;
        }
    }

    /**
     * Determines whether there are any precomposed Unicode characters not yet returned by {@link
     * #next}. Calling this method does not change the value reported by {@link #decomposition}.
     *
     * @return {@code true} if a subsequent call to {@link #next} will return a character other
     *     than {@link #DONE}.
     * @deprecated ICU 2.2
     */
    @Deprecated
    public boolean hasNext() {
        if (nextChar == Normalizer.DONE) {
            findNextChar();
        }
        return nextChar != Normalizer.DONE;
    }

    /**
     * Returns the next precomposed Unicode character. Repeated calls to {@code next} return all of
     * the precomposed characters defined by Unicode, in ascending order. After all precomposed
     * characters have been returned, {@link #hasNext} will return {@code false} and further calls
     * to {@code next} will return {@link #DONE}.
     *
     * @return the next character that has a decomposition, or {@link #DONE} when the iteration is
     *     complete.
     * @deprecated ICU 2.2
     */
    @Deprecated
    public char next() {
        if (nextChar == Normalizer.DONE) {
            findNextChar();
        }
        final int result = nextChar;
        // Promote the look-ahead decomposition only now, so that an intervening hasNext() can
        // never replace the decomposition of the character the caller is still working with.
        curDecomp = nextDecomp;
        nextChar = Normalizer.DONE;
        nextDecomp = null;
        // When the scan is exhausted, result is Normalizer.DONE and the cast yields DONE.
        return (char) result;
    }

    /**
     * Returns the Unicode decomposition of the current character. This method returns the
     * decomposition of the precomposed character most recently returned by {@link #next}. The
     * resulting decomposition is affected by the {@code compat} setting passed to the constructor.
     *
     * @return the decomposition of the current character, or the empty string if {@link #next} has
     *     not been called yet or has most recently returned {@link #DONE}.
     * @deprecated ICU 2.2
     */
    @Deprecated
    public String decomposition() {
        return curDecomp != null ? curDecomp : "";
    }

    /**
     * Scans forward from {@link #searchFrom} for the next code point that has a decomposition and
     * stores it, together with that decomposition, as the look-ahead state. If no such code point
     * exists below {@link #SCAN_LIMIT}, the look-ahead is set to {@code Normalizer.DONE} and the
     * cursor is parked at the limit so that later calls return immediately instead of rescanning
     * or restarting the iteration.
     */
    private void findNextChar() {
        for (int c = searchFrom; c < SCAN_LIMIT; c++) {
            final String decomp = n2impl.getDecomposition(c);
            if (decomp != null) {
                // c can be decomposed, so it is a composed character; cache the result.
                nextChar = c;
                nextDecomp = decomp;
                searchFrom = c + 1;
                return;
            }
        }
        nextChar = Normalizer.DONE;
        nextDecomp = null;
        searchFrom = SCAN_LIMIT;
    }

    /** Normalization implementation used to look up decompositions. */
    private final Normalizer2Impl n2impl;

    /** Decomposition of the character most recently returned by {@link #next}, or null. */
    private String curDecomp;

    /** Decomposition of the look-ahead character {@link #nextChar}, or null if there is none. */
    private String nextDecomp;

    /** First code point the next scan will examine; the original scan also began at U+0001. */
    private int searchFrom = 1;

    /** Look-ahead character found by the last scan, or Normalizer.DONE if none is pending. */
    private int nextChar = Normalizer.DONE;
}