// CaseMap.java

// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.text;

import com.ibm.icu.impl.CaseMapImpl;
import com.ibm.icu.impl.UCaseProps;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
import java.util.Locale;

/**
 * Low-level case mapping options and methods. Immutable. "Setters" return instances with the union
 * of the current and new options set.
 *
 * <p>This class is not intended for public subclassing.
 *
 * @stable ICU 59
 */
public abstract class CaseMap {
    /**
     * Bit set of case mapping options held by this instance. It is assigned by the constructor and
     * passed to the {@code CaseMapImpl} functions by the nested classes' {@code apply} methods.
     *
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated protected int internalOptions;

    /**
     * Creates an instance that stores the given option bits. The constructor is private, so only
     * code inside this top-level class (the nested subclasses) can create or extend instances.
     *
     * @param opt the option bits to store in {@code internalOptions}.
     */
    private CaseMap(int opt) {
        this.internalOptions = opt;
    }

    /**
     * Maps a {@link Locale} to the integer case-locale code expected by the low-level case mapping
     * functions, substituting the JVM default locale when none is given.
     *
     * @param locale the locale to translate; null means {@link Locale#getDefault()}.
     * @return the value of {@code UCaseProps.getCaseLocale} for the effective locale.
     */
    private static int getCaseLocale(Locale locale) {
        final Locale effectiveLocale = (locale != null) ? locale : Locale.getDefault();
        return UCaseProps.getCaseLocale(effectiveLocale);
    }

    /**
     * Entry point for lowercasing: returns the shared {@link Lower} instance that has no options
     * set. Use its option methods (for example {@link Lower#omitUnchangedText()}) to derive
     * variants.
     *
     * @return Lowercasing object with default options.
     * @stable ICU 59
     */
    public static Lower toLower() {
        return Lower.DEFAULT;
    }

    /**
     * Entry point for uppercasing: returns the shared {@link Upper} instance that has no options
     * set. Use its option methods (for example {@link Upper#omitUnchangedText()}) to derive
     * variants.
     *
     * @return Uppercasing object with default options.
     * @stable ICU 59
     */
    public static Upper toUpper() {
        return Upper.DEFAULT;
    }

    /**
     * Entry point for titlecasing: returns the shared {@link Title} instance that has no options
     * set. Use its option methods (for example {@link Title#wholeString()} or {@link
     * Title#noLowercase()}) to derive variants.
     *
     * @return Titlecasing object with default options.
     * @stable ICU 59
     */
    public static Title toTitle() {
        return Title.DEFAULT;
    }

    /**
     * Entry point for case folding: returns the shared {@link Fold} instance that has no options
     * set. Use its option methods (for example {@link Fold#turkic()}) to derive variants.
     *
     * @return Case folding object with default options.
     * @stable ICU 59
     */
    public static Fold fold() {
        return Fold.DEFAULT;
    }

    /**
     * Returns an instance that behaves like this one but omits unchanged text when case-mapping
     * with {@link Edits}.
     *
     * <p>Each nested subclass overrides this with a covariant return type, so chained calls keep
     * the specific type (for example {@code CaseMap.toLower().omitUnchangedText()} is a {@link
     * Lower}).
     *
     * @return an options object with this option.
     * @stable ICU 59
     */
    public abstract CaseMap omitUnchangedText();

    /**
     * Options holder and operations for lowercasing. Instances are immutable.
     *
     * @see #toLower()
     * @stable ICU 59
     */
    public static final class Lower extends CaseMap {
        private static final Lower DEFAULT = new Lower(0);
        private static final Lower OMIT_UNCHANGED = new Lower(CaseMapImpl.OMIT_UNCHANGED_TEXT);

        private Lower(int opt) {
            super(opt);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 59
         */
        @Override
        public Lower omitUnchangedText() {
            // This class declares no other option, so a single shared instance is sufficient.
            return OMIT_UNCHANGED;
        }

        /**
         * Returns the lowercased form of a string. The mapping depends on the locale and on the
         * surrounding context, and the result can be longer or shorter than the input.
         *
         * @param locale The locale ID. Can be null for {@link Locale#getDefault}. (See {@link
         *     ULocale#toLocale}.)
         * @param src The original string.
         * @return the result string.
         * @see UCharacter#toLowerCase(Locale, String)
         * @stable ICU 60
         */
        public String apply(Locale locale, CharSequence src) {
            final int caseLocale = getCaseLocale(locale);
            return CaseMapImpl.toLower(caseLocale, internalOptions, src);
        }

        /**
         * Lowercases a string into {@code dest} and optionally records edits (see {@link
         * #omitUnchangedText}). The mapping depends on the locale and on the surrounding context,
         * and the result can be longer or shorter than the input.
         *
         * @param locale The locale ID. Can be null for {@link Locale#getDefault}. (See {@link
         *     ULocale#toLocale}.)
         * @param src The original string.
         * @param dest A buffer for the result string. Must not be null.
         * @param edits Records edits for index mapping, working with styled text, and getting only
         *     changes (if any). This function calls edits.reset() first. edits can be null.
         * @return dest with the result string (or only changes) appended.
         * @see UCharacter#toLowerCase(Locale, String)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(
                Locale locale, CharSequence src, A dest, Edits edits) {
            final int caseLocale = getCaseLocale(locale);
            return CaseMapImpl.toLower(caseLocale, internalOptions, src, dest, edits);
        }
    }

    /**
     * Options holder and operations for uppercasing. Instances are immutable.
     *
     * @see #toUpper()
     * @stable ICU 59
     */
    public static final class Upper extends CaseMap {
        private static final Upper DEFAULT = new Upper(0);
        private static final Upper OMIT_UNCHANGED = new Upper(CaseMapImpl.OMIT_UNCHANGED_TEXT);

        private Upper(int opt) {
            super(opt);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 59
         */
        @Override
        public Upper omitUnchangedText() {
            // This class declares no other option, so a single shared instance is sufficient.
            return OMIT_UNCHANGED;
        }

        /**
         * Returns the uppercased form of a string. The mapping depends on the locale and on the
         * surrounding context, and the result can be longer or shorter than the input.
         *
         * @param locale The locale ID. Can be null for {@link Locale#getDefault}. (See {@link
         *     ULocale#toLocale}.)
         * @param src The original string.
         * @return the result string.
         * @see UCharacter#toUpperCase(Locale, String)
         * @stable ICU 60
         */
        public String apply(Locale locale, CharSequence src) {
            final int caseLocale = getCaseLocale(locale);
            return CaseMapImpl.toUpper(caseLocale, internalOptions, src);
        }

        /**
         * Uppercases a string into {@code dest} and optionally records edits (see {@link
         * #omitUnchangedText}). The mapping depends on the locale and on the surrounding context,
         * and the result can be longer or shorter than the input.
         *
         * @param locale The locale ID. Can be null for {@link Locale#getDefault}. (See {@link
         *     ULocale#toLocale}.)
         * @param src The original string.
         * @param dest A buffer for the result string. Must not be null.
         * @param edits Records edits for index mapping, working with styled text, and getting only
         *     changes (if any). This function calls edits.reset() first. edits can be null.
         * @return dest with the result string (or only changes) appended.
         * @see UCharacter#toUpperCase(Locale, String)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(
                Locale locale, CharSequence src, A dest, Edits edits) {
            final int caseLocale = getCaseLocale(locale);
            return CaseMapImpl.toUpper(caseLocale, internalOptions, src, dest, edits);
        }
    }

    /**
     * Titlecasing options and methods. Immutable: every option method returns another instance
     * and leaves this one untouched.
     *
     * @see #toTitle()
     * @stable ICU 59
     */
    public static final class Title extends CaseMap {
        private static final Title DEFAULT = new Title(0);
        private static final Title OMIT_UNCHANGED = new Title(CaseMapImpl.OMIT_UNCHANGED_TEXT);

        private Title(int opt) {
            super(opt);
        }

        /**
         * Returns an instance that behaves like this one but titlecases the string as a whole
         * rather than each word. (Titlecases only the character at index 0, possibly adjusted.)
         *
         * <p>It is an error to specify multiple titlecasing iterator options together, including
         * both an option and an explicit BreakIterator.
         *
         * @return an options object with this option.
         * @see #adjustToCased()
         * @stable ICU 60
         */
        public Title wholeString() {
            final int combined =
                    CaseMapImpl.addTitleIteratorOption(
                            internalOptions, CaseMapImpl.TITLECASE_WHOLE_STRING);
            return new Title(combined);
        }

        /**
         * Returns an instance that behaves like this one but titlecases sentences rather than
         * words. (Titlecases only the first character of each sentence, possibly adjusted.)
         *
         * <p>It is an error to specify multiple titlecasing iterator options together, including
         * both an option and an explicit BreakIterator.
         *
         * @return an options object with this option.
         * @see #adjustToCased()
         * @stable ICU 60
         */
        public Title sentences() {
            final int combined =
                    CaseMapImpl.addTitleIteratorOption(
                            internalOptions, CaseMapImpl.TITLECASE_SENTENCES);
            return new Title(combined);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 59
         */
        @Override
        public Title omitUnchangedText() {
            // With no options, or with only this option already set, the result is exactly the
            // shared OMIT_UNCHANGED instance, so no allocation is needed.
            final boolean sharedInstanceMatches =
                    internalOptions == 0 || internalOptions == CaseMapImpl.OMIT_UNCHANGED_TEXT;
            if (sharedInstanceMatches) {
                return OMIT_UNCHANGED;
            }
            return new Title(internalOptions | CaseMapImpl.OMIT_UNCHANGED_TEXT);
        }

        /**
         * Returns an instance that behaves like this one but does not lowercase non-initial parts
         * of words when titlecasing.
         *
         * <p>By default, titlecasing will titlecase the character at each (possibly adjusted)
         * BreakIterator index and lowercase all other characters up to the next iterator index.
         * With this option, the other characters will not be modified.
         *
         * @return an options object with this option.
         * @see UCharacter#TITLECASE_NO_LOWERCASE
         * @see #adjustToCased()
         * @stable ICU 59
         */
        public Title noLowercase() {
            return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
        }

        /**
         * Returns an instance that behaves like this one but does not adjust the titlecasing
         * BreakIterator indexes; titlecases exactly the characters at breaks from the iterator.
         *
         * <p>By default, titlecasing will take each break iterator index, adjust it to the next
         * relevant character (see {@link #adjustToCased()}), and titlecase that one.
         *
         * <p>Other characters are lowercased.
         *
         * @return an options object with this option.
         * @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
         * @stable ICU 59
         */
        public Title noBreakAdjustment() {
            final int combined =
                    CaseMapImpl.addTitleAdjustmentOption(
                            internalOptions, UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
            return new Title(combined);
        }

        /**
         * Returns an instance that behaves like this one but adjusts each titlecasing BreakIterator
         * index to the next cased character. (See the Unicode Standard, chapter 3, Default Case
         * Conversion, R3 toTitlecase(X).)
         *
         * <p>This used to be the default index adjustment in ICU. Since ICU 60, the default index
         * adjustment is to the next character that is a letter, number, symbol, or private use code
         * point. (Uncased modifier letters are skipped.) The difference in behavior is small for
         * word titlecasing, but the new adjustment is much better for whole-string and sentence
         * titlecasing: It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
         *
         * <p>It is an error to specify multiple titlecasing adjustment options together.
         *
         * @return an options object with this option.
         * @see #noBreakAdjustment()
         * @stable ICU 60
         */
        public Title adjustToCased() {
            final int combined =
                    CaseMapImpl.addTitleAdjustmentOption(
                            internalOptions, CaseMapImpl.TITLECASE_ADJUST_TO_CASED);
            return new Title(combined);
        }

        /**
         * Titlecases a string. Casing is locale-dependent and context-sensitive. The result may be
         * longer or shorter than the original.
         *
         * <p>Titlecasing uses a break iterator to find the first characters of words that are to be
         * titlecased. It titlecases those characters and lowercases all others. (This can be
         * modified with options bits.)
         *
         * @param locale The locale ID. Can be null for {@link Locale#getDefault}. (See {@link
         *     ULocale#toLocale}.)
         * @param iter A break iterator to find the first characters of words that are to be
         *     titlecased. It is set to the source string (setText()) and used one or more times for
         *     iteration (first() and next()). If null, then a word break iterator for the locale is
         *     used (or something equivalent).
         * @param src The original string.
         * @return the result string.
         * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
         * @stable ICU 60
         */
        public String apply(Locale locale, BreakIterator iter, CharSequence src) {
            // The default locale is only resolved here when no iterator was supplied, because the
            // locale is then handed to getTitleBreakIterator(). getCaseLocale() copes with a null
            // locale by itself.
            final Locale effectiveLocale =
                    (iter == null && locale == null) ? Locale.getDefault() : locale;
            final BreakIterator titleIter =
                    CaseMapImpl.getTitleBreakIterator(effectiveLocale, internalOptions, iter);
            titleIter.setText(src);
            return CaseMapImpl.toTitle(
                    getCaseLocale(effectiveLocale), internalOptions, titleIter, src);
        }

        /**
         * Titlecases a string and optionally records edits (see {@link #omitUnchangedText}). Casing
         * is locale-dependent and context-sensitive. The result may be longer or shorter than the
         * original.
         *
         * <p>Titlecasing uses a break iterator to find the first characters of words that are to be
         * titlecased. It titlecases those characters and lowercases all others. (This can be
         * modified with options bits.)
         *
         * @param locale The locale ID. Can be null for {@link Locale#getDefault}. (See {@link
         *     ULocale#toLocale}.)
         * @param iter A break iterator to find the first characters of words that are to be
         *     titlecased. It is set to the source string (setText()) and used one or more times for
         *     iteration (first() and next()). If null, then a word break iterator for the locale is
         *     used (or something equivalent).
         * @param src The original string.
         * @param dest A buffer for the result string. Must not be null.
         * @param edits Records edits for index mapping, working with styled text, and getting only
         *     changes (if any). This function calls edits.reset() first. edits can be null.
         * @return dest with the result string (or only changes) appended.
         * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(
                Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
            // The default locale is only resolved here when no iterator was supplied, because the
            // locale is then handed to getTitleBreakIterator(). getCaseLocale() copes with a null
            // locale by itself.
            final Locale effectiveLocale =
                    (iter == null && locale == null) ? Locale.getDefault() : locale;
            final BreakIterator titleIter =
                    CaseMapImpl.getTitleBreakIterator(effectiveLocale, internalOptions, iter);
            titleIter.setText(src);
            return CaseMapImpl.toTitle(
                    getCaseLocale(effectiveLocale), internalOptions, titleIter, src, dest, edits);
        }
    }

    /**
     * Options holder and operations for case folding. Instances are immutable; all four possible
     * option combinations are pre-built and shared.
     *
     * @see #fold()
     * @stable ICU 59
     */
    public static final class Fold extends CaseMap {
        private static final Fold DEFAULT = new Fold(0);
        private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
        private static final Fold OMIT_UNCHANGED = new Fold(CaseMapImpl.OMIT_UNCHANGED_TEXT);
        private static final Fold TURKIC_OMIT_UNCHANGED =
                new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | CaseMapImpl.OMIT_UNCHANGED_TEXT);

        private Fold(int opt) {
            super(opt);
        }

        /**
         * {@inheritDoc}
         *
         * @stable ICU 59
         */
        @Override
        public Fold omitUnchangedText() {
            // Keep the Turkic flag if this instance already carries it.
            final boolean turkicSelected =
                    (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) != 0;
            if (turkicSelected) {
                return TURKIC_OMIT_UNCHANGED;
            }
            return OMIT_UNCHANGED;
        }

        /**
         * Returns an instance that behaves like this one but handles dotted I and dotless i
         * appropriately for Turkic languages (tr, az).
         *
         * <p>Uses the Unicode CaseFolding.txt mappings marked with 'T' that are to be excluded for
         * default mappings and included for the Turkic-specific mappings.
         *
         * @return an options object with this option.
         * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
         * @stable ICU 59
         */
        public Fold turkic() {
            // Keep the omit-unchanged flag if this instance already carries it.
            final boolean omitSelected =
                    (internalOptions & CaseMapImpl.OMIT_UNCHANGED_TEXT) != 0;
            if (omitSelected) {
                return TURKIC_OMIT_UNCHANGED;
            }
            return TURKIC;
        }

        /**
         * Returns the case-folded form of a string. The result can be longer or shorter than the
         * input.
         *
         * <p>Case-folding is locale-independent and not context-sensitive, but there is an option
         * for whether to include or exclude mappings for dotted I and dotless i that are marked
         * with 'T' in CaseFolding.txt.
         *
         * @param src The original string.
         * @return the result string.
         * @see UCharacter#foldCase(String, int)
         * @stable ICU 60
         */
        public String apply(CharSequence src) {
            final int options = internalOptions;
            return CaseMapImpl.fold(options, src);
        }

        /**
         * Case-folds a string into {@code dest} and optionally records edits (see {@link
         * #omitUnchangedText}). The result can be longer or shorter than the input.
         *
         * <p>Case-folding is locale-independent and not context-sensitive, but there is an option
         * for whether to include or exclude mappings for dotted I and dotless i that are marked
         * with 'T' in CaseFolding.txt.
         *
         * @param src The original string.
         * @param dest A buffer for the result string. Must not be null.
         * @param edits Records edits for index mapping, working with styled text, and getting only
         *     changes (if any). This function calls edits.reset() first. edits can be null.
         * @return dest with the result string (or only changes) appended.
         * @see UCharacter#foldCase(String, int)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
            final int options = internalOptions;
            return CaseMapImpl.fold(options, src, dest, edits);
        }
    }
}