StringReplacer.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (c) 2002-2007, International Business Machines Corporation
 *   and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   01/14/2002  aliu        Creation.
 **********************************************************************
 */

package com.ibm.icu.text;

import com.ibm.icu.impl.Utility;

/**
 * A replacer that produces static text as its output. The text may contain transliterator stand-in
 * characters that represent nested UnicodeReplacer objects, making it possible to encode a tree of
 * replacers in a StringReplacer. A StringReplacer that contains such stand-ins is called a
 * <em>complex</em> StringReplacer. A complex StringReplacer has a slower processing loop than a
 * non-complex one.
 *
 * @author Alan Liu
 */
class StringReplacer implements UnicodeReplacer {

    /**
     * Output text, possibly containing stand-in characters that represent nested UnicodeReplacers.
     */
    private String output;

    /** Cursor position. Value is ignored if hasCursor is false. */
    private int cursorPos;

    /** True if this object outputs a cursor position. */
    private boolean hasCursor;

    /**
     * A complex object contains nested replacers and requires more complex processing.
     * StringReplacers are initially assumed to be complex. If no nested replacers are seen during
     * processing, then isComplex is set to false, and future replacements are short circuited for
     * better performance.
     */
    private boolean isComplex;

    /** Object that translates stand-in characters in 'output' to UnicodeReplacer objects. */
    private final RuleBasedTransliterator.Data data;

    /**
     * Construct a StringReplacer that sets the emits the given output text and sets the cursor to
     * the given position.
     *
     * @param theOutput text that will replace input text when the replace() method is called. May
     *     contain stand-in characters that represent nested replacers.
     * @param theCursorPos cursor position that will be returned by the replace() method
     * @param theData transliterator context object that translates stand-in characters to
     *     UnicodeReplacer objects
     */
    public StringReplacer(
            String theOutput, int theCursorPos, RuleBasedTransliterator.Data theData) {
        output = theOutput;
        cursorPos = theCursorPos;
        hasCursor = true;
        data = theData;
        isComplex = true;
    }

    /**
     * Construct a StringReplacer that sets the emits the given output text and does not modify the
     * cursor.
     *
     * @param theOutput text that will replace input text when the replace() method is called. May
     *     contain stand-in characters that represent nested replacers.
     * @param theData transliterator context object that translates stand-in characters to
     *     UnicodeReplacer objects
     */
    public StringReplacer(String theOutput, RuleBasedTransliterator.Data theData) {
        output = theOutput;
        cursorPos = 0;
        hasCursor = false;
        data = theData;
        isComplex = true;
    }

    // =    public static UnicodeReplacer valueOf(String output,
    // =                                          int cursorPos,
    // =                                          RuleBasedTransliterator.Data data) {
    // =        if (output.length() == 1) {
    // =            char c = output.charAt(0);
    // =            UnicodeReplacer r = data.lookupReplacer(c);
    // =            if (r != null) {
    // =                return r;
    // =            }
    // =        }
    // =        return new StringReplacer(output, cursorPos, data);
    // =    }

    /** UnicodeReplacer API */
    @Override
    public int replace(Replaceable text, int start, int limit, int[] cursor) {
        int outLen;
        int newStart = 0;

        // NOTE: It should be possible to _always_ run the complex
        // processing code; just slower.  If not, then there is a bug
        // in the complex processing code.

        // Simple (no nested replacers) Processing Code :
        if (!isComplex) {
            text.replace(start, limit, output);
            outLen = output.length();

            // Setup default cursor position (for cursorPos within output)
            newStart = cursorPos;
        }

        // Complex (nested replacers) Processing Code :
        else {
            /* When there are segments to be copied, use the Replaceable.copy()
             * API in order to retain out-of-band data.  Copy everything to the
             * end of the string, then copy them back over the key.  This preserves
             * the integrity of indices into the key and surrounding context while
             * generating the output text.
             */
            StringBuilder buf = new StringBuilder();
            int oOutput; // offset into 'output'
            isComplex = false;

            // The temporary buffer starts at tempStart, and extends
            // to destLimit + tempExtra.  The start of the buffer has a single
            // character from before the key.  This provides style
            // data when addition characters are filled into the
            // temporary buffer.  If there is nothing to the left, use
            // the non-character U+FFFF, which Replaceable subclasses
            // should treat specially as a "no-style character."
            // destStart points to the point after the style context
            // character, so it is tempStart+1 or tempStart+2.
            int tempStart = text.length(); // start of temp buffer
            int destStart = tempStart; // copy new text to here
            if (start > 0) {
                int len = UTF16.getCharCount(text.char32At(start - 1));
                text.copy(start - len, start, tempStart);
                destStart += len;
            } else {
                text.replace(tempStart, tempStart, "\uFFFF");
                destStart++;
            }
            int destLimit = destStart;
            int tempExtra = 0; // temp chars after destLimit

            for (oOutput = 0; oOutput < output.length(); ) {
                if (oOutput == cursorPos) {
                    // Record the position of the cursor
                    newStart = buf.length() + destLimit - destStart; // relative to start
                    // the buf.length() was inserted for bug 5789
                    // the problem is that if we are accumulating into a buffer (when r == null
                    // below)
                    // then the actual length of the text at that point needs to add the buf length.
                    // there was an alternative suggested in #5789, but that looks like it won't
                    // work
                    // if we have accumulated some stuff in the dest part AND have a non-zero
                    // buffer.
                }
                int c = UTF16.charAt(output, oOutput);

                // When we are at the last position copy the right style
                // context character into the temporary buffer.  We don't
                // do this before because it will provide an incorrect
                // right context for previous replace() operations.
                int nextIndex = oOutput + UTF16.getCharCount(c);
                if (nextIndex == output.length()) {
                    tempExtra = UTF16.getCharCount(text.char32At(limit));
                    text.copy(limit, limit + tempExtra, destLimit);
                }

                UnicodeReplacer r = data.lookupReplacer(c);
                if (r == null) {
                    // Accumulate straight (non-segment) text.
                    buf.appendCodePoint(c);
                } else {
                    isComplex = true;

                    // Insert any accumulated straight text.
                    if (buf.length() > 0) {
                        text.replace(destLimit, destLimit, buf.toString());
                        destLimit += buf.length();
                        buf.setLength(0);
                    }

                    // Delegate output generation to replacer object
                    int len = r.replace(text, destLimit, destLimit, cursor);
                    destLimit += len;
                }
                oOutput = nextIndex;
            }
            // Insert any accumulated straight text.
            if (buf.length() > 0) {
                text.replace(destLimit, destLimit, buf.toString());
                destLimit += buf.length();
            }
            if (oOutput == cursorPos) {
                // Record the position of the cursor
                newStart = destLimit - destStart; // relative to start
            }

            outLen = destLimit - destStart;

            // Copy new text to start, and delete it
            text.copy(destStart, destLimit, start);
            text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

            // Delete the old text (the key)
            text.replace(start + outLen, limit + outLen, "");
        }

        if (hasCursor) {
            // Adjust the cursor for positions outside the key.  These
            // refer to code points rather than code units.  If cursorPos
            // is within the output string, then use newStart, which has
            // already been set above.
            if (cursorPos < 0) {
                newStart = start;
                int n = cursorPos;
                // Outside the output string, cursorPos counts code points
                while (n < 0 && newStart > 0) {
                    newStart -= UTF16.getCharCount(text.char32At(newStart - 1));
                    ++n;
                }
                newStart += n;
            } else if (cursorPos > output.length()) {
                newStart = start + outLen;
                int n = cursorPos - output.length();
                // Outside the output string, cursorPos counts code points
                while (n > 0 && newStart < text.length()) {
                    newStart += UTF16.getCharCount(text.char32At(newStart));
                    --n;
                }
                newStart += n;
            } else {
                // Cursor is within output string.  It has been set up above
                // to be relative to start.
                newStart += start;
            }

            cursor[0] = newStart;
        }

        return outLen;
    }

    /** UnicodeReplacer API */
    @Override
    public String toReplacerPattern(boolean escapeUnprintable) {
        StringBuilder rule = new StringBuilder();
        StringBuilder quoteBuf = new StringBuilder();

        int cursor = cursorPos;

        // Handle a cursor preceding the output
        if (hasCursor && cursor < 0) {
            while (cursor++ < 0) {
                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
            }
            // Fall through and append '|' below
        }

        for (int i = 0; i < output.length(); ++i) {
            if (hasCursor && i == cursor) {
                Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
            }
            char c = output.charAt(i); // Ok to use 16-bits here

            UnicodeReplacer r = data.lookupReplacer(c);
            if (r == null) {
                Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
            } else {
                StringBuilder buf = new StringBuilder(" ");
                buf.append(r.toReplacerPattern(escapeUnprintable));
                buf.append(' ');
                Utility.appendToRule(rule, buf.toString(), true, escapeUnprintable, quoteBuf);
            }
        }

        // Handle a cursor after the output.  Use > rather than >= because
        // if cursor == output.length() it is at the end of the output,
        // which is the default position, so we need not emit it.
        if (hasCursor && cursor > output.length()) {
            cursor -= output.length();
            while (cursor-- > 0) {
                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
            }
            Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
        }
        // Flush quoteBuf out to result
        Utility.appendToRule(rule, -1, true, escapeUnprintable, quoteBuf);

        return rule.toString();
    }

    /**
     * Union the set of all characters that may output by this object into the given set.
     *
     * @param toUnionTo the set into which to union the output characters
     */
    @Override
    public void addReplacementSetTo(UnicodeSet toUnionTo) {
        int ch;
        for (int i = 0; i < output.length(); i += UTF16.getCharCount(ch)) {
            ch = UTF16.charAt(output, i);
            UnicodeReplacer r = data.lookupReplacer(ch);
            if (r == null) {
                toUnionTo.add(ch);
            } else {
                r.addReplacementSetTo(toUnionTo);
            }
        }
    }
}

// eof