// UConverterConstants.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2006-2008, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.charset;

/**
 * Package-private holder for the numeric, string and byte constants shared by the charset
 * converter implementation.
 *
 * <p>The {@code begin}/{@code end} line comments name the C header each group of constants
 * appears to have been taken from. All fields are implicitly {@code public static final}
 * because this is an interface; the modifiers are spelled out to match the file's style.
 */
interface UConverterConstants {

    /** Mask selecting the low 8 bits of a value. */
    static final short UNSIGNED_BYTE_MASK = 0xff;

    /** Mask selecting the low 16 bits of a value. */
    static final int UNSIGNED_SHORT_MASK = 0xffff;

    /** Mask selecting the low 32 bits of a {@code long} value. */
    static final long UNSIGNED_INT_MASK = 0xffffffffL;

    static final int U_IS_BIG_ENDIAN = 0;

    /**
     * Useful constant for the maximum size of the whole locale ID (including the terminating NULL).
     */
    static final int ULOC_FULLNAME_CAPACITY = 56;

    /**
     * This value is intended for sentinel values for APIs that (take or) return single code points
     * (UChar32). It is outside of the Unicode code point range 0..0x10ffff.
     *
     * <p>For example, a "done" or "error" value in a new API could be indicated with U_SENTINEL.
     *
     * <p>ICU APIs designed before ICU 2.4 usually define service-specific "done" values, mostly
     * 0xffff. Those may need to be distinguished from actual U+ffff text contents by calling
     * functions like CharacterIterator::hasNext() or UnicodeString::length().
     */
    static final int U_SENTINEL = -1;

    // end utf.h

    // begin ucnv.h
    /**
     * Character that separates converter names from options and options from each other.
     *
     * @see CharsetICU#forNameICU(String)
     */
    static final byte OPTION_SEP_CHAR = ',';

    /** Maximum length of a converter name including the terminating NULL */
    static final int MAX_CONVERTER_NAME_LENGTH = 60;

    /** Maximum length of a converter name including path and terminating NULL */
    static final int MAX_FULL_FILE_NAME_LENGTH = 600 + MAX_CONVERTER_NAME_LENGTH;

    /** Shift in for EBCDIC_STATEFUL and iso2022 states */
    static final int SI = 0x0F;

    /** Shift out for EBCDIC_STATEFUL and iso2022 states */
    static final int SO = 0x0E;

    // end ucnv.h

    // begin bld.h
    /** Size of the overflow buffers in UConverter, enough for escaping callbacks. */
    // #define ERROR_BUFFER_LENGTH 32
    static final int ERROR_BUFFER_LENGTH = 32;

    /**
     * At most 4 bytes per substitution character (part of .cnv file format! see
     * UConverterStaticData).
     */
    static final int MAX_SUBCHAR_LEN = 4;

    /** At most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6). */
    static final int MAX_CHAR_LEN = 8;

    /* converter options bits */
    static final int OPTION_VERSION = 0xf;
    static final int OPTION_SWAP_LFNL = 0x10;
    static final int OPTION_MAC = 0x20; // agljport:comment added for Mac ISCII encodings

    static final String OPTION_SWAP_LFNL_STRING = ",swaplfnl";

    /* values for the unicodeMask */
    static final int HAS_SUPPLEMENTARY = 1;

    static final int HAS_SURROGATES = 2;
    // end bld.h

    // begin cnv.h
    /** This is used in fromUnicode DBCS tables as an "unassigned" marker. */
    static final int missingCharMarker = 0xFFFF;

    /**
     * Selects which direction(s) of a converter a reset applies to.
     *
     * @author ram
     */
    static interface UConverterResetChoice {
        static final int RESET_BOTH = 0;
        static final int RESET_TO_UNICODE = RESET_BOTH + 1;
        static final int RESET_FROM_UNICODE = RESET_TO_UNICODE + 1;
    }

    // begin utf16.h
    /** The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). */
    static final int U16_MAX_LENGTH = 2;

    // end utf16.h

    // begin err.h
    /**
     * FROM_U, TO_U context options for sub callback.
     *
     * <p>The reference is final but the array contents are not; callers must treat the array as
     * read-only.
     */
    static final byte[] SUB_STOP_ON_ILLEGAL = {'i'};

    /**
     * FROM_U, TO_U context options for skip callback.
     *
     * <p>The reference is final but the array contents are not; callers must treat the array as
     * read-only.
     */
    static final byte[] SKIP_STOP_ON_ILLEGAL = {'i'};

    /**
     * The process condition code to be used with the callbacks. Codes which are greater than
     * IRREGULAR should be passed on to any chained callbacks.
     */
    static interface UConverterCallbackReason {
        /** The code point is unassigned. The error code U_INVALID_CHAR_FOUND will be set. */
        static final int UNASSIGNED = 0;

        /**
         * The code point is illegal. For example, {@code \x81\x2E} is illegal in SJIS because
         * {@code \x2E} is not a valid trail byte for the {@code \x81} lead byte. Also, starting
         * with Unicode 3.0.1, non-shortest byte sequences in UTF-8 (like {@code \xC1\xA1} instead
         * of {@code \x61} for U+0061) are also illegal, not just irregular. The error code
         * U_ILLEGAL_CHAR_FOUND will be set.
         */
        static final int ILLEGAL = 1;

        /**
         * The codepoint is not a regular sequence in the encoding. For example,
         * {@code \xED\xA0\x80..\xED\xBF\xBF} are irregular UTF-8 byte sequences for single
         * surrogate code points. The error code U_INVALID_CHAR_FOUND will be set.
         */
        static final int IRREGULAR = 2;

        /**
         * The callback is called with this reason when a 'reset' has occurred. Callback should
         * reset all state.
         */
        static final int RESET = 3;

        /** Called when the converter is closed. The callback should release any allocated memory. */
        static final int CLOSE = 4;

        /**
         * Called when safeClone() is called on the converter. The pointer available as the
         * 'context' is an alias to the original converter's context pointer. If the context must
         * be owned by the new converter, the callback must clone the data and call
         * setFromUCallback (or setToUCallback) with the correct pointer.
         */
        static final int CLONE = 5;
    }

    // end err.h

    static final String DATA_TYPE = "cnv";
    static final int CNV_DATA_BUFFER_SIZE = 25000;
    static final int SIZE_OF_UCONVERTER_SHARED_DATA = 100;

    static final int MAXIMUM_UCS2 = 0x0000FFFF;
    static final int MAXIMUM_UTF = 0x0010FFFF;
    // static final int MAXIMUM_UCS4 =            0x7FFFFFFF;
    static final int HALF_SHIFT = 10;
    static final int HALF_BASE = 0x0010000;
    static final int HALF_MASK = 0x3FF;
    static final int SURROGATE_HIGH_START = 0xD800;
    static final int SURROGATE_HIGH_END = 0xDBFF;
    static final int SURROGATE_LOW_START = 0xDC00;
    static final int SURROGATE_LOW_END = 0xDFFF;

    /**
     * {@code HALF_BASE - SURROGATE_LOW_START}, i.e. 9216. Written as an expression so the value
     * stays tied to the constants it is derived from; it is still a compile-time constant.
     */
    static final int SURROGATE_LOW_BASE = HALF_BASE - SURROGATE_LOW_START;
}