USerializedSet.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *   Copyright (C) 2002-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */

package com.ibm.icu.impl;

/**
 * @version 1.1
 * @author Markus W. Scherer Ram: Add documentation, remove unwanted methods, improve coverage.
 */

/**
 * Simple class for handling serialized USet/UnicodeSet structures without object creation. See
 * ICU4C icu/source/common/uset.c.
 *
 * @internal
 */
public final class USerializedSet {
    /**
     * Fill in the given serialized set object.
     *
     * @param src pointer to start of array
     * @param srcStart pointer to start of serialized data (length value)
     * @return true if the given array is valid, otherwise false
     */
    public final boolean getSet(char src[], int srcStart) {
        // leave most argument checking up to Java exceptions
        array = null;
        arrayOffset = bmpLength = length = 0;

        length = src[srcStart++];

        if ((length & 0x8000) != 0) {
            /* there are supplementary values */
            length &= 0x7fff;
            if (src.length < (srcStart + 1 + length)) {
                length = 0;
                throw new IndexOutOfBoundsException();
            }
            bmpLength = src[srcStart++];
        } else {
            /* only BMP values */
            if (src.length < (srcStart + length)) {
                length = 0;
                throw new IndexOutOfBoundsException();
            }
            bmpLength = length;
        }
        array = new char[length];
        System.arraycopy(src, srcStart, array, 0, length);
        // arrayOffset=srcStart;
        return true;
    }

    /** Set the USerializedSet to contain the given character (and nothing else). */
    public final void setToOne(int c) {
        if (0x10ffff < c) {
            return;
        }

        if (c < 0xffff) {
            bmpLength = length = 2;
            array[0] = (char) c;
            array[1] = (char) (c + 1);
        } else if (c == 0xffff) {
            bmpLength = 1;
            length = 3;
            array[0] = 0xffff;
            array[1] = 1;
            array[2] = 0;
        } else if (c < 0x10ffff) {
            bmpLength = 0;
            length = 4;
            array[0] = (char) (c >> 16);
            array[1] = (char) c;
            ++c;
            array[2] = (char) (c >> 16);
            array[3] = (char) c;
        } else /* c==0x10ffff */ {
            bmpLength = 0;
            length = 2;
            array[0] = 0x10;
            array[1] = 0xffff;
        }
    }

    /**
     * Returns a range of characters contained in the given serialized set.
     *
     * @param rangeIndex a non-negative integer in the range <code>0..
     * getSerializedRangeCount()-1</code>
     * @param range variable to receive the data in the range
     * @return true if rangeIndex is valid, otherwise false
     */
    public final boolean getRange(int rangeIndex, int[] range) {
        if (rangeIndex < 0) {
            return false;
        }
        if (array == null) {
            array = new char[8];
        }
        if (range == null || range.length < 2) {
            throw new IllegalArgumentException();
        }
        rangeIndex *= 2; /* address start/limit pairs */
        if (rangeIndex < bmpLength) {
            range[0] = array[rangeIndex++];
            if (rangeIndex < bmpLength) {
                range[1] = array[rangeIndex] - 1;
            } else if (rangeIndex < length) {
                range[1] = ((((int) array[rangeIndex]) << 16) | array[rangeIndex + 1]) - 1;
            } else {
                range[1] = 0x10ffff;
            }
            return true;
        } else {
            rangeIndex -= bmpLength;
            rangeIndex *= 2; /* address pairs of pairs of units */
            int suppLength = length - bmpLength;
            if (rangeIndex < suppLength) {
                int offset = arrayOffset + bmpLength;
                range[0] =
                        (((int) array[offset + rangeIndex]) << 16) | array[offset + rangeIndex + 1];
                rangeIndex += 2;
                if (rangeIndex < suppLength) {
                    range[1] =
                            ((((int) array[offset + rangeIndex]) << 16)
                                            | array[offset + rangeIndex + 1])
                                    - 1;
                } else {
                    range[1] = 0x10ffff;
                }
                return true;
            } else {
                return false;
            }
        }
    }

    /**
     * Returns true if the given USerializedSet contains the given character.
     *
     * @param c the character to test for
     * @return true if set contains c
     */
    public final boolean contains(int c) {

        if (c > 0x10ffff) {
            return false;
        }

        if (c <= 0xffff) {
            int i;
            /* find c in the BMP part */
            for (i = 0; i < bmpLength && (char) c >= array[i]; ++i) {}
            return ((i & 1) != 0);
        } else {
            int i;
            /* find c in the supplementary part */
            char high = (char) (c >> 16), low = (char) c;
            for (i = bmpLength;
                    i < length && (high > array[i] || (high == array[i] && low >= array[i + 1]));
                    i += 2) {}

            /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
            return (((i + bmpLength) & 2) != 0);
        }
    }

    /**
     * Returns the number of disjoint ranges of characters contained in the given serialized set.
     * Ignores any strings contained in the set.
     *
     * @return a non-negative integer counting the character ranges contained in set
     */
    public final int countRanges() {
        return (bmpLength + (length - bmpLength) / 2 + 1) / 2;
    }

    private char array[] = new char[8];
    private int arrayOffset, bmpLength, length;
}