UPropertyAliases.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 * Copyright (c) 2002-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: November 5 2002
 * Since: ICU 2.4
 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
 **********************************************************************
 */

package com.ibm.icu.impl;

import com.ibm.icu.lang.UProperty;
import com.ibm.icu.util.BytesTrie;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;

/**
 * Wrapper for the pnames.icu binary data file. This data file is imported from icu4c. It contains
 * property and property value aliases from the UCD files PropertyAliases.txt and
 * PropertyValueAliases.txt. The file is built by the icu4c tool genpname. It must be an ASCII
 * big-endian file to be usable in icu4j.
 *
 * <p>This class performs two functions.
 *
 * <p>(1) It can import the flat binary data into usable objects.
 *
 * <p>(2) It provides an API to access the tree of objects.
 *
 * <p>Needless to say, this class is tightly coupled to the binary format of icu4c's pnames.icu
 * file.
 *
 * <p>Each time a UPropertyAliases is constructed, the pnames.icu file is read, parsed, and data
 * structures assembled. Clients should create one singleton instance and cache it.
 *
 * @author Alan Liu
 * @since ICU 2.4
 */
public final class UPropertyAliases {
    // Byte offsets from the start of the data, after the generic header.
    private static final int IX_VALUE_MAPS_OFFSET = 0;
    private static final int IX_BYTE_TRIES_OFFSET = 1;
    private static final int IX_NAME_GROUPS_OFFSET = 2;
    private static final int IX_RESERVED3_OFFSET = 3;
    // private static final int IX_RESERVED4_OFFSET=4;
    // private static final int IX_TOTAL_SIZE=5;

    // Other values.
    // private static final int IX_MAX_NAME_LENGTH=6;
    // private static final int IX_RESERVED7=7;
    // private static final int IX_COUNT=8;

    // ----------------------------------------------------------------
    // Runtime data.  This is an unflattened representation of the
    // data in pnames.icu.

    private int[] valueMaps;
    private byte[] bytesTries;
    private String nameGroups;

    private static final class IsAcceptable implements ICUBinary.Authenticate {
        @Override
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == 2;
        }
    }

    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
    private static final int DATA_FORMAT = 0x706E616D; // "pnam"

    private void load(ByteBuffer bytes) throws IOException {
        // dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        int indexesLength = bytes.getInt() / 4; // inIndexes[IX_VALUE_MAPS_OFFSET]/4
        if (indexesLength < 8) { // formatVersion 2 initially has 8 indexes
            throw new IOException("pnames.icu: not enough indexes");
        }
        int[] inIndexes = new int[indexesLength];
        inIndexes[0] = indexesLength * 4;
        for (int i = 1; i < indexesLength; ++i) {
            inIndexes[i] = bytes.getInt();
        }

        // Read the valueMaps.
        int offset = inIndexes[IX_VALUE_MAPS_OFFSET];
        int nextOffset = inIndexes[IX_BYTE_TRIES_OFFSET];
        int numInts = (nextOffset - offset) / 4;
        valueMaps = ICUBinary.getInts(bytes, numInts, 0);

        // Read the bytesTries.
        offset = nextOffset;
        nextOffset = inIndexes[IX_NAME_GROUPS_OFFSET];
        int numBytes = nextOffset - offset;
        bytesTries = new byte[numBytes];
        bytes.get(bytesTries);

        // Read the nameGroups and turn them from ASCII bytes into a Java String.
        offset = nextOffset;
        nextOffset = inIndexes[IX_RESERVED3_OFFSET];
        numBytes = nextOffset - offset;
        StringBuilder sb = new StringBuilder(numBytes);
        for (int i = 0; i < numBytes; ++i) {
            sb.append((char) bytes.get());
        }
        nameGroups = sb.toString();
    }

    private UPropertyAliases() throws IOException {
        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
        load(bytes);
    }

    private int findProperty(int property) {
        int i = 1; // valueMaps index, initially after numRanges
        for (int numRanges = valueMaps[0]; numRanges > 0; --numRanges) {
            // Read and skip the start and limit of this range.
            int start = valueMaps[i];
            int limit = valueMaps[i + 1];
            i += 2;
            if (property < start) {
                break;
            }
            if (property < limit) {
                return i + (property - start) * 2;
            }
            i += (limit - start) * 2; // Skip all entries for this range.
        }
        return 0;
    }

    private int findPropertyValueNameGroup(int valueMapIndex, int value) {
        if (valueMapIndex == 0) {
            return 0; // The property does not have named values.
        }
        ++valueMapIndex; // Skip the BytesTrie offset.
        int numRanges = valueMaps[valueMapIndex++];
        if (numRanges < 0x10) {
            // Ranges of values.
            for (; numRanges > 0; --numRanges) {
                // Read and skip the start and limit of this range.
                int start = valueMaps[valueMapIndex];
                int limit = valueMaps[valueMapIndex + 1];
                valueMapIndex += 2;
                if (value < start) {
                    break;
                }
                if (value < limit) {
                    return valueMaps[valueMapIndex + value - start];
                }
                valueMapIndex += limit - start; // Skip all entries for this range.
            }
        } else {
            // List of values.
            int valuesStart = valueMapIndex;
            int nameGroupOffsetsStart = valueMapIndex + numRanges - 0x10;
            do {
                int v = valueMaps[valueMapIndex];
                if (value < v) {
                    break;
                }
                if (value == v) {
                    return valueMaps[nameGroupOffsetsStart + valueMapIndex - valuesStart];
                }
            } while (++valueMapIndex < nameGroupOffsetsStart);
        }
        return 0;
    }

    private String getName(int nameGroupsIndex, int nameIndex) {
        int numNames = nameGroups.charAt(nameGroupsIndex++);
        if (nameIndex < 0 || numNames <= nameIndex) {
            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
        }
        // Skip nameIndex names.
        for (; nameIndex > 0; --nameIndex) {
            while (0 != nameGroups.charAt(nameGroupsIndex++)) {}
        }
        // Find the end of this name.
        int nameStart = nameGroupsIndex;
        while (0 != nameGroups.charAt(nameGroupsIndex)) {
            ++nameGroupsIndex;
        }
        if (nameStart == nameGroupsIndex) {
            return null; // no name (Property[Value]Aliases.txt has "n/a")
        }
        return nameGroups.substring(nameStart, nameGroupsIndex);
    }

    private static int asciiToLowercase(int c) {
        return 'A' <= c && c <= 'Z' ? c + 0x20 : c;
    }

    private boolean containsName(BytesTrie trie, CharSequence name) {
        BytesTrie.Result result = BytesTrie.Result.NO_VALUE;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            // Ignore delimiters '-', '_', and ASCII White_Space.
            if (c == '-' || c == '_' || c == ' ' || (0x09 <= c && c <= 0x0d)) {
                continue;
            }
            if (!result.hasNext()) {
                return false;
            }
            c = asciiToLowercase(c);
            result = trie.next(c);
        }
        return result.hasValue();
    }

    // ----------------------------------------------------------------
    // Public API

    public static final UPropertyAliases INSTANCE;

    static {
        try {
            INSTANCE = new UPropertyAliases();
        } catch (IOException e) {
            /// CLOVER:OFF
            MissingResourceException mre =
                    new MissingResourceException(
                            "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
            mre.initCause(e);
            throw mre;
            /// CLOVER:ON
        }
    }

    /**
     * Returns a property name given a property enum. Multiple names may be available for each
     * property; the nameChoice selects among them.
     */
    public String getPropertyName(int property, int nameChoice) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "
                            + property
                            + " (0x"
                            + Integer.toHexString(property)
                            + ")");
        }
        return getName(valueMaps[valueMapIndex], nameChoice);
    }

    /**
     * Returns a value name given a property enum and a value enum. Multiple names may be available
     * for each value; the nameChoice selects among them.
     */
    public String getPropertyValueName(int property, int value, int nameChoice) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "
                            + property
                            + " (0x"
                            + Integer.toHexString(property)
                            + ")");
        }
        int nameGroupOffset = findPropertyValueNameGroup(valueMaps[valueMapIndex + 1], value);
        if (nameGroupOffset == 0) {
            throw new IllegalArgumentException(
                    "Property "
                            + property
                            + " (0x"
                            + Integer.toHexString(property)
                            + ") does not have named values");
        }
        return getName(nameGroupOffset, nameChoice);
    }

    private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
        BytesTrie trie = new BytesTrie(bytesTries, bytesTrieOffset);
        if (containsName(trie, alias)) {
            return trie.getValue();
        } else {
            return UProperty.UNDEFINED;
        }
    }

    /**
     * Returns a property enum given one of its property names. If the property name is not known,
     * this method returns UProperty.UNDEFINED.
     */
    public int getPropertyEnum(CharSequence alias) {
        return getPropertyOrValueEnum(0, alias);
    }

    /** Returns a value enum given a property enum and one of its value names. */
    public int getPropertyValueEnum(int property, CharSequence alias) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "
                            + property
                            + " (0x"
                            + Integer.toHexString(property)
                            + ")");
        }
        valueMapIndex = valueMaps[valueMapIndex + 1];
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException(
                    "Property "
                            + property
                            + " (0x"
                            + Integer.toHexString(property)
                            + ") does not have named values");
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Returns a value enum given a property enum and one of its value names. Does not throw.
     *
     * @return value enum, or UProperty.UNDEFINED if not defined for that property
     */
    public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            return UProperty.UNDEFINED;
        }
        valueMapIndex = valueMaps[valueMapIndex + 1];
        if (valueMapIndex == 0) {
            return UProperty.UNDEFINED;
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Compare two property names, returning <0, 0, or >0. The comparison is that described as
     * "loose" matching in the Property*Aliases.txt files.
     */
    public static int compare(String stra, String strb) {
        // Note: This implementation is a literal copy of
        // uprv_comparePropertyNames.  It can probably be improved.
        int istra = 0, istrb = 0, rc;
        int cstra = 0, cstrb = 0;
        for (; ; ) {
            /* Ignore delimiters '-', '_', and ASCII White_Space */
            while (istra < stra.length()) {
                cstra = stra.charAt(istra);
                switch (cstra) {
                    case '-':
                    case '_':
                    case ' ':
                    case '\t':
                    case '\n':
                    case 0xb /*\v*/:
                    case '\f':
                    case '\r':
                        ++istra;
                        continue;
                }
                break;
            }

            while (istrb < strb.length()) {
                cstrb = strb.charAt(istrb);
                switch (cstrb) {
                    case '-':
                    case '_':
                    case ' ':
                    case '\t':
                    case '\n':
                    case 0xb /*\v*/:
                    case '\f':
                    case '\r':
                        ++istrb;
                        continue;
                }
                break;
            }

            /* If we reach the ends of both strings then they match */
            boolean endstra = istra == stra.length();
            boolean endstrb = istrb == strb.length();
            if (endstra) {
                if (endstrb) return 0;
                cstra = 0;
            } else if (endstrb) {
                cstrb = 0;
            }

            rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
            if (rc != 0) {
                return rc;
            }

            ++istra;
            ++istrb;
        }
    }
}