CharsetRecognizer.java

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2005-2012, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

/**
 * Abstract class for recognizing a single charset. Part of the implementation of ICU's
 * CharsetDetector.
 *
 * <p>Each specific charset that can be recognized will have an instance of some subclass of this
 * class. All interaction between the overall CharsetDetector and the stuff specific to an
 * individual charset happens via the interface provided here.
 *
 * <p>Instances of CharsetDetector DO NOT have or maintain state pertaining to a specific match or
 * detect operation. The WILL be shared by multiple instances of CharsetDetector. They encapsulate
 * const charset-specific information.
 */
abstract class CharsetRecognizer {
    /**
     * Get the IANA name of this charset.
     *
     * @return the charset name.
     */
    abstract String getName();

    /**
     * Get the ISO language code for this charset.
     *
     * @return the language code, or <code>null</code> if the language cannot be determined.
     */
    public String getLanguage() {
        return null;
    }

    /**
     * Test the match of this charset with the input text data which is obtained via the
     * CharsetDetector object.
     *
     * @param det The CharsetDetector, which contains the input text to be checked for being in this
     *     charset.
     * @return A CharsetMatch object containing details of match with this charset, or null if there
     *     was no match.
     */
    abstract CharsetMatch match(CharsetDetector det);
}