LocalizedSegmenter.java
// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html
package com.ibm.icu.segmenter;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.util.ULocale;
import java.util.Locale;
/**
* Performs segmentation according to the rules defined for the locale.
*
* @draft ICU 78
*/
public class LocalizedSegmenter implements Segmenter {
private BreakIterator breakIterPrototype;
/**
* Returns a {@link Segments} object that encapsulates the segmentation of the input {@code
* CharSequence}. The {@code Segments} object, in turn, provides the main APIs to support
* traversal over the resulting segments and boundaries via the Java {@code Stream} abstraction.
*
* @param s input {@code CharSequence} on which segmentation is performed. The input must not be
* modified while using the resulting {@code Segments} object.
* @return A {@code Segments} object with APIs to access the results of segmentation, including
* APIs that return {@code Stream}s of the segments and boundaries.
* @draft ICU 78
*/
@Override
public Segments segment(CharSequence s) {
return new SegmentsImpl(breakIterPrototype, s);
}
/**
* @return a builder for constructing {@code LocalizedSegmenter}
* @draft ICU 78
*/
public static Builder builder() {
return new Builder();
}
private LocalizedSegmenter(ULocale locale, SegmentationType segmentationType) {
switch (segmentationType) {
case LINE:
breakIterPrototype = BreakIterator.getLineInstance(locale);
break;
case SENTENCE:
breakIterPrototype = BreakIterator.getSentenceInstance(locale);
break;
case WORD:
breakIterPrototype = BreakIterator.getWordInstance(locale);
break;
case GRAPHEME_CLUSTER:
breakIterPrototype = BreakIterator.getCharacterInstance(locale);
break;
}
}
/**
* The type of segmentation to be performed. See the ICU User Guide page <a
* href="https://unicode-org.github.io/icu/userguide/boundaryanalysis/#four-types-of-breakiterator">Boundary
* Analysis</a> for further details.
*
* @draft ICU 78
*/
public enum SegmentationType {
/**
* @draft ICU 78
*/
GRAPHEME_CLUSTER,
/**
* @draft ICU 78
*/
WORD,
/**
* @draft ICU 78
*/
LINE,
/**
* @draft ICU 78
*/
SENTENCE,
}
/**
* Builder for {@link LocalizedSegmenter}
*
* @draft ICU 78
*/
public static class Builder {
private ULocale locale = ULocale.ROOT;
private SegmentationType segmentationType = null;
private Builder() {}
/**
* Set the locale for which segmentation rules will be loaded
*
* @param locale an ICU locale object
* @draft ICU 78
*/
public Builder setLocale(ULocale locale) {
if (locale == null) {
throw new IllegalArgumentException("locale cannot be set to null.");
}
this.locale = locale;
return this;
}
/**
* Set the locale for which segmentation rules will be loaded
*
* @param locale a Java locale object
* @draft ICU 78
*/
public Builder setLocale(Locale locale) {
if (locale == null) {
throw new IllegalArgumentException("locale cannot be set to null.");
}
this.locale = ULocale.forLocale(locale);
return this;
}
/**
* Set the segmentation type to be performed.
*
* @param segmentationType
* @draft ICU 78
*/
public Builder setSegmentationType(SegmentationType segmentationType) {
if (segmentationType == null) {
throw new IllegalArgumentException("segmentationType cannot be set to null.");
}
this.segmentationType = segmentationType;
return this;
}
/**
* Builds the {@code Segmenter}
*
* @return the constructed {@code Segmenter} instance
* @draft ICU 78
*/
public Segmenter build() {
if (segmentationType == null) {
throw new IllegalArgumentException(
"segmentationType is null and must be set to a specific value.");
}
return new LocalizedSegmenter(locale, segmentationType);
}
}
}