ICU 77.1  77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
messagepattern.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2011-2013, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: messagepattern.h
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2011mar14
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __MESSAGEPATTERN_H__
18 #define __MESSAGEPATTERN_H__
19 
25 #include "unicode/utypes.h"
26 
27 #if U_SHOW_CPLUSPLUS_API
28 
29 #if !UCONFIG_NO_FORMATTING
30 
31 #include "unicode/parseerr.h"
32 #include "unicode/unistr.h"
33 
92 };
97 
207 };
212 
260 };
265 
272 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
273  ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)  // true only for the PLURAL and SELECTORDINAL argument types; note argType is evaluated twice
274 
275 enum {
282 
291 };
292 
299 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))  // sentinel double; presumably what getNumericValue() yields for parts without a numeric value -- TODO confirm
300 
301 U_NAMESPACE_BEGIN
302 
303 class MessagePatternDoubleList;
304 class MessagePatternPartsList;
305 
363 public:
373 
384 
403  MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
404 
411 
419 
424  virtual ~MessagePattern();
425 
444  UParseError *parseError, UErrorCode &errorCode);
445 
464  UParseError *parseError, UErrorCode &errorCode);
465 
484  UParseError *parseError, UErrorCode &errorCode);
485 
504  UParseError *parseError, UErrorCode &errorCode);
505 
511  void clear();
512 
520  clear();
521  aposMode=mode;
522  }
523 
529  bool operator==(const MessagePattern &other) const;
530 
536  inline bool operator!=(const MessagePattern &other) const {  // inequality: defined purely as the negation of operator==
537  return !operator==(other);
538  }
539 
544  int32_t hashCode() const;
545 
551  return aposMode;
552  }
553 
554  // Java has package-private jdkAposMode() here.
555  // In C++, this is declared in the MessageImpl class.
556 
562  return msg;
563  }
564 
571  return hasArgNames;
572  }
573 
580  return hasArgNumbers;
581  }
582 
594  static int32_t validateArgumentName(const UnicodeString &name);
595 
607 
608  class Part;
609 
616  int32_t countParts() const {  // number of "parts" created by parsing the pattern string
617  return partsLength;  // current part count as tracked by the partsLength member
618  }
619 
626  const Part &getPart(int32_t i) const {  // returns a reference to the i-th pattern "part"
627  return parts[i];  // direct array access: no range check on i is performed in this function
628  }
629 
638  return getPart(i).type;
639  }
640 
648  int32_t getPatternIndex(int32_t partIndex) const {  // pattern index of the specified pattern "part"
649  return getPart(partIndex).index;  // reads the Part's index field via getPart()
650  }
651 
659  UnicodeString getSubstring(const Part &part) const {  // substring of the pattern string indicated by the Part
660  return msg.tempSubString(part.index, part.length);  // temporary substring of msg starting at part.index, part.length units long
661  }
662 
670  UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {  // compares the part's substring of msg with s
671  return 0==msg.compare(part.index, part.length, s);  // true exactly when compare() reports 0 for the (index, length) range against s
672  }
673 
680  double getNumericValue(const Part &part) const;
681 
688  double getPluralOffset(int32_t pluralStart) const;
689 
698  int32_t getLimitPartIndex(int32_t start) const {  // index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start
699  int32_t limit=getPart(start).limitPartIndex;  // limit index recorded on the start part
700  if(limit<start) {  // recorded limit lies before start: fall back to start itself
701  return start;
702  }
703  return limit;  // otherwise the recorded limit part index (always >= start here)
704  }
705 
713  class Part : public UMemory {
714  public:
719  Part() {}  // default constructor, do not use; the empty body assigns none of the fields
720 
727  return type;
728  }
729 
735  int32_t getIndex() const {  // pattern string index associated with this Part
736  return index;
737  }
738 
745  int32_t getLength() const {  // length of the pattern substring associated with this Part
746  return length;  // stored as uint16_t, widened to int32_t on return
747  }
748 
755  int32_t getLimit() const {  // pattern string limit (exclusive-end) index associated with this Part
756  return index+length;  // start index plus substring length
757  }
758 
765  int32_t getValue() const {  // value associated with this part
766  return value;  // stored as int16_t, widened to int32_t on return
767  }
768 
776  UMessagePatternPartType msgType=getType();
777  if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
778  return static_cast<UMessagePatternArgType>(value);
779  } else {
780  return UMSGPAT_ARG_TYPE_NONE;
781  }
782  }
783 
793  }
794 
800  bool operator==(const Part &other) const;
801 
807  inline bool operator!=(const Part &other) const {  // inequality: defined purely as the negation of operator==
808  return !operator==(other);
809  }
810 
815  int32_t hashCode() const {  // hash built from type, index, length and value using the multiplier 37
816  return ((type*37+index)*37+length)*37+value;  // NOTE(review): evaluated in signed int arithmetic; a very large index could overflow -- confirm this is acceptable
817  }
818 
819  private:
820  friend class MessagePattern;
821 
822  static const int32_t MAX_LENGTH=0xffff;
823  static const int32_t MAX_VALUE=0x7fff;
824  static const int32_t MAX_NESTED_LEVELS=0x03ff;
825 
826  // Some fields are not final because they are modified during pattern parsing.
827  // After pattern parsing, the parts are effectively immutable.
829  int32_t index;
830  uint16_t length;
831  int16_t value;
832  int32_t limitPartIndex;
833  };
834 
835 private:
836  void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
837 
838  void postParse();
839 
840  int32_t parseMessage(int32_t index, int32_t msgStartLength,
841  int32_t nestingLevel, UMessagePatternArgType parentType,
842  UParseError *parseError, UErrorCode &errorCode);
843 
844  int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
845  UParseError *parseError, UErrorCode &errorCode);
846 
847  int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
848 
849  int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
850  UParseError *parseError, UErrorCode &errorCode);
851 
852  int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
853  UParseError *parseError, UErrorCode &errorCode);
854 
863  static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
864 
865  int32_t parseArgNumber(int32_t start, int32_t limit) {  // convenience overload operating on this pattern's own string
866  return parseArgNumber(msg, start, limit);  // forwards to the static overload with msg as the string to parse
867  }
868 
877  void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
878  UParseError *parseError, UErrorCode &errorCode);
879 
880  // Java has package-private appendReducedApostrophes() here.
881  // In C++, this is declared in the MessageImpl class.
882 
883  int32_t skipWhiteSpace(int32_t index);
884 
885  int32_t skipIdentifier(int32_t index);
886 
891  int32_t skipDouble(int32_t index);
892 
893  static UBool isArgTypeChar(UChar32 c);
894 
895  UBool isChoice(int32_t index);
896 
897  UBool isPlural(int32_t index);
898 
899  UBool isSelect(int32_t index);
900 
901  UBool isOrdinal(int32_t index);
902 
907  UBool inMessageFormatPattern(int32_t nestingLevel);
908 
913  UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
914 
915  void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
916  int32_t value, UErrorCode &errorCode);
917 
918  void addLimitPart(int32_t start,
919  UMessagePatternPartType type, int32_t index, int32_t length,
920  int32_t value, UErrorCode &errorCode);
921 
922  void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
923 
924  void setParseError(UParseError *parseError, int32_t index);
925 
926  UBool init(UErrorCode &errorCode);
927  UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
928 
930  UnicodeString msg;
931  // ArrayList<Part> parts=new ArrayList<Part>();
932  MessagePatternPartsList *partsList;
933  Part *parts;
934  int32_t partsLength;
935  // ArrayList<Double> numericValues;
936  MessagePatternDoubleList *numericValuesList;
937  double *numericValues;
938  int32_t numericValuesLength;
939  UBool hasArgNames;
940  UBool hasArgNumbers;
941  UBool needsAutoQuoting;
942 };
943 
944 U_NAMESPACE_END
945 
946 #endif // !UCONFIG_NO_FORMATTING
947 
948 #endif /* U_SHOW_CPLUSPLUS_API */
949 
950 #endif // __MESSAGEPATTERN_H__
A message pattern "part", representing a pattern parsing event.
bool operator==(const Part &other) const
UMessagePatternArgType getArgType() const
Returns the argument type if this part is of type ARG_START or ARG_LIMIT, otherwise UMSGPAT_ARG_TYPE_NONE.
int32_t getIndex() const
Returns the pattern string index associated with this Part.
int32_t getValue() const
Returns a value associated with this part.
bool operator!=(const Part &other) const
UMessagePatternPartType getType() const
Returns the type of this part.
int32_t getLength() const
Returns the length of the pattern substring associated with this Part.
int32_t getLimit() const
Returns the pattern string limit (exclusive-end) index associated with this Part.
static UBool hasNumericValue(UMessagePatternPartType type)
Indicates whether the Part type has a numeric value.
Part()
Default constructor, do not use.
Parses and represents ICU MessageFormat patterns.
void clear()
Clears this MessagePattern.
bool operator!=(const MessagePattern &other) const
MessagePattern & parseSelectStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a SelectFormat pattern string.
int32_t getLimitPartIndex(int32_t start) const
Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
MessagePattern & operator=(const MessagePattern &other)
Assignment operator.
UnicodeString getSubstring(const Part &part) const
Returns the substring of the pattern string indicated by the Part.
double getNumericValue(const Part &part) const
Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
int32_t countParts() const
Returns the number of "parts" created by parsing the pattern string.
MessagePattern & parseChoiceStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a ChoiceFormat pattern string.
UnicodeString autoQuoteApostropheDeep() const
Returns a version of the parsed pattern string where each ASCII apostrophe is doubled (escaped) if it...
UMessagePatternPartType getPartType(int32_t i) const
Returns the UMessagePatternPartType of the i-th pattern "part".
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)
Clears this MessagePattern and sets the UMessagePatternApostropheMode.
MessagePattern & parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a MessageFormat pattern string.
UBool hasNamedArguments() const
Does the parsed pattern have named arguments like {first_name}?
MessagePattern(UErrorCode &errorCode)
Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
MessagePattern(const MessagePattern &other)
Copy constructor.
static int32_t validateArgumentName(const UnicodeString &name)
Validates and parses an argument name or argument number string.
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Constructs a MessagePattern with default UMessagePatternApostropheMode and parses the MessageFormat pattern string.
int32_t hashCode() const
double getPluralOffset(int32_t pluralStart) const
Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
int32_t getPatternIndex(int32_t partIndex) const
Returns the pattern index of the specified pattern "part".
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
Constructs an empty MessagePattern.
bool operator==(const MessagePattern &other) const
UBool hasNumberedArguments() const
Does the parsed pattern have numbered arguments like {2}?
MessagePattern & parsePluralStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a PluralFormat pattern string.
const Part & getPart(int32_t i) const
Gets the i-th pattern "part".
UMessagePatternApostropheMode getApostropheMode() const
virtual ~MessagePattern()
Destructor.
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const
Compares the part's substring with the input string s.
const UnicodeString & getPatternString() const
UMemory is the common ICU base class.
Definition: uobject.h:115
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
UMessagePatternPartType
MessagePattern::Part type constants.
@ UMSGPAT_PART_TYPE_INSERT_CHAR
Indicates that a syntax character needs to be inserted for auto-quoting.
@ UMSGPAT_PART_TYPE_ARG_TYPE
The argument type.
@ UMSGPAT_PART_TYPE_MSG_START
Start of a message pattern (main or nested).
@ UMSGPAT_PART_TYPE_ARG_SELECTOR
A selector substring in a "complex" argument style.
@ UMSGPAT_PART_TYPE_ARG_LIMIT
End of an argument.
@ UMSGPAT_PART_TYPE_ARG_NUMBER
The argument number, provided by the value.
@ UMSGPAT_PART_TYPE_MSG_LIMIT
End of a message pattern (main or nested).
@ UMSGPAT_PART_TYPE_ARG_DOUBLE
A numeric value, for example the offset or an explicit selector value in a PluralFormat style.
@ UMSGPAT_PART_TYPE_ARG_NAME
The argument name.
@ UMSGPAT_PART_TYPE_ARG_START
Start of an argument.
@ UMSGPAT_PART_TYPE_REPLACE_NUMBER
Indicates a syntactic (non-escaped) # symbol in a plural variant.
@ UMSGPAT_PART_TYPE_ARG_STYLE
The argument style text.
@ UMSGPAT_PART_TYPE_ARG_INT
An integer value, for example the offset or an explicit selector value in a PluralFormat style.
@ UMSGPAT_PART_TYPE_SKIP_SYNTAX
Indicates a substring of the pattern string which is to be skipped when formatting.
UMessagePatternArgType
Argument type constants.
@ UMSGPAT_ARG_TYPE_SELECT
The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
@ UMSGPAT_ARG_TYPE_NONE
The argument has no specified type.
@ UMSGPAT_ARG_TYPE_PLURAL
The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset (e....
@ UMSGPAT_ARG_TYPE_SIMPLE
The argument has a "simple" type which is provided by the ARG_TYPE part.
@ UMSGPAT_ARG_TYPE_SELECTORDINAL
The argument is an ordinal-number PluralFormat with the same style parts sequence and semantics as UM...
@ UMSGPAT_ARG_TYPE_CHOICE
The argument is a ChoiceFormat with one or more ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR,...
@ UMSGPAT_ARG_NAME_NOT_VALID
Return value from MessagePattern.validateArgumentName() for when the string is invalid.
@ UMSGPAT_ARG_NAME_NOT_NUMBER
Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern ident...
UMessagePatternApostropheMode
Mode for when an apostrophe starts quoted literal text for MessageFormat output.
@ UMSGPAT_APOS_DOUBLE_OPTIONAL
A literal apostrophe is represented by either a single or a double apostrophe pattern character.
@ UMSGPAT_APOS_DOUBLE_REQUIRED
A literal apostrophe must be represented by a double apostrophe pattern character.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C++ API: Unicode String.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315