ICU 77.1  77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
messageformat2.h
Go to the documentation of this file.
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #ifndef MESSAGEFORMAT2_H
7 #define MESSAGEFORMAT2_H
8 
9 #if U_SHOW_CPLUSPLUS_API
10 
11 #if !UCONFIG_NO_NORMALIZATION
12 
13 #if !UCONFIG_NO_FORMATTING
14 
15 #if !UCONFIG_NO_MF2
16 
23 #include "unicode/messageformat2_data_model.h"
24 #include "unicode/messageformat2_function_registry.h"
25 #include "unicode/normalizer2.h"
26 #include "unicode/unistr.h"
27 
28 #ifndef U_HIDE_DEPRECATED_API
29 
30 U_NAMESPACE_BEGIN
31 
32 namespace message2 {
33 
34  class Environment;
35  class MessageContext;
36  class StaticErrors;
37  class InternalValue;
38 
55  // Note: This class does not currently inherit from the existing
56  // `Format` class.
57  public:
72  virtual ~MessageFormatter();
73 
88  UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
89 
// Placeholder for structured formatting, which is not implemented here.
// `arguments` is ignored. If `status` indicates success on entry it is set to
// U_UNSUPPORTED_ERROR; a failure code already present in `status` is left as is.
// Returns a FormattedMessage constructed from the resulting `status`.
105  FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
106  (void) arguments; // parameter is intentionally unused
107  if (U_SUCCESS(status)) {
108  status = U_UNSUPPORTED_ERROR;
109  }
110  return FormattedMessage(status);
111  }
112 
// Returns a const reference to the `locale` member, i.e. the locale this
// MessageFormatter was created with.
121  const Locale& getLocale() const { return locale; }
122 
133 
143  const MFDataModel& getDataModel() const;
144 
159  U_MF_BEST_EFFORT = 0,
167  U_MF_STRICT
168  } UMFErrorHandlingBehavior;
169 
// Mutable builder that allows each part of a MessageFormatter to be initialized separately.
// NOTE(review): only some of the public members are visible in this listing; the
// index text of this page also names setDataModel(), setErrorHandlingBehavior()
// and build(), whose declarations are not shown here.
176  class U_I18N_API Builder : public UObject {
177  private:
178  friend class MessageFormatter;
179 
180  // The pattern to be parsed to generate the formatted message
181  UnicodeString pattern;
     // Presumably these record whether `setPattern()` / `setDataModel()` has been
     // called -- TODO confirm against the implementation
182  bool hasPattern = false;
183  bool hasDataModel = false;
184  // The data model to be used to generate the formatted message
185  // Initialized either by `setDataModel()`, or by the parser
186  // through a call to `setPattern()`
187  MFDataModel dataModel;
188  // Normalized representation of the pattern;
189  // ignored if `setPattern()` wasn't called
190  UnicodeString normalizedInput;
191  // Errors (internal representation of parse errors)
192  // Ignored if `setPattern()` wasn't called
     // NOTE(review): no in-class initializer; presumably set by the constructor -- confirm
193  StaticErrors* errors;
194  Locale locale;
195  // Not owned
     // NOTE(review): no in-class initializer here either -- confirm the constructor sets it
196  const MFFunctionRegistry* customMFFunctionRegistry;
197  // Error behavior; see comment in `MessageFormatter` class
198  bool signalErrors = false;
199 
200  void clearState();
201  public:
     // Sets the locale to use for formatting. Returns a Builder reference.
211  Builder& setLocale(const Locale& locale);
     // Sets the pattern (contents of the message) and parses it into a data model.
     // `parseError` and `status` are passed by non-const reference.
227  Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
     // Sets a custom function registry.
241  Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
     // Constructor; `status` is passed by non-const reference.
308  Builder(UErrorCode& status);
315  virtual ~Builder();
316  }; // class MessageFormatter::Builder
317 
318  // TODO: Shouldn't be public; only used for testing
// Returns a const reference to the `normalizedInput` member (the input string
// with optional whitespace removed).
327  const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
328 
329  private:
330  friend class Builder;
331  friend class Checker;
332  friend class MessageArguments;
333  friend class MessageContext;
334 
335  MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
336 
337  MessageFormatter() = delete; // default constructor not implemented
338 
339  // Do not define default assignment operator
340  const MessageFormatter &operator=(const MessageFormatter &) = delete;
341 
342  // Selection methods
343 
344  // Takes a vector of FormattedPlaceholders
345  void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
346  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
347  void filterVariants(const UVector&, UVector&, UErrorCode&) const;
348  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
349  void sortVariants(const UVector&, UVector&, UErrorCode&) const;
350  // Takes a vector of strings (input) and a vector of strings (output)
351  void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const;
352  // Takes a vector of FormattedPlaceholders (input),
353  // and a vector of vectors of strings (output)
354  void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
355 
356  // Formatting methods
357 
358  // Used for normalizing variable names and keys for comparison
359  UnicodeString normalizeNFC(const UnicodeString&) const;
360  [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
361  void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
362  // Evaluates a function call
363  // Dispatches on argument type
364  [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument,
365  MessageContext& context,
366  UErrorCode& status) const;
367  // Dispatches on function name
368  [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName,
369  InternalValue* argument,
370  FunctionOptions&& options,
371  MessageContext& context,
372  UErrorCode& status) const;
373  // Formats an expression that appears in a pattern or as the definition of a local variable
374  [[nodiscard]] InternalValue* formatExpression(const Environment&,
375  const data_model::Expression&,
376  MessageContext&,
377  UErrorCode&) const;
378  [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
379  [[nodiscard]] InternalValue* formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
380  [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
381  void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
382 
383  // Function registry methods
// Returns true when the `customMFFunctionRegistry` pointer is non-null,
// i.e. a custom function registry was supplied.
384  bool hasCustomMFFunctionRegistry() const {
385  return (customMFFunctionRegistry != nullptr);
386  }
387 
388  // Precondition: custom function registry exists
389  // Note: this accessor is declared const and returns a const reference, even though
390  // the values in the MFFunctionRegistry are mutable (a FormatterFactory can have mutable state)
391  const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
392 
393  bool isCustomFormatter(const FunctionName&) const;
394  FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
395  bool isBuiltInSelector(const FunctionName&) const;
396  bool isBuiltInFormatter(const FunctionName&) const;
397  bool isCustomSelector(const FunctionName&) const;
398  const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
// Returns true if `fn` is a built-in selector or a custom selector; the
// custom check is only evaluated when the built-in check is false.
399  bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
// Returns true if `fn` is a built-in formatter or a custom formatter; the
// custom check is only evaluated when the built-in check is false.
400  bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
401  const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
402 
403  Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
404  Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
405  bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
406 
407  // Checking for resolution errors
408  void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
409  void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
410  void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
411  void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
412 
413  void initErrors(UErrorCode&);
414  void clearErrors() const;
415  void cleanup() noexcept;
416 
417  // The locale this MessageFormatter was created with
418  /* const */ Locale locale;
419 
420  // Registry for built-in functions
421  MFFunctionRegistry standardMFFunctionRegistry;
422  // Registry for custom functions; may be null if no custom registry supplied
423  // Note: this is *not* owned by the MessageFormatter object
424  // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
425  // while also not requiring the function registry to be deeply-copyable. Making the
426  // function registry copyable would impose a requirement on any implementations
427  // of the FormatterFactory and SelectorFactory interfaces to implement a custom
428  // clone() method, which is necessary to avoid sharing between copies of the
429  // function registry (and thus double-frees)
430  // Not deeply immutable (the values in the function registry are mutable,
431  // as a FormatterFactory can have mutable state)
432  const MFFunctionRegistry* customMFFunctionRegistry;
433 
434  // Data model, representing the parsed message
435  MFDataModel dataModel;
436 
437  // Normalized version of the input string (optional whitespace removed)
438  UnicodeString normalizedInput;
439 
440  // Errors -- only used while parsing and checking for data model errors; then
441  // the MessageContext keeps track of errors
442  // Must be a raw pointer to avoid including the internal header file
443  // defining StaticErrors
444  // Owned by `this`
445  StaticErrors* errors = nullptr;
446 
447  // Error handling behavior.
448  // If true, then formatting methods set their UErrorCode arguments
449  // to signal MessageFormat errors, and no useful output is returned.
450  // If false, then MessageFormat errors are not signaled and the
451  // formatting methods return best-effort output.
452  // The default is false.
453  bool signalErrors = false;
454 
455  // Used for implementing normalizeNFC()
456  const Normalizer2* nfcNormalizer = nullptr;
457 
458  }; // class MessageFormatter
459 
460 } // namespace message2
461 
462 U_NAMESPACE_END
463 
464 #endif // U_HIDE_DEPRECATED_API
465 
466 #endif /* #if !UCONFIG_NO_MF2 */
467 
468 #endif /* #if !UCONFIG_NO_FORMATTING */
469 
470 #endif /* #if !UCONFIG_NO_NORMALIZATION */
471 
472 #endif /* U_SHOW_CPLUSPLUS_API */
473 
474 #endif // MESSAGEFORMAT2_H
475 
476 // eof
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
Not yet implemented: The result of a message formatting operation.
A FormattedPlaceholder encapsulates an input value (a message2::Formattable) together with an optio...
Structure encapsulating named options passed to a custom selector or formatter.
The MFDataModel class describes a parsed representation of the text of a message.
Defines mappings from names of formatters and selectors to functions implementing them.
The MessageArguments class represents the named arguments to a message.
The mutable Builder class allows each part of the MessageFormatter to be initialized separately; call...
Builder & setPattern(const UnicodeString &pattern, UParseError &parseError, UErrorCode &status)
Sets the pattern (contents of the message) and parses it into a data model.
MessageFormatter build(UErrorCode &status) const
Constructs a new immutable MessageFormatter using the pattern or data model that was previously set,...
Builder(UErrorCode &status)
Default constructor.
Builder & setDataModel(MFDataModel &&dataModel)
Sets a data model.
Builder & setFunctionRegistry(const MFFunctionRegistry &functionRegistry)
Sets a custom function registry.
Builder & setErrorHandlingBehavior(UMFErrorHandlingBehavior type)
Set the error handling behavior for this formatter.
Builder & setLocale(const Locale &locale)
Sets the locale to use for formatting.
const UnicodeString & getNormalizedPattern() const
Returns a string consisting of the input with optional spaces removed.
const Locale & getLocale() const
Accesses the locale that this MessageFormatter object was created with.
UnicodeString getPattern() const
Serializes the data model as a string in MessageFormat 2.0 syntax.
const MFDataModel & getDataModel() const
Accesses the data model referred to by this MessageFormatter object.
UMFErrorHandlingBehavior
Used in conjunction with the MessageFormatter::Builder::setErrorHandlingBehavior() method.
MessageFormatter & operator=(MessageFormatter &&) noexcept
Move assignment operator: The source MessageFormatter will be left in a valid but undefined state.
The Expression class corresponds to the expression nonterminal in the MessageFormat 2 grammar and the...
The Literal class corresponds to the literal nonterminal in the MessageFormat 2 grammar,...
The Operand class corresponds to the operand nonterminal in the MessageFormat 2 grammar,...
A Pattern is a sequence of formattable parts.
C++ API: Formats messages using the draft MessageFormat 2.0.
C++ API: New API for Unicode Normalization.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
C++ API: Unicode String.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
@ U_UNSUPPORTED_ERROR
Requested operation not supported in current context.
Definition: utypes.h:482
#define U_SUCCESS(x)
Does the error code indicate success?
Definition: utypes.h:743
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:316