ICU 78.1  78.1
messageformat2.h
Go to the documentation of this file.
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #ifndef MESSAGEFORMAT2_H
7 #define MESSAGEFORMAT2_H
8 
9 #if U_SHOW_CPLUSPLUS_API
10 
11 #if !UCONFIG_NO_NORMALIZATION
12 
13 #if !UCONFIG_NO_FORMATTING
14 
15 #if !UCONFIG_NO_MF2
16 
23 #include "unicode/messageformat2_data_model.h"
24 #include "unicode/messageformat2_function_registry.h"
25 #include "unicode/normalizer2.h"
26 #include "unicode/unistr.h"
27 
28 #ifndef U_HIDE_DEPRECATED_API
29 
30 U_NAMESPACE_BEGIN
31 
32 namespace message2 {
33 
34  class Environment;
35  class MessageContext;
36  class StaticErrors;
37  class InternalValue;
38 
55  // Note: This class does not currently inherit from the existing
56  // `Format` class.
57  public:
73 
88  U_I18N_API UnicodeString formatToString(const MessageArguments& arguments, UErrorCode& status);
89 
// Placeholder implementation: this overload performs no formatting.
// `arguments` is deliberately unused. If `status` indicates success on entry,
// it is set to U_UNSUPPORTED_ERROR; a failure code already present is left
// unchanged. The returned FormattedMessage is constructed from `status`.
105  U_I18N_API FormattedMessage format(const MessageArguments& arguments, UErrorCode& status) const {
106  (void) arguments; // marks the parameter as intentionally unused
107  if (U_SUCCESS(status)) {
108  status = U_UNSUPPORTED_ERROR;
109  }
110  return FormattedMessage(status);
111  }
112 
// Returns a const reference to this object's `locale` member (no copy is made).
121  U_I18N_API const Locale& getLocale() const { return locale; }
122 
133 
144 
159  U_MF_BEST_EFFORT = 0,
167  U_MF_STRICT
168  } UMFErrorHandlingBehavior;
169 
177  private:
178  friend class MessageFormatter;
179 
180  // The pattern to be parsed to generate the formatted message
181  UnicodeString pattern;
182  bool hasPattern = false;
183  bool hasDataModel = false;
184  // The data model to be used to generate the formatted message
185  // Initialized either by `setDataModel()`, or by the parser
186  // through a call to `setPattern()`
187  MFDataModel dataModel;
188  // Normalized representation of the pattern;
189  // ignored if `setPattern()` wasn't called
190  UnicodeString normalizedInput;
191  // Errors (internal representation of parse errors)
192  // Ignored if `setPattern()` wasn't called
193  StaticErrors* errors;
194  Locale locale;
195  // Not owned
196  const MFFunctionRegistry* customMFFunctionRegistry;
197  // Error behavior; see comment in `MessageFormatter` class
198  bool signalErrors = false;
199 
200  void clearState();
201  public:
228  UParseError& parseError,
229  UErrorCode& status);
317  U_I18N_API virtual ~Builder();
318  }; // class MessageFormatter::Builder
319 
320  // TODO: Shouldn't be public; only used for testing
// Returns a const reference to the `normalizedInput` member, i.e. the
// normalized version of the input string (optional whitespace removed).
329  U_I18N_API const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
330 
331  private:
332  friend class Builder;
333  friend class Checker;
334  friend class MessageArguments;
335  friend class MessageContext;
336 
337  MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
338 
339  MessageFormatter() = delete; // default constructor not implemented
340 
341  // Do not define default assignment operator
342  const MessageFormatter &operator=(const MessageFormatter &) = delete;
343 
344  // Selection methods
345 
346  // Takes a vector of FormattedPlaceholders
347  void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
348  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
349  void filterVariants(const UVector&, UVector&, UErrorCode&) const;
350  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
351  void sortVariants(const UVector&, UVector&, UErrorCode&) const;
352  // Takes a vector of strings (input) and a vector of strings (output)
353  void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const;
354  // Takes a vector of FormattedPlaceholders (input),
355  // and a vector of vectors of strings (output)
356  void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
357 
358  // Formatting methods
359 
360  [[nodiscard]] FormattedPlaceholder formatLiteral(const UnicodeString&, const data_model::Literal&) const;
361  void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
362  // Evaluates a function call
363  // Dispatches on argument type
364  [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument,
365  MessageContext& context,
366  UErrorCode& status) const;
367  // Dispatches on function name
368  [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName,
369  InternalValue* argument,
370  FunctionOptions&& options,
371  MessageContext& context,
372  UErrorCode& status) const;
373  // Formats an expression that appears in a pattern or as the definition of a local variable
374  [[nodiscard]] InternalValue* formatExpression(const UnicodeString&,
375  const Environment&,
376  const data_model::Expression&,
377  MessageContext&,
378  UErrorCode&) const;
379  [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
380  [[nodiscard]] InternalValue* formatOperand(const UnicodeString&,
381  const Environment&,
382  const data_model::Operand&,
383  MessageContext&,
384  UErrorCode&) const;
385  [[nodiscard]] FormattedPlaceholder evalArgument(const UnicodeString&,
387  MessageContext&,
388  UErrorCode&) const;
389  void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
390 
391  // Function registry methods
// Returns true when the (non-owned) custom function registry pointer is non-null.
392  bool hasCustomMFFunctionRegistry() const {
393  return (customMFFunctionRegistry != nullptr);
394  }
395 
396  // Precondition: custom function registry exists
397  // Note: both this accessor and the reference it returns are const, although the values
398  // in the MFFunctionRegistry are mutable (a FormatterFactory can have mutable state)
399  const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
400 
401  bool isCustomFormatter(const FunctionName&) const;
402  FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
403  bool isBuiltInSelector(const FunctionName&) const;
404  bool isBuiltInFormatter(const FunctionName&) const;
405  bool isCustomSelector(const FunctionName&) const;
406  const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
// A function name denotes a selector if it is either a built-in or a custom selector.
407  bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
// A function name denotes a formatter if it is either a built-in or a custom formatter.
408  bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
409  const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
410 
411  Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
412  Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
413  bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
414 
415  // Checking for resolution errors
416  void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
417  void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
418  void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
419  void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
420 
421  void initErrors(UErrorCode&);
422  void clearErrors() const;
423  void cleanup() noexcept;
424 
425  // The locale this MessageFormatter was created with
426  /* const */ Locale locale;
427 
428  // Registry for built-in functions
429  MFFunctionRegistry standardMFFunctionRegistry;
430  // Registry for custom functions; may be null if no custom registry supplied
431  // Note: this is *not* owned by the MessageFormatter object
432  // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
433  // while also not requiring the function registry to be deeply-copyable. Making the
434  // function registry copyable would impose a requirement on any implementations
435  // of the FormatterFactory and SelectorFactory interfaces to implement a custom
436  // clone() method, which is necessary to avoid sharing between copies of the
437  // function registry (and thus double-frees)
438  // Not deeply immutable (the values in the function registry are mutable,
439  // as a FormatterFactory can have mutable state)
440  const MFFunctionRegistry* customMFFunctionRegistry;
441 
442  // Data model, representing the parsed message
443  MFDataModel dataModel;
444 
445  // Normalized version of the input string (optional whitespace removed)
446  UnicodeString normalizedInput;
447 
448  // Errors -- only used while parsing and checking for data model errors; then
449  // the MessageContext keeps track of errors
450  // Must be a raw pointer to avoid including the internal header file
451  // defining StaticErrors
452  // Owned by `this`
453  StaticErrors* errors = nullptr;
454 
455  // Error handling behavior.
456  // If true, then formatting methods set their UErrorCode arguments
457  // to signal MessageFormat errors, and no useful output is returned.
458  // If false, then MessageFormat errors are not signaled and the
459  // formatting methods return best-effort output.
460  // The default is false.
461  bool signalErrors = false;
462 
463  }; // class MessageFormatter
464 
465 } // namespace message2
466 
467 U_NAMESPACE_END
468 
469 #endif // U_HIDE_DEPRECATED_API
470 
471 #endif /* #if !UCONFIG_NO_MF2 */
472 
473 #endif /* #if !UCONFIG_NO_FORMATTING */
474 
475 #endif /* #if !UCONFIG_NO_NORMALIZATION */
476 
477 #endif /* U_SHOW_CPLUSPLUS_API */
478 
479 #endif // MESSAGEFORMAT2_H
480 
481 // eof
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:198
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:222
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:303
Not yet implemented: The result of a message formatting operation.
A FormattedPlaceholder encapsulates an input value (a message2::Formattable) together with an optio...
Structure encapsulating named options passed to a custom selector or formatter.
Defines mappings from names of formatters and selectors to functions implementing them.
The MessageArguments class represents the named arguments to a message.
The mutable Builder class allows each part of the MessageFormatter to be initialized separately; call...
U_I18N_API Builder & setPattern(const UnicodeString &pattern, UParseError &parseError, UErrorCode &status)
Sets the pattern (contents of the message) and parses it into a data model.
U_I18N_API Builder & setErrorHandlingBehavior(UMFErrorHandlingBehavior type)
Set the error handling behavior for this formatter.
U_I18N_API Builder & setFunctionRegistry(const MFFunctionRegistry &functionRegistry)
Sets a custom function registry.
U_I18N_API Builder & setDataModel(MFDataModel &&dataModel)
Sets a data model.
U_I18N_API Builder & setLocale(const Locale &locale)
Sets the locale to use for formatting.
virtual U_I18N_API ~Builder()
Destructor.
U_I18N_API Builder(UErrorCode &status)
Default constructor.
U_I18N_API MessageFormatter build(UErrorCode &status) const
Constructs a new immutable MessageFormatter using the pattern or data model that was previously set,...
U_I18N_API const UnicodeString & getNormalizedPattern() const
Returns a string consisting of the input with optional spaces removed.
U_I18N_API const MFDataModel & getDataModel() const
Accesses the data model referred to by this MessageFormatter object.
U_I18N_API MessageFormatter & operator=(MessageFormatter &&) noexcept
Move assignment operator: The source MessageFormatter will be left in a valid but undefined state.
U_I18N_API const Locale & getLocale() const
Accesses the locale that this MessageFormatter object was created with.
U_I18N_API UnicodeString getPattern() const
Serializes the data model as a string in MessageFormat 2.0 syntax.
UMFErrorHandlingBehavior
Used in conjunction with the MessageFormatter::Builder::setErrorHandlingBehavior() method.
The Expression class corresponds to the expression nonterminal in the MessageFormat 2 grammar and the...
The Literal class corresponds to the literal nonterminal in the MessageFormat 2 grammar,...
The MFDataModel class describes a parsed representation of the text of a message.
The Operand class corresponds to the operand nonterminal in the MessageFormat 2 grammar,...
A Pattern is a sequence of formattable parts.
C++ API: Formats messages using the draft MessageFormat 2.0.
C++ API: New API for Unicode Normalization.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
C++ API: Unicode String.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:509
@ U_UNSUPPORTED_ERROR
Requested operation not supported in current context.
Definition: utypes.h:561
#define U_SUCCESS(x)
Does the error code indicate success?
Definition: utypes.h:822
#define U_I18N_API_CLASS
Set to export library symbols from inside the i18n library, and to import them from outside,...
Definition: utypes.h:457
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:316