Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """ | |
| babel.plural | |
| ~~~~~~~~~~~~ | |
| CLDR Plural support. See UTS #35. | |
| :copyright: (c) 2013-2021 by the Babel Team. | |
| :license: BSD, see LICENSE for more details. | |
| """ | |
| import re | |
| from babel._compat import decimal | |
# Plural category tags accepted as rule keys. The order matters:
# ``to_gettext`` derives the numeric plural indices from it and
# ``PluralRule.__repr__`` lists the rules in this order.
_plural_tags = (
    'zero',
    'one',
    'two',
    'few',
    'many',
    'other',
)

# Tag returned by the generated functions when no explicit rule matches.
_fallback_tag = 'other'
def extract_operands(source):
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    ====== ===============================================================

    >>> extract_operands(decimal.Decimal('1.50'))
    (Decimal('1.50'), 1, 2, 1, 50, 5)
    >>> extract_operands(decimal.Decimal('0.05'))
    (Decimal('0.05'), 0, 2, 2, 5, 5)

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t tuple
    :rtype: tuple[int|decimal.Decimal, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # Integral floats carry no visible fraction digits; treat as int.
            n = i
        else:
            # Cast the `float` to a number via the string representation.
            # This is required for Python 2.6 anyway (it will straight out
            # fail to do the conversion otherwise), and it's highly unlikely
            # that the user actually wants the lossless conversion behavior
            # (quoting the Python documentation):
            # > If value is a float, the binary floating point value is
            # > losslessly converted to its exact decimal equivalent.
            # > This conversion can often require 53 or more digits of
            # > precision.
            # Should the user want that behavior, they can simply pass in a
            # pre-converted `Decimal` instance of desired accuracy.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            # The coefficient has no leading zeros, so for values such as
            # 0.05 (digits=(5,), exponent=-2) slicing alone yields fewer
            # digits than are visible. Left-pad with zeros so that exactly
            # ``-exp`` fraction digits are accounted for.
            trailing = ''.join(str(d) for d in dec_tuple.digits[exp:])
            trailing = trailing.rjust(-exp, '0')
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    return n, i, v, w, f, t
class PluralRule(object):
    """Represents a set of language pluralization rules.  The constructor
    accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
    resulting object is callable and accepts one parameter with a positive or
    negative number (both integer and float) for the number that indicates the
    plural form for a string and returns the tag for the format:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    # ``abstract`` holds the parsed ``(tag, ast)`` pairs; ``_func`` caches the
    # compiled Python evaluator and is only set on first call.
    __slots__ = ('abstract', '_func')

    def __init__(self, rules):
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or defined more than once
        """
        if isinstance(rules, dict):
            rules = rules.items()
        found = set()
        self.abstract = []
        for key, expr in sorted(rules):
            if key not in _plural_tags:
                raise ValueError('unknown tag %r' % key)
            elif key in found:
                raise ValueError('tag %r defined twice' % key)
            found.add(key)
            ast = _Parser(expr).ast
            # Expressions that consist only of samples parse to ``None`` and
            # contribute no rule.
            if ast:
                self.abstract.append((key, ast))

    def __repr__(self):
        rules = self.rules
        return '<%s %r>' % (
            type(self).__name__,
            ', '.join(['%s: %s' % (tag, rules[tag]) for tag in _plural_tags
                       if tag in rules])
        )

    @classmethod
    def parse(cls, rules):
        """Create a `PluralRule` instance for the given rules.  If the rules
        are a `PluralRule` object, that object is returned.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        if isinstance(rules, cls):
            return rules
        return cls(rules)

    @property
    def rules(self):
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        _compile = _UnicodeCompiler().compile
        return dict([(tag, _compile(ast)) for tag, ast in self.abstract])

    @property
    def tags(self):
        """A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rules is not part of this set unless there is an explicit
        rule for it."""
        return frozenset([entry[0] for entry in self.abstract])

    def __getstate__(self):
        # Only the parsed rules are pickled; the compiled function is rebuilt
        # lazily after unpickling.
        return self.abstract

    def __setstate__(self, abstract):
        self.abstract = abstract

    def __call__(self, n):
        # ``_func`` is an unset slot until the first call, so ``hasattr``
        # doubles as the "already compiled" check.
        if not hasattr(self, '_func'):
            self._func = to_python(self)
        return self._func(n)
def to_javascript(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
    function.  This function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compiler = _JavaScriptCompiler()
    pieces = ['(function(n) { return ']
    # One ``cond ? 'tag' : `` link per rule; the chain ends in the fallback.
    pieces.extend(
        '%s ? %r : ' % (compiler.compile(ast), tag)
        for tag, ast in PluralRule.parse(rule).abstract
    )
    pieces.append('%r; })' % _fallback_tag)
    return ''.join(pieces)
def to_python(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a regular
    Python function.  This is useful in situations where you need a real
    function and don't care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compile_ast = _PythonCompiler().compile
    source_lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t = extract_operands(n)',
    ]
    for tag, ast in PluralRule.parse(rule).abstract:
        # the str() call is to coerce the tag to the native string.  It's
        # a limited ascii restricted set of tags anyways so that is fine.
        source_lines.append(
            ' if (%s): return %r' % (compile_ast(ast), str(tag)))
    source_lines.append(' return %r' % _fallback_tag)

    # Helpers the generated source refers to by name.
    scope = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    # The source is built solely from the compiled AST and the validated tags,
    # never from the raw rule text.
    eval(compile('\n'.join(source_lines), '<rule>', 'exec'), scope)
    return scope['evaluate']
def to_gettext(rule):
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)

    # The fallback tag always takes part, even without an explicit rule.
    used_tags = rule.tags | {_fallback_tag}
    # A tag's gettext index is its position among the used tags, in the
    # order given by ``_plural_tags``.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]
    compiler = _GettextCompiler()

    parts = ['nplurals=%d; plural=(' % len(used_tags)]
    for tag, ast in rule.abstract:
        parts.append(
            '%s ? %d : ' % (compiler.compile(ast), ordered_tags.index(tag)))
    parts.append('%d)' % ordered_tags.index(_fallback_tag))
    return ''.join(parts)
def in_range_list(num, range_list):
    """Integer range list test.  This is the callback for the "in" operator
    of the UTS #35 pluralization rule language:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    # Numbers with a fractional part never satisfy "in".
    if num != int(num):
        return False
    return within_range_list(num, range_list)
def within_range_list(num, range_list):
    """Float range test.  This is the callback for the "within" operator
    of the UTS #35 pluralization rule language:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    # Both bounds are inclusive; the first containing range settles it.
    for lower, upper in range_list:
        if num >= lower and num <= upper:
            return True
    return False
def cldr_modulo(a, b):
    """Javaish modulo.  This modulo operator returns the value with the sign
    of the dividend rather than the divisor like Python does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    dividend_negative = a < 0
    # Work on magnitudes, then restore the dividend's sign afterwards.
    if dividend_negative:
        a = a * -1
    if b < 0:
        b = b * -1
    remainder = a % b
    if dividend_negative:
        remainder = remainder * -1
    return remainder
class RuleError(Exception):
    """Error raised by the tokenizer and parser for a malformed plural rule."""
# Operand names that may appear in a rule expression.
_VARS = 'nivwft'

# Tokenizer table of ``(token type, pattern)`` pairs, tried in order at the
# current position.  A type of ``None`` means the match is consumed without
# producing a token.
_RULES = [
    # Whitespace is skipped.
    (None, re.compile(r'\s+', re.UNICODE)),
    # Keywords and single-letter operands.
    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
                        .format(_VARS))),
    # Unsigned integer literals.
    ('value', re.compile(r'\d+')),
    # Operator symbols.
    ('symbol', re.compile(r'%|,|!=|=')),
    # Range separator: two or three dots, or U+2026 (ELLIPSIS).
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)),
]
def tokenize_rule(s):
    """Split a plural rule expression into ``(type, text)`` tokens.

    Everything from the first ``@`` onwards is ignored.  The tokens are
    returned in reverse order, so the next token to consume is the last
    list element.

    :param s: the rule expression
    :return: list of ``(type, text)`` tuples, last token first
    :raise RuleError: if no pattern in ``_RULES`` matches at some position
    """
    text = s.partition('@')[0]
    tokens = []
    cursor = 0
    length = len(text)
    while cursor < length:
        for kind, pattern in _RULES:
            found = pattern.match(text, cursor)
            if found is None:
                continue
            cursor = found.end()
            # Patterns without a token type are consumed silently.
            if kind:
                tokens.append((kind, found.group()))
            break
        else:
            raise RuleError('malformed CLDR pluralization rule.  '
                            'Got unexpected %r' % text[cursor])
    tokens.reverse()
    return tokens
def test_next_token(tokens, type_, value=None):
    """Check whether the next token has the given type (and text).

    The next token is the last element of ``tokens``.  An empty ``tokens``
    list is returned as-is (falsy); otherwise the result is a boolean.

    :param tokens: list of ``(type, text)`` tuples, next token last
    :param type_: the token type to look for
    :param value: if not ``None``, the token text must equal it as well
    """
    if not tokens:
        return tokens
    upcoming = tokens[-1]
    if not upcoming[0] == type_:
        return False
    return value is None or upcoming[1] == value
def skip_token(tokens, type_, value=None):
    """Consume and return the next token if it matches, else return ``None``.

    :param tokens: list of ``(type, text)`` tuples, next token last; the
                   matched token is popped from it
    :param type_: the token type to look for
    :param value: if not ``None``, the token text must equal it as well
    """
    if not test_next_token(tokens, type_, value):
        return None
    return tokens.pop()
def value_node(value):
    """Build the AST node for a literal: ``('value', (value,))``."""
    args = (value,)
    return ('value', args)
def ident_node(name):
    """Build the AST node for an operand: ``(name, ())``."""
    no_args = ()
    return (name, no_args)
def range_list_node(range_list):
    """Build the AST node for a range list: ``('range_list', range_list)``."""
    return ('range_list', range_list)
def negate(rv):
    """Wrap the node ``rv`` in a negation node: ``('not', (rv,))``."""
    wrapped = (rv,)
    return ('not', wrapped)
class _Parser(object):
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples. It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        """Tokenize and parse ``string``, storing the tree in ``self.ast``.

        :raise RuleError: if the rule is malformed or has trailing tokens
        """
        self.tokens = tokenize_rule(string)
        if self.tokens:
            self.ast = self.condition()
            if self.tokens:
                raise RuleError('Expected end of rule, got %r' %
                                self.tokens[-1][1])
        else:
            # If the pattern is only samples, it's entirely possible
            # no stream of tokens whatsoever is generated.
            self.ast = None

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, which must match.

        :param type_: required token type
        :param value: required token text, or ``None`` for any text
        :param term: description used in the error message; defaults to the
                     repr of ``value`` (or of ``type_`` when no value given)
        :raise RuleError: if the next token does not match or none is left
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            if value is not None:
                term = repr(value)
            else:
                term = repr(type_ or None)
        if self.tokens:
            raise RuleError(
                'expected %s but got %r' % (term, self.tokens[-1][1]))
        raise RuleError('expected %s but end of rule reached' % term)

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*`` (left-associative)."""
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = 'or', (node, self.and_condition())
        return node

    def and_condition(self):
        """Parse ``relation ('and' relation)*`` (left-associative)."""
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = 'and', (node, self.relation())
        return node

    def relation(self):
        """Parse a single is/in/within relation or its ``=``/``!=`` form."""
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            if skip_token(self.tokens, 'word', 'not'):
                operator = 'isnot'
            else:
                operator = 'is'
            return operator, (left, self.value())

        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        elif negated:
            # 'not' may only precede the keyword relations.
            raise RuleError('Cannot negate operator based rules.')
        else:
            return self.newfangled_relation(left)

        node = 'relation', (method, left, self.range_list())
        if negated:
            return negate(node)
        return node

    def newfangled_relation(self, left):
        """Parse the symbolic form ``expr ('=' | '!=') range_list``."""
        negated = False
        if not skip_token(self.tokens, 'symbol', '='):
            if not skip_token(self.tokens, 'symbol', '!='):
                raise RuleError('Expected "=" or "!=" or legacy relation')
            negated = True
        node = 'relation', ('in', left, self.range_list())
        if negated:
            return negate(node)
        return node

    def range_or_value(self):
        """Parse ``value`` or ``value..value`` into a ``(low, high)`` pair.

        A single value yields a pair whose two elements are the same node.
        """
        low = self.value()
        high = self.value() if skip_token(self.tokens, 'ellipsis') else low
        return low, high

    def range_list(self):
        """Parse a comma-separated list of ranges/values into one node."""
        ranges = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            ranges.append(self.range_or_value())
        return range_list_node(ranges)

    def expr(self):
        """Parse ``operand (('mod' | '%') value)?``."""
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        if (skip_token(self.tokens, 'word', 'mod')
                or skip_token(self.tokens, 'symbol', '%')):
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        """Parse an integer literal into a value node."""
        token = self.expect('value')
        return value_node(int(token[1]))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles both operands with ``self.compile`` and
    interpolates the two results into ``tmpl``.
    """
    def compile_binary(self, left, right):
        return tmpl % (self.compile(left), self.compile(right))
    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its operand with ``self.compile`` and
    interpolates the result into ``tmpl``.
    """
    def compile_unary(self, operand):
        return tmpl % self.compile(operand)
    return compile_unary
def compile_zero(x):
    """Compile any operand to the constant ``'0'``; the argument is ignored."""
    return '0'
class _Compiler(object):
    """The compilers are able to transform the expressions into multiple
    output formats.

    Nodes are ``(op, args)`` tuples; each one is dispatched to the method
    named ``compile_<op>``.
    """

    def compile(self, arg):
        """Compile one ``(op, args)`` node via its ``compile_<op>`` method."""
        op, args = arg
        handler = getattr(self, 'compile_' + op)
        return handler(*args)

    # Operands compile to their own name.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_value(self, v):
        return str(v)

    # Default operator templates; subclasses override them as needed.
    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        """Compile a relation node; concrete compilers must override this."""
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit ``METHOD(expr, [(low, high),...])`` for a relation node.

        ``method`` is upper-cased to form the name of the called helper.
        """
        pairs = []
        for low, high in range_list[1]:
            pairs.append('(%s, %s)' % (self.compile(low), self.compile(high)))
        compiled_ranges = '[%s]' % ','.join(pairs)
        return '%s(%s, %s)' % (
            method.upper(), self.compile(expr), compiled_ranges)
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression."""

    # ``i`` compiles like ``n``; the fraction operands compile to '0'.
    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Emit an ``||``-joined chain of equality and bounds tests.

        ``method`` is not used here: "in" and "within" compile identically.
        """
        subject = self.compile(expr)
        tests = []
        for low, high in range_list[1]:
            if low == high:
                # A single value compiles to a plain equality test.
                tests.append('(%s == %s)' % (subject, self.compile(low)))
                continue
            lower = self.compile(low)
            upper = self.compile(high)
            tests.append(
                '(%s >= %s && %s <= %s)' % (subject, lower, subject, upper))
        return '(%s)' % ' || '.join(tests)
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Emit the gettext-style range test, plus an integer check for "in"."""
        code = _GettextCompiler.compile_relation(
            self, method, expr, range_list)
        if method != 'in':
            return code
        # "in" additionally requires the compiled expression to equal its
        # own parseInt() result.
        subject = self.compile(expr)
        return '(parseInt(%s, 10) == %s && %s)' % (subject, subject, code)
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Compile a negated relation node as ``expr not in|within ...``."""
        return self.compile_relation(*relation[1], negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Emit ``expr [not] method a,b..c`` for a relation node."""
        rendered = []
        for low, high in range_list[1]:
            if low == high:
                rendered.append(self.compile(low))
            else:
                rendered.append(
                    '%s..%s' % (self.compile(low), self.compile(high)))
        negation = ' not' if negated else ''
        return '%s%s %s %s' % (
            self.compile(expr), negation, method, ','.join(rendered))