| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | try: |
| | import fintl |
| | _ = fintl.gettext |
| | except ImportError: |
| | _ = lambda s: s |
| |
|
# NOTE: the module docstring doubles as the --help text printed by usage(),
# where it is run through %-interpolation against globals(); literal percent
# signs must therefore only appear as %(name)s references.
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)

Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.

There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.

Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.

A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:

    gettext("Translatable String")
    _("Translatable String")

Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.

[1] https://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] https://www.gnu.org/software/gettext/gettext.html

NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext where ever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.

Usage: pygettext [options] inputfile ...

Options:

    -a
    --extract-all
        Extract all strings.

    -d name
    --default-domain=name
        Rename the default output file from messages.pot to name.pot.

    -E
    --escape
        Replace non-ASCII characters with octal escape sequences.

    -D
    --docstrings
        Extract module, class, method, and function docstrings. These do
        not need to be wrapped in _() markers, and in fact cannot be for
        Python to consider them docstrings. (See also the -X option).

    -h
    --help
        Print this help message and exit.

    -k word
    --keyword=word
        Keywords to look for in addition to the default set, which are:
        %(DEFAULTKEYWORDS)s

        You can have multiple -k flags on the command line.

    -K
    --no-default-keywords
        Disable the default set of keywords (see above). Any keywords
        explicitly added with the -k/--keyword option are still recognized.

    --no-location
        Do not write filename/lineno location comments.

    -n
    --add-location
        Write filename/lineno location comments indicating where each
        extracted string is found in the source. These lines appear before
        each msgid. The style of comments is controlled by the -S/--style
        option. This is the default.

    -o filename
    --output=filename
        Rename the default output file from messages.pot to filename. If
        filename is `-' then the output is sent to standard out.

    -p dir
    --output-dir=dir
        Output files will be placed in directory dir.

    -S stylename
    --style stylename
        Specify which style to use for location comments. Two styles are
        supported:

        Solaris  # File: filename, line: line-number
        GNU      #: filename:line

        The style name is case insensitive. GNU style is the default.

    -v
    --verbose
        Print the names of the files being processed.

    -V
    --version
        Print the version of pygettext and exit.

    -w columns
    --width=columns
        Set width of output to columns.

    -x filename
    --exclude-file=filename
        Specify a file that contains a list of strings that are not be
        extracted from the input files. Each string to be excluded must
        appear on a line by itself in the file.

    -X filename
    --no-docstrings=filename
        Specify a file that contains a list of files (one per line) that
        should not have their docstrings extracted. This is only useful in
        conjunction with the -D option above.

If `inputfile' is -, standard input is read.
""")
| |
|
# Standard-library imports only; original order preserved.
import os
import importlib.machinery
import importlib.util
import sys
import glob
import time
import getopt
import ast
import token
import tokenize
| |
|
# Release number reported by -V/--version.
__version__ = '1.5'

# Keywords searched for by default; -K empties this list, -k adds to it.
default_keywords = ['_']
# Interpolated into __doc__ (the help text) via usage()'s % globals().
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Joiner for adjacent string fragments collected inside one _() call.
EMPTYSTRING = ''
| |
|
| |
|
| | |
| | |
| | |
# The normal pot-file header, written once at the top of the output by
# TokenEater.write().  %(time)s, %(charset)s, %(encoding)s and %(version)s
# are filled in there; the remaining ALL-CAPS placeholders are the standard
# ones translators/maintainers are expected to edit.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(encoding)s\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
| |
|
| | |
def usage(code, msg=''):
    """Print the interpolated module help text (and an optional extra
    message) to stderr, then exit with the given status code."""
    err = sys.stderr
    print(__doc__ % globals(), file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
| |
|
| |
|
| | |
def make_escapes(pass_nonascii):
    """Build the module-global `escapes` table and select the module-global
    `escape` function.

    With pass_nonascii true, only code points below 128 get table entries
    and `escape_ascii` (which passes non-ASCII through unchanged) is
    installed; otherwise all 256 byte values get entries and
    `escape_nonascii` is installed.
    """
    global escapes, escape
    if pass_nonascii:
        # e.g. 'msgid "Höhe"' stays as-is instead of becoming 'H\366he'.
        limit = 128
        escape = escape_ascii
    else:
        limit = 256
        escape = escape_nonascii
    # Default every entry to an octal escape, then overwrite the printable
    # ASCII range with the literal characters and fix up the specials.
    table = [r"\%03o" % code for code in range(limit)]
    for code in range(32, 127):
        table[code] = chr(code)
    for ch, esc in (('\\', r'\\'), ('\t', r'\t'), ('\r', r'\r'),
                    ('\n', r'\n'), ('"', r'\"')):
        table[ord(ch)] = esc
    escapes = table


def escape_ascii(s, encoding):
    """Escape chars below 128 via the table; pass the rest through."""
    pieces = []
    for ch in s:
        point = ord(ch)
        pieces.append(escapes[point] if point < 128 else ch)
    return ''.join(pieces)


def escape_nonascii(s, encoding):
    """Escape every byte of *s* after encoding it with *encoding*."""
    return ''.join([escapes[byte] for byte in s.encode(encoding)])
| |
|
| |
|
def is_literal_string(s):
    """Return true if token text *s* looks like a plain or r/R/u/U-prefixed
    string literal (f- and b-prefixed tokens deliberately don't match)."""
    quotes = '\'"'
    if s[0] in quotes:
        return True
    return s[0] in 'rRuU' and s[1] in quotes
| |
|
| |
|
def safe_eval(s):
    # unwrap quotes, safely: evaluate the string-literal token with no
    # builtins in scope so a malformed token cannot execute arbitrary code
    return eval(s, {'__builtins__':{}}, {})
| |
|
| |
|
def normalize(s, encoding):
    """Convert a Python string into the C-style quoted form used for msgid
    entries in .po files, using the module-global escape() function."""
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s, encoding) + '"'
    # A trailing newline in s leaves an empty final element; drop it and
    # fold the newline back into the (new) last line explicitly.
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    escaped = [escape(chunk, encoding) for chunk in lines]
    # Multi-line form: empty first string, then one quoted line per row.
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
| |
|
| | |
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'.

    The parameter names shadow builtins; they are kept unchanged so any
    keyword-argument callers keep working.
    """
    # any() short-circuits on the first hit instead of materializing the
    # whole membership list the way `1 in [c in str for c in set]` did.
    return any(c in str for c in set)
| |
|
| |
|
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    Globs are expanded recursively; a non-path argument is resolved as an
    importable module/package name.  Returns [] when nothing matches.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            files = glob.glob(name)
            file_list = []
            for file in files:
                file_list.extend(getFilesForName(file))
            return file_list

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except ImportError:
            spec = None
        # Bug fix: find_spec() may legitimately return None (or a spec with
        # no origin, e.g. a namespace package); the original dereferenced
        # spec.origin unconditionally, raising AttributeError that the
        # `except ImportError` did not catch.
        if spec is None or not spec.origin:
            return []
        name = spec.origin

    if os.path.isdir(name):
        # find all python files in directory
        file_list = []
        # get extension for python source files
        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if 'CVS' in dirs:
                dirs.remove('CVS')
            # add all *.py files to list
            file_list.extend(
                [os.path.join(root, file) for file in files
                 if os.path.splitext(file)[1] == _py_ext]
            )
        return file_list
    elif os.path.exists(name):
        # a single file
        return [name]

    return []
| |
|
| | |
class TokenEater:
    """Consume the token stream of a Python source file and collect
    translatable strings.

    An instance is called once per token produced by tokenize.tokenize()
    and drives a small state machine whose states are the private methods
    __waiting, __suiteseen, __suitedocstring, __keywordseen and __openseen.
    Collected messages accumulate in a dict and are emitted in .pot format
    by write().
    """

    def __init__(self, options):
        self.__options = options          # the command-line Options object
        self.__messages = {}              # msg -> {(filename, lineno): isdocstring}
        self.__state = self.__waiting     # current state-machine handler
        self.__data = []                  # string fragments of the current _() call
        self.__lineno = -1                # line where the current call's '(' was seen
        self.__freshmodule = 1            # still before the module docstring?
        self.__curfile = None             # filename currently being scanned
        self.__enclosurecount = 0         # bracket depth inside a class/def header

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch the token to whatever state handler is current.
        # stup is the (row, col) start position; only the row (lineno) is used.
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        # Default state: watch for docstrings, keyword calls and f-strings.
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                # comments, blank lines and the ENCODING token may precede
                # the module docstring without disqualifying it
                if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
                    return
                self.__freshmodule = 0
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen
            return
        if ttype == tokenize.STRING:
            # The token may be an f-string; walk its AST looking for
            # keyword calls embedded in replacement fields.
            maybe_fstring = ast.parse(tstring, mode='eval').body
            if not isinstance(maybe_fstring, ast.JoinedStr):
                return
            for value in filter(lambda node: isinstance(node, ast.FormattedValue),
                                maybe_fstring.values):
                for call in filter(lambda node: isinstance(node, ast.Call),
                                   ast.walk(value)):
                    func = call.func
                    if isinstance(func, ast.Name):
                        func_name = func.id
                    elif isinstance(func, ast.Attribute):
                        func_name = func.attr
                    else:
                        continue

                    if func_name not in opts.keywords:
                        continue
                    # only simple `_("literal")`-shaped calls are extractable
                    if len(call.args) != 1:
                        print(_(
                            '*** %(file)s:%(lineno)s: Seen unexpected amount of'
                            ' positional arguments in gettext call: %(source_segment)s'
                        ) % {
                            'source_segment': ast.get_source_segment(tstring, call) or tstring,
                            'file': self.__curfile,
                            'lineno': lineno
                        }, file=sys.stderr)
                        continue
                    if call.keywords:
                        print(_(
                            '*** %(file)s:%(lineno)s: Seen unexpected keyword arguments'
                            ' in gettext call: %(source_segment)s'
                        ) % {
                            'source_segment': ast.get_source_segment(tstring, call) or tstring,
                            'file': self.__curfile,
                            'lineno': lineno
                        }, file=sys.stderr)
                        continue
                    arg = call.args[0]
                    if not isinstance(arg, ast.Constant):
                        print(_(
                            '*** %(file)s:%(lineno)s: Seen unexpected argument type'
                            ' in gettext call: %(source_segment)s'
                        ) % {
                            'source_segment': ast.get_source_segment(tstring, call) or tstring,
                            'file': self.__curfile,
                            'lineno': lineno
                        }, file=sys.stderr)
                        continue
                    if isinstance(arg.value, str):
                        self.__addentry(arg.value, lineno)

    def __suiteseen(self, ttype, tstring, lineno):
        # Skip over any enclosure pairs until we see the colon that ends
        # the class/def header.
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                # we see a colon and we're not in an enclosure: end of def
                self.__state = self.__suitedocstring
            elif tstring in '([{':
                self.__enclosurecount += 1
            elif tstring in ')]}':
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise before the suite's first statement
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        # A keyword (e.g. _) was seen; it only counts if '(' follows directly.
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the
            # list of messages seen.  If there were no strings inside _(),
            # then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            # adjacent literals are implicitly concatenated
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything other than strings and whitespace
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
            ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
            }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        # Record one occurrence of msg unless it was explicitly excluded
        # via the -x/--exclude-file option.
        if lineno is None:
            lineno = self.__lineno
        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        # Start scanning a new file; re-arm module-docstring detection.
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        # Emit the accumulated messages as a .pot file on fp.
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        print(pot_header % {'time': timestamp, 'version': __version__,
                            'charset': encoding,
                            'encoding': '8bit'}, file=fp)
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may
                # wish to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of
                # (filename, lineno) tuples.  We want to sort the entries
                # in v first by file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
| |
|
| |
|
| | |
def main():
    """Command-line entry point: parse options, scan the inputs, write .pot."""
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             'docstrings', 'no-docstrings',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            # each line of the file names one input file whose docstrings
            # must not be extracted
            fp = open(arg)
            try:
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    options.nodocstrings[line[:-1]] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                options.toexclude = fp.readlines()
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
| |
|
| | |
if __name__ == '__main__':
    main()
    # some more test strings for pygettext to find when run on itself;
    # these have no runtime effect
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')
| |
|