Spaces:
Build error
Build error
| #!/usr/bin/python -O | |
| r""" | |
| Copyright 2005-2020 Peter Gacs | |
| Licensed under the Academic Free Licence version 2.1 | |
| DE-MACRO | |
| Version 1.4.1 - A small typo corrected. | |
| Version 1.4 - Luca Citi made it python2.7 and python3 compatible. | |
| Peter Gacs improved the parsing of \input{<filename>}, | |
| and made @ a letter in the style files. | |
| Version 1.3 - this version is much more conservative about deleting | |
| comments and inserting or deleting blank space: tries to | |
| leave in all comments, adds space only when necessary, and | |
| tries not to delete space in the main text. | |
| The motivating comments came from Daniel Webb. | |
| Version 1.2 - a syntactical bug corrected, thanks Brian de Alwis! | |
| PURPOSE | |
| This program can eliminate most private macros from a LaTeX file. | |
| Applications: | |
| - your publisher has difficulty dealing with many private macros | |
| - you cooperate with colleagues who do not understand your macros | |
| - preprocessing before a system like latex2html, which is somewhat | |
| unpredictable with private macros. | |
| It cannot be used to eliminate more complex macros that rely on | |
| more programming-like constructs in style files. In particular, it will | |
| not replace style files that have options. | |
| USAGE | |
| de-macro [--defs <defs-db>] <tex-file-1>[.tex] [<tex-file-2>[.tex] ...] | |
| Simplest example: de-macro testament | |
| (As you see, the <> is used only in the notation of this documentation, | |
| you should not type it.) | |
| If <tex-file-i> contains a command \usepackage{<defs-file>-private} | |
| then the file <defs-file>-private.sty will be read, and its macros will be | |
| replaced in <tex-file-i> with their definitions. | |
| The result is in <tex-file-i>-clean.tex. | |
| Only newcommand, renewcommand, newenvironment, and renewenvironment are | |
| understood (it does not matter, whether you write new or renew). | |
| These can be nested but do not be too clever, since I do not | |
| guarantee the same expansion order as in TeX. | |
| FILES | |
| <tex-file-1>.db | |
| <tex-file>-clean.tex | |
| <defs-file>-private.sty | |
| For speed, a macro database file called <defs-file>.db is created. | |
| If such a file exists already then it is used. | |
| If <defs-file>-private.sty is older than <tex-file-1>.db then it will not | |
| be used. | |
| It is possible to specify another database filename via --defs <defs-db>. | |
| Then <defs-db>.db will be used. | |
| For each <tex-file-i>, a file <tex-file-i>-clean.tex will be produced. | |
| If <tex-file-i>-clean.tex is newer than <tex-file-i>.tex then it stays. | |
| INPUT COMMAND | |
| If a tex file contains a command \input{<tex-file-j>} or \input <tex-file-j> | |
| then <tex-file-j>.tex is processed recursively, and <tex-file-j>-clean.tex | |
| will be inserted into the final output. | |
| For speed, if <tex-file-j>-clean.tex is newer than <tex-file-j>.tex | |
| then <tex-file-j>.tex will not be reprocessed. | |
| The dependency checking is not sophisticated, so if you rewrite some macros | |
| then remove all *-clean.tex files! | |
| """ | |
| import sys, os, re, shelve | |
| # Utilities | |
| class No_detail: | |
| strerror = "" | |
| no_detail = No_detail() | |
| class Error(Exception): | |
| """Base class for exceptions in this module.""" | |
| pass | |
| class Empty_text_error(Error): | |
| """Exception raised for errors in the input. | |
| Attributes: | |
| data -- data that was found empty | |
| message | |
| """ | |
| def __init__(self, data, message): | |
| self.data = data | |
| self.message = message | |
| def warn(error_message, detail = no_detail): | |
| sys.stderr.write(error_message + "\n") | |
| if no_detail != detail: | |
| sys.stderr.write(detail.strerror + "\n") | |
| def die(error_message, detail = no_detail): | |
| warn(error_message, detail = no_detail) | |
| sys.exit(1) | |
| def getopt_map(one_letter_opts, long_optlist): | |
| "Turns long options into an option map, using getopt." | |
| import getopt | |
| optlist, args = getopt.getopt(sys.argv[1:], | |
| one_letter_opts, long_optlist) | |
| opt_map = {} | |
| for pair in optlist: opt_map[pair[0]] = pair[1] or 1 | |
| return opt_map, args | |
| def newer(file1, file2): | |
| if not os.path.isfile(file1): | |
| return False | |
| try: | |
| stat_return = os.lstat(file1) | |
| except OSError as detail: | |
| die("lstat " + file1 + " failed:", detail) | |
| time1 = stat_return.st_mtime | |
| try: | |
| stat_return = os.lstat(file2) | |
| except OSError as detail: | |
| die("lstat " + file2 + " failed:", detail) | |
| time2 = stat_return.st_mtime | |
| return time1 > time2 | |
| def cut_extension(filename, ext): | |
| """ | |
| If filename has extension ext (including the possible dot), | |
| it will be cut off. | |
| """ | |
| file = filename | |
| index = filename.rfind(ext) | |
| if 0 <= index and len(file)-len(ext) == index: | |
| file = file[:index] | |
| return file | |
| class Stream: | |
| data = None | |
| pos = None | |
| item = None | |
| def legal(self): | |
| return 0 <= self.pos and self.pos < len(self.data) | |
| def uplegal(self): | |
| return self.pos < len(self.data) | |
| def __init__(self, data_v = None): | |
| self.data = data_v | |
| if self.data: | |
| self.pos = 0 | |
| self.item = self.data[self.pos] | |
| def next(self): | |
| self.pos += 1 | |
| if self.pos < len(self.data): | |
| self.item = self.data[self.pos] | |
| return self.item | |
| def reset(self): | |
| if self.data and 0 < len(self.data): | |
| self.pos = 0 | |
| self.item = self.data[0] | |
| return self.item | |
| # Basic classes | |
| blank_re = re.compile(r"\s") | |
| blanked_filename_re = re.compile(r"^\s+(\w*)\s+") | |
| braced_filename_re = re.compile(r"^\s*{\s*(\w*)\s*}") | |
| blank_or_rbrace_re = re.compile(r"[\s}]") | |
| pos_digit_re = re.compile(r"[1-9]") | |
| def isletter(c, isatletter=False): | |
| if "@" == c: | |
| return isatletter | |
| else: | |
| return c.isalpha() | |
| class Token: | |
| """Type 0 means ordinary character, types 1,2 mean escape sequence | |
| (without the \ ), type 3 means comment. | |
| """ | |
| simple_ty = 0 | |
| esc_symb_ty = 1 | |
| esc_str_ty = 2 | |
| comment_ty = 3 | |
| type = simple_ty | |
| val = " " | |
| def __init__(self, type_v=simple_ty, val_v=" "): | |
| self.type = type_v | |
| self.val = val_v | |
| def show(self): | |
| out = "" | |
| if simple_ty == self.type or comment_ty == self.type: | |
| out = self.val | |
| else: | |
| out = "\\" + self.val | |
| return out | |
| # Constants | |
| g_token = Token(0," ") # generic token | |
| simple_ty = g_token.simple_ty | |
| comment_ty = g_token.comment_ty | |
| esc_symb_ty = g_token.esc_symb_ty | |
| esc_str_ty = g_token.esc_str_ty | |
| def detokenize(text, isatletter=False): | |
| """ | |
| Input is a list of tokens. | |
| Output is a string. | |
| """ | |
| out = "" | |
| if 0 == len(text): | |
| return | |
| pos = 0 | |
| out += text[pos].show() | |
| pos += 1 | |
| while pos < len(text): | |
| previtem = text[pos-1] | |
| item = text[pos] | |
| """Insert a separating space after an escape sequence if it is a | |
| string and is followed by a letter.""" | |
| if (esc_str_ty == previtem.type | |
| and simple_ty == item.type and isletter(item.val[0], isatletter)): | |
| out += " " | |
| out += item.show() | |
| pos += 1 | |
| return out | |
| def strip_comments(text): | |
| """ | |
| Input is a list of tokens. | |
| Output is the same list except the comment tokens. | |
| """ | |
| out = [] | |
| for token in text: | |
| if not comment_ty == token.type: | |
| out.append(token) | |
| return out | |
| class Group: | |
| """type 0 means a token, type 1 means contents of a group within {} | |
| """ | |
| token_ty = 0 | |
| group_ty = 1 | |
| type = token_ty | |
| val = [] # Value is a token list. | |
| def __init__(self, type_v, val_v): | |
| self.type = type_v | |
| self.val = val_v | |
| def show(self): | |
| if token_ty == self.type: | |
| return self.val.show() | |
| else: | |
| return "{%s}" % detokenize(self.val) | |
| # Constants | |
| g_group = Group(0, []) | |
| token_ty = g_group.token_ty | |
| group_ty = g_group.group_ty | |
| def tokenize(in_str, isatletter=False): | |
| """Returns a list of tokens. | |
| """ | |
| text = [] | |
| cs = Char_stream(in_str) | |
| cs.reset() | |
| if not cs.legal(): | |
| raise Error("No string to tokenize.") | |
| while cs.uplegal(): | |
| if "%" == cs.item: | |
| comment = cs.scan_comment_token() | |
| text.append(Token(comment_ty, comment)) | |
| elif "\\" != cs.item: | |
| text.append(Token(simple_ty, cs.item)) | |
| cs.next() | |
| else: | |
| cs.next() | |
| name = cs.scan_escape_token(isatletter) | |
| if isletter(name[0], isatletter): | |
| token = Token(esc_str_ty, name) | |
| else: | |
| token = Token(esc_symb_ty, name) | |
| text.append(token) | |
| if "makeatletter" == name: | |
| isatletter=True | |
| elif "makeatother" == name: | |
| isatletter=False | |
| return text | |
| class Command_def: | |
| name = "1" | |
| numargs = 0 | |
| body= "" | |
| def __init__(self, name_v, numargs_v, body_v): | |
| self.name = name_v | |
| self.numargs = numargs_v | |
| self.body = body_v | |
| def show(self): | |
| out = "\\newcommand{\\%s}" % (self.name) | |
| if 0 < self.numargs: | |
| out += "[%d]" % self.numargs | |
| out += "{%s}" % detokenize(self.body) | |
| return out | |
| class Env_def: | |
| name = "1" | |
| numargs = 0 | |
| begin = "" | |
| end = "" | |
| def __init__(self, name_v, numargs_v, begin_v, end_v): | |
| self.name = name_v | |
| self.numargs = numargs_v | |
| self.begin = begin_v | |
| self.end = end_v | |
| def show(self): | |
| out = "\\newenvironment{%s}" % self.name | |
| if 0 < self.numargs: | |
| out += "[%d]" % self.numargs | |
| out += "{%s}" % detokenize(self.begin) | |
| out += "{%s}" % detokenize(self.end) | |
| return out | |
| class Command_instance: | |
| name = "1" | |
| args = [] | |
| def __init__(self, name_v, args_v): | |
| self.name = name_v | |
| self.args = args_v | |
| def show(self): | |
| out = "\\"+self.name | |
| for arg in self.args: | |
| out += "{%s}" % detokenize(arg) | |
| return out | |
| class Env_instance: | |
| name = "1" | |
| args = [] | |
| def __init__(self, name_v, args_v, body_v): | |
| self.name = name_v | |
| self.args = args_v | |
| self.body = body_v | |
| def show(self): | |
| out = "\\begin{%s}" % self.name | |
| for arg in self.args: | |
| out += "{%s}" % detokenize(arg) | |
| out += detokenize(self.body) | |
| out += "\\end{%s}" % self.name | |
| return out | |
| class Char_stream(Stream): | |
| def scan_escape_token(self, isatletter=False): | |
| """ | |
| Starts after the escape sign, assumes that it is scanning a symbol. | |
| Returns a token-string. | |
| """ | |
| out = self.item # Continue only if this is a letter. | |
| item = self.next() | |
| if isletter(out, isatletter): | |
| while self.uplegal() and isletter(item, isatletter): | |
| out += item | |
| item = self.next() | |
| return out | |
| def scan_comment_token(self): | |
| """ | |
| Starts at the comment sign %, assumes that it is scanning a comment. | |
| Returns the whole comment string, | |
| including the % and all empty space after it. | |
| """ | |
| comment = "" | |
| while self.uplegal() and "\n" != self.item: | |
| comment += self.item | |
| self.next() | |
| while self.uplegal() and blank_re.match(self.item): | |
| comment += self.item | |
| self.next() | |
| return comment | |
| def scan_input_filename(self): | |
| """We have just read an \input token. The next group or word will be | |
| interpreted as a filename (possibly without .tex). Filenames should not begin with spaces. | |
| Return the filename. | |
| """ | |
| item = self.item | |
| file = "" | |
| while self.uplegal() and blank_re.match(self.item): | |
| item = self.next() | |
| if "{" == item: | |
| item = self.next() | |
| while self.uplegal() and not "}" == item: | |
| file += item | |
| item = self.next() | |
| self.next() | |
| else: | |
| while self.uplegal() and not blank_re.match(item): | |
| file += item | |
| item = self.next() | |
| return file | |
| def scan_package_filenames(self): | |
| r"""We just read a \usepackage token. The next group will be | |
| interpreted as a list of filenames (without .sty) separated by commas. | |
| Return the list. | |
| """ | |
| item = self.item | |
| while self.uplegal() and blank_re.match(item): | |
| item = self.next() | |
| file = "" | |
| if not "{" == item: | |
| raise Error("\\usepackage not followed by brace.") | |
| item = self.next() | |
| while self.uplegal() and not blank_or_rbrace_re.match(item): | |
| file += item | |
| item = self.next() | |
| self.next() | |
| return file.split(",") | |
| class Tex_stream(Stream): | |
| defs = ({}, {}) | |
| defs_db = "x" | |
| defs_db_file = "x.db" | |
| debug = False | |
| def smart_tokenize(self, in_str, handle_inputs=False, isatletter=False): | |
| """Returns a list of tokens. | |
| It may interpret and carry out all \input commands. | |
| """ | |
| self.data = [] | |
| text = self.data | |
| cs = Char_stream(in_str) | |
| cs.reset() | |
| if not cs.legal(): | |
| raise Error("No string to tokenize.") | |
| while cs.uplegal(): | |
| if "%" == cs.item: | |
| comment = cs.scan_comment_token() | |
| text.append(Token(comment_ty, comment)) | |
| elif "\\" != cs.item: | |
| text.append(Token(simple_ty, cs.item)) | |
| cs.next() | |
| else: | |
| cs.next() | |
| name = cs.scan_escape_token(isatletter) | |
| if "input" == name and handle_inputs: | |
| file = cs.scan_input_filename() | |
| to_add = self.process_if_newer(file) | |
| text.extend(to_add) | |
| elif "usepackage" == name: | |
| while cs.uplegal() and blank_re.match(cs.item): | |
| cs.next() | |
| if "[" == cs.item: # Packages with options will not be processed. | |
| text.extend([Token(esc_str_ty, "usepackage"), | |
| Token(simple_ty, "[")]) | |
| cs.next() | |
| continue | |
| files = cs.scan_package_filenames() | |
| i = 0 | |
| while i < len(files): # process private packages | |
| file = files[i] | |
| p = file.rfind("-private") | |
| if p < 0 or not len(file) - len("-private") == p: | |
| i += 1 | |
| continue | |
| defs_db_file = file+".db" | |
| self.add_defs(file) | |
| del files[i:(i+1)] | |
| if files: # non-private packages left | |
| group_content = ",".join(files) | |
| to_add_str = "\\usepackage{%s}" % (group_content) | |
| to_add = tokenize(to_add_str,isatletter) | |
| text.extend(to_add) | |
| else: | |
| if isletter(name[0], isatletter): | |
| token = Token(esc_str_ty, name) | |
| else: | |
| token = Token(esc_symb_ty, name) | |
| text.append(token) | |
| if "makeatletter" == name: | |
| isatletter=True | |
| elif "makeatother" == name: | |
| isatletter=False | |
| self.reset() | |
| return self.data | |
| def smart_detokenize(self,isatletter=False): | |
| """ | |
| Output is a string. | |
| If the list contains an \input{file} then the content of file | |
| file-clean.tex replaces it in the output. | |
| """ | |
| self.reset() | |
| if not self.legal(): | |
| return "" | |
| out = "" | |
| previtem = None | |
| while self.uplegal(): | |
| item = self.item | |
| """Insert a separating space after an escape sequence if it is a | |
| string and is followed by a letter.""" | |
| if (None != previtem and esc_str_ty == previtem.type | |
| and simple_ty == item.type and isletter(item.val[0], isatletter)): | |
| out += " " | |
| previtem = item | |
| if not (esc_str_ty == item.type and "input" == item.val): | |
| out += item.show() | |
| self.next() | |
| else: | |
| self.next() | |
| group = self.scan_group() | |
| file = detokenize(group.val) | |
| clean_file = "%s-clean.tex" % (file) | |
| print("Reading file %s" % (clean_file)) | |
| fp = open(clean_file,"r") | |
| content = fp.read() | |
| fp.close() | |
| out += content | |
| return out | |
| # Basic tex scanning | |
| def skip_blank_tokens(self): # we also skip comment tokens. | |
| item = self.item | |
| while (self.uplegal() and | |
| (comment_ty == item.type or | |
| (simple_ty == item.type and blank_re.match(item.val)))): | |
| item = self.next() | |
| return item | |
| def scan_group(self): | |
| """Returns group. | |
| """ | |
| if not self.legal(): | |
| raise Error("No group to scan.") | |
| item = self.item | |
| if not (simple_ty == item.type and "{" == item.val): | |
| return Group(token_ty, [self.item]) | |
| count = 1 | |
| group = [] | |
| item = self.next() | |
| while count and self.uplegal(): | |
| if simple_ty == item.type: | |
| if "{" == item.val: | |
| count += 1 | |
| elif "}" == item.val: | |
| count -= 1 | |
| if count != 0: | |
| group.append(item) | |
| item = self.next() | |
| return Group(group_ty, group) | |
| # Command and environment definitions | |
| def scan_command_name(self): | |
| """Returns name. | |
| """ | |
| if not self.legal(): | |
| raise Error("No command name to scan.") | |
| item = self.item | |
| name = "" | |
| if item.type in [esc_symb_ty, esc_str_ty]: | |
| name = item.val | |
| else: | |
| if not "{" == item.val: | |
| raise Error("Command definition misses first {.") | |
| self.next() | |
| item = self.skip_blank_tokens() | |
| if not item.type in [esc_symb_ty, esc_str_ty]: | |
| raise Error("Command definition does not begin with control sequence.") | |
| name = item.val | |
| self.next() | |
| item = self.skip_blank_tokens() | |
| if not "}" == item.val: | |
| raise Error("Definition for commmand %s misses first }., %s" % | |
| (name, item.val)) | |
| self.next() | |
| self.skip_blank_tokens() | |
| return name | |
| def scan_numargs(self, name): | |
| """ | |
| name is the name of the command or environment definition being | |
| scanned. | |
| Starts on a nonblank token. | |
| Returns numargs | |
| where numargs is the number of arguments in a command or environment | |
| definition, | |
| """ | |
| if not self.legal(): | |
| raise Error("No numargs to scan.") | |
| item = self.item | |
| numargs = 0 | |
| if not simple_ty == item.type: | |
| raise Error("Illegal command or environment definition: "+name) | |
| if "[" == item.val: | |
| if not 4 < len(self.data): | |
| raise Error("Command or environment definition is illegal: "+name) | |
| item = self.next() | |
| if not simple_ty == item.type: | |
| raise Error("Illegal command or environment definition: "+name) | |
| numargs = item.val | |
| if not pos_digit_re.match(numargs): | |
| raise Error("%s must be argument number after %s" % (numargs, name)) | |
| numargs = int(numargs) | |
| self.next() | |
| item = self.skip_blank_tokens() | |
| if not simple_ty == item.type: | |
| raise Error("Illegal command definition: "+name) | |
| if "]" != item.val: | |
| raise Error("Illegal command definition: "+name) | |
| self.next() | |
| self.skip_blank_tokens() | |
| return numargs | |
| def scan_command_def(self): | |
| """Scan a command definition. | |
| Return command_def. | |
| Assumes that the number of arguments is at most 9. | |
| """ | |
| if not self.legal(): | |
| raise Error("No command definition to scan.") | |
| item = self.item | |
| if not 2 < len(self.data): | |
| raise Error("Command definition is illegal.") | |
| # newcommand or renewcommand | |
| if not item.type in [esc_symb_ty, esc_str_ty]: | |
| raise Error("Command definition should begin with control sequence: "+item.val) | |
| if item.val not in ["newcommand", "renewcommand"]: | |
| raise Error("Command definition should begin with control sequence.") | |
| self.next() | |
| self.skip_blank_tokens() | |
| cmd_name = self.scan_command_name() | |
| numargs = self.scan_numargs(cmd_name) | |
| body_group = self.scan_group() | |
| if group_ty != body_group.type: | |
| raise Error("Command body missing: "+cmd_name) | |
| body_val = strip_comments(body_group.val) | |
| return Command_def(cmd_name, numargs, body_val) | |
| def scan_env_name(self): | |
| """Starts on a {. | |
| Returns name. | |
| """ | |
| if not self.legal(): | |
| raise Error("No environment name to scan.") | |
| item = self.item | |
| if not "{" == item.val: | |
| raise Error("Env. definition begins with %s, not with {" % (item.val)) | |
| self.next() | |
| item = self.skip_blank_tokens() | |
| name = "" | |
| if not simple_ty == item.type: | |
| raise Error("1. Env. def. begins with cont. seq. %s, not with env.name." | |
| % (item.val)) | |
| while self.uplegal() and not blank_or_rbrace_re.match(item.val): | |
| name += item.val | |
| item = self.next() | |
| if not simple_ty == item.type: | |
| raise Error("2. Env. def. begins with cont. seq. %s, not with env.name." | |
| % (item.val)) | |
| item = self.skip_blank_tokens() | |
| if not "}" == item.val: | |
| raise Error("Command definition does not begin with control sequence.") | |
| self.next() | |
| self.skip_blank_tokens() | |
| return name | |
| def scan_env_def(self): | |
| """Scan an environment definition. | |
| Return env_def | |
| Assumes that the number of arguments is at most 9. | |
| """ | |
| if not self.legal(): | |
| raise Error("No environment definition to scan.") | |
| item = self.item | |
| if not 7 < len(self.data): | |
| raise Error("Environment definition is illegal.") | |
| pos = 0 | |
| if not item.type in [esc_symb_ty, esc_str_ty]: | |
| raise Error("Env. definition does not begin with control sequence:"+ | |
| item.val) | |
| if item.val not in ["newenvironment", "renewenvironment"]: | |
| raise Error("Env. definition does not begin with control sequence.") | |
| self.next() | |
| self.skip_blank_tokens() | |
| env_name = self.scan_env_name() | |
| numargs = self.scan_numargs(env_name) | |
| self.skip_blank_tokens() | |
| begin_group = self.scan_group() | |
| if group_ty != begin_group.type: | |
| raise Error("Begin body missing: "+env_name) | |
| begin_val = strip_comments(begin_group.val) | |
| self.skip_blank_tokens() | |
| end_group = self.scan_group() | |
| if group_ty != end_group.type: | |
| raise Error("End body missing:"+env_name) | |
| end_val = strip_comments(end_group.val) | |
| return Env_def(env_name, numargs, begin_val, end_val) | |
| def scan_defs(self): | |
| if not self.legal(): | |
| raise Error("No definitions to scan.") | |
| self.reset() | |
| command_defs, env_defs = self.defs | |
| while self.uplegal(): | |
| if (esc_str_ty == self.item.type | |
| and self.item.val in ["newcommand", "renewcommand"]): | |
| def_start_pos = self.pos | |
| command_def = self.scan_command_def() | |
| command_defs[command_def.name] = command_def | |
| def_end_pos = self.pos | |
| for del_pos in range(def_start_pos,def_end_pos): | |
| del self.data[def_start_pos] | |
| self.pos = def_start_pos | |
| self.item = self.data[self.pos] | |
| elif (esc_str_ty == self.item.type and self.item.val | |
| in ["newenvironment", "renewenvironment"]): | |
| def_start_pos = self.pos | |
| env_def = self.scan_env_def() | |
| env_defs[env_def.name] = env_def | |
| def_end_pos = self.pos | |
| for del_pos in range(def_start_pos,def_end_pos): | |
| del self.data[def_start_pos] | |
| self.pos = def_start_pos | |
| self.item = self.data[self.pos] | |
| else: | |
| self.next() | |
| # Instances | |
| def scan_args(self, command_or_env_def): | |
| """Scan the arguments of a command or environment. | |
| Return [args]. | |
| """ | |
| if not self.legal(): | |
| raise Error("No arguments to scan.") | |
| numargs = command_or_env_def.numargs | |
| name = command_or_env_def.name | |
| args = [] | |
| for i in range(numargs): | |
| arg = [] | |
| if not (simple_ty == self.item.type and "{" == self.item.val): | |
| arg = [self.item] | |
| self.next() | |
| else: | |
| group = self.scan_group() | |
| arg = group.val | |
| args.append(arg) | |
| return args | |
| def scan_command(self, command_def): | |
| """Scan the arguments of a command. | |
| Return command_instance | |
| """ | |
| if not self.legal(): | |
| raise Error("No command to scan.") | |
| if not self.item.type in [esc_symb_ty, esc_str_ty]: | |
| raise Error("Command does not begin with control sequence.") | |
| name = self.item.val | |
| self.next() | |
| if 0 < command_def.numargs: | |
| self.skip_blank_tokens() | |
| args = self.scan_args(command_def) | |
| else: | |
| args = [] | |
| return Command_instance(name, args) | |
| def test_env_boundary(self, item): | |
| """Check whether an environment begin or end follows. | |
| Return 1 if \begin, -1 if \end, 0 otherwise. | |
| """ | |
| d = 0 | |
| if esc_str_ty == item.type: | |
| if "begin"==item.val: | |
| d = 1 | |
| elif "end"==item.val: | |
| d = -1 | |
| return d | |
| def scan_env_begin(self): | |
| """Scan an environment name. | |
| Return env_name. | |
| """ | |
| if not self.legal(): | |
| raise Error("No environment begin to scan.") | |
| item = self.item | |
| if not (esc_str_ty == item.type and "begin" == item.val): | |
| raise Error("Environment does not begin with begin.") | |
| self.next() | |
| name_group = self.scan_group() | |
| name = detokenize(name_group.val) | |
| return name | |
| def scan_env_end(self): | |
| """Scan an environment end. | |
| Return env_name. | |
| """ | |
| if not self.legal(): | |
| raise Error("No environment end to scan.") | |
| item = self.item | |
| if not (esc_str_ty == item.type and "end" == item.val): | |
| raise Error("Environment does not end with end.") | |
| self.next() | |
| name_group = self.scan_group() | |
| name = detokenize(name_group.val) | |
| return name | |
| def scan_env_rest(self, env_def): | |
| """Scanning starts after \begin{envname}. | |
| Returns env_instance. | |
| """ | |
| if not self.legal(): | |
| raise Error("No environment rest to scan.") | |
| count = 1 # We are already within a boundary. | |
| args = self.scan_args(env_def) | |
| body = [] | |
| while count and self.uplegal(): | |
| old_pos = self.pos | |
| d = self.test_env_boundary(self.item) | |
| count += d | |
| if 1 == d: | |
| self.scan_env_begin() | |
| elif -1 == d: | |
| self.scan_env_end() | |
| else: | |
| self.next() | |
| if 0 < count: | |
| body.extend(self.data[old_pos : self.pos]) | |
| return Env_instance(env_def.name, args, body) | |
| # Definitions | |
| def restore_defs(self): | |
| if os.path.isfile(self.defs_db_file): | |
| print("Using defs db %s" % (self.defs_db_file)) | |
| db_h = shelve.open(self.defs_db) | |
| self.defs = db_h["defs"] | |
| db_h.close() | |
| def save_defs(self): | |
| db_h = shelve.open(self.defs_db) | |
| if "defs" in db_h: | |
| del db_h["defs"] | |
| db_h["defs"] = self.defs | |
| db_h.close() | |
| def add_defs(self, defs_file): | |
| defs_file_compl = defs_file + ".sty" | |
| if not os.path.isfile(defs_file_compl): | |
| raise Error("%s does not exist" % (defs_file_compl)) | |
| defs_db_file = self.defs_db_file | |
| if newer(defs_db_file, defs_file_compl): | |
| print("Using defs db %s for %s" % (defs_db_file, defs_file)) | |
| else: | |
| defs_fp = open(defs_file_compl, "r") | |
| defs_str = defs_fp.read() | |
| defs_fp.close() | |
| ds = Tex_stream() | |
| ds.defs = self.defs | |
| defs_text = ds.smart_tokenize(defs_str,isatletter=True) | |
| # changing ds.defs will change self.defs | |
| if self.debug: | |
| defs_seen_file = "%s-seen.sty" % (defs_file) | |
| defs_seen_fp = open(defs_seen_file, "w") | |
| out = detokenize(defs_text,isatletter=True) | |
| defs_seen_fp.write(out) | |
| defs_seen_fp.close() | |
| ds.scan_defs() | |
| if self.debug: | |
| out = "" | |
| command_defs, env_defs = self.defs | |
| for def_name in command_defs.keys(): | |
| out += command_defs[def_name].show() + "\n" | |
| for def_name in env_defs.keys(): | |
| out += env_defs[def_name].show() +"\n" | |
| print("Definitions after reading %s:" % (defs_file)) | |
| print(out) | |
| # Applying definitions, recursively | |
| # (maybe not quite in Knuth order, so avoid tricks!) | |
| def subst_args(self, body, args): | |
| out = [] | |
| pos = 0 | |
| while pos < len(body): | |
| item = body[pos] | |
| if not (simple_ty == item.type and "#" == item.val): | |
| out.append(item) | |
| pos += 1 | |
| continue | |
| pos += 1 | |
| token = body[pos] | |
| argnum = token.val | |
| if not pos_digit_re.match(argnum): | |
| raise Error("# is not followed by number.") | |
| argnum = int(argnum) | |
| if argnum > len(args): | |
| raise Error("Too large argument number.") | |
| arg = args[argnum-1] | |
| out += arg | |
| pos += 1 | |
| return out | |
| def apply_command_recur(self, command_instance): | |
| command_defs, env_defs = self.defs | |
| name = command_instance.name | |
| command_def = command_defs[name] | |
| args = command_instance.args | |
| body = command_def.body | |
| result = self.subst_args(body, args) | |
| try: | |
| result = self.apply_all_recur(result) | |
| except Empty_text_error as e: | |
| raise Error("apply_all_recur fails on command instance %s: %s, %s" % \ | |
| (command_instance.show(), detokenize(e.data), e.message)) | |
| return result | |
| def apply_env_recur(self, env_instance): | |
| command_defs, env_defs = self.defs | |
| name = env_instance.name | |
| env_def = env_defs[name] | |
| begin, end = env_def.begin, env_def.end | |
| body, args = env_instance.body, env_instance.args | |
| out = self.subst_args(begin, args) + body + self.subst_args(end, args) | |
| return self.apply_all_recur(out) | |
| def apply_all_recur(self, data, report=False): | |
| ts = Tex_stream(data) | |
| ts.defs = self.defs | |
| command_defs, env_defs = self.defs | |
| out = [] | |
| progress_step = 10000 | |
| progress = progress_step | |
| if not ts.legal(): | |
| raise Empty_text_error(data, "No text to process.") | |
| while ts.uplegal(): | |
| if self.pos > progress: | |
| if report: | |
| print(self.pos) | |
| progress += progress_step | |
| if not ts.item.type in [esc_symb_ty, esc_str_ty]: | |
| out.append(ts.item) | |
| ts.next() | |
| continue | |
| if 1 == ts.test_env_boundary(ts.item): | |
| old_pos = ts.pos | |
| env_name = ts.scan_env_begin() | |
| if env_name not in env_defs: | |
| out.extend(ts.data[old_pos : ts.pos]) | |
| continue | |
| else: | |
| env_def = env_defs[env_name] | |
| env_instance = ts.scan_env_rest(env_def) | |
| result = ts.apply_env_recur(env_instance) | |
| out.extend(result) | |
| elif ts.item.val not in command_defs: | |
| out.append(ts.item) | |
| ts.next() | |
| continue | |
| else: | |
| command_def = command_defs[ts.item.val] | |
| command_inst = ts.scan_command(command_def) | |
| result = ts.apply_command_recur(command_inst) | |
| out.extend(result) | |
| return out | |
| # Processing files | |
| def process_file(self, file): | |
| """Returns the new defs. | |
| """ | |
| file = cut_extension(file, ".tex") | |
| source_file = "%s.tex" % (file) | |
| print("File %s [" % (source_file)) | |
| source_fp = open(source_file, "r") | |
| text_str = source_fp.read() | |
| source_fp.close() | |
| self.smart_tokenize(text_str, handle_inputs=True) | |
| if not self.data: | |
| raise Error("Empty tokenization result.") | |
| self.reset() | |
| if self.debug: | |
| source_seen_fname = "%s-seen.tex" % (file) | |
| source_seen_fp = open(source_seen_fname, "w") | |
| source_seen_fp.write(detokenize(self.data)) | |
| source_seen_fp.close() | |
| self.scan_defs() | |
| self.data = self.apply_all_recur(self.data, report=True) | |
| result_fname = "%s-clean.tex" % (file) | |
| print("Writing %s [" % (result_fname)) | |
| result_fp = open(result_fname, "w") | |
| result_fp.write(self.smart_detokenize()) | |
| result_fp.close() | |
| print("] file %s" % (result_fname)) | |
| print("] file %s" % (source_file)) | |
| def process_if_newer(self, file): | |
| """ | |
| \input{file} is added to the token list. | |
| If the input file is newer it is processed. | |
| Returns tokenized \input{file}. | |
| """ | |
| file = cut_extension(file, ".tex") | |
| tex_file = file+".tex" | |
| clean_tex_file = file+"-clean.tex" | |
| if newer(clean_tex_file, tex_file): | |
| print("Using %s." % (clean_tex_file)) | |
| else: | |
| ts = Tex_stream() | |
| ts.data = [] | |
| ts.defs = self.defs | |
| ts.process_file(file) | |
| to_add = "\\input{%s}" % (file) | |
| return tokenize(to_add) | |
| # Main | |
| long_optlist = ["debug","defs="] | |
| options, restargs = getopt_map("x", long_optlist) | |
| debug = False | |
| if "--debug" in options: | |
| debug = True | |
| root = restargs[0] | |
| root = cut_extension(root, ".tex") | |
| if "--defs" in options: | |
| defs_root = options["--defs"] | |
| else: | |
| defs_root = "%s" % (root) | |
| defs_db = defs_root | |
| defs_db_file = defs_root+".db" | |
| ts = Tex_stream() | |
| ts.defs_db = defs_db | |
| ts.defs_db_file = defs_db_file | |
| ts.debug = debug | |
| ts.restore_defs() | |
| for root in restargs: | |
| ts.process_file(root) | |
| print("(Re)creating defs db %s" % (defs_db)) | |
| ts.save_defs() | |