Spaces:
Sleeping
Sleeping
| """The executable of pdftocgen""" | |
| import toml | |
| import sys | |
| import getopt | |
| import pdftocgen | |
| import io | |
| from getopt import GetoptError | |
| from typing import TextIO | |
| from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding | |
| from .tocgen import gen_toc | |
# One-line usage summary; main() prints it to stderr next to argument errors.
usage_s = "usage: pdftocgen [options] doc.pdf < recipe.toml"
# Full help text; main() prints it to stderr for -h/--help and then exits.
# The surrounding whitespace is removed with .strip().
# NOTE(review): the blank lines and indentation inside this literal appear to
# have been lost when the file was extracted (every line is wrapped in table
# pipes) -- confirm against the upstream source before editing the text.
| help_s = """ | |
| usage: pdftocgen [options] doc.pdf < recipe.toml | |
| Generate PDF table of contents from a recipe file. | |
| This command automatically generates a table of contents for | |
| doc.pdf based on the font attributes and position of | |
| headings specified in a TOML recipe file. See [1] for an | |
| introduction to recipe files. | |
| To generate the table of contents for a pdf, use input | |
| redirection or pipes to supply a recipe file | |
| $ pdftocgen in.pdf < recipe.toml | |
| or alternatively use the -r flag | |
| $ pdftocgen -r recipe.toml in.pdf | |
| The output of this command can be directly piped into | |
| pdftocio to generate a new pdf file using the generated | |
| table of contents | |
| $ pdftocgen -r recipe.toml in.pdf | pdftocio -o out.pdf in.pdf | |
| or you could save the output of this command to a file for | |
| further tweaking using output redirection | |
| $ pdftocgen -r recipe.toml in.pdf > toc | |
| or the -o flag: | |
| $ pdftocgen -r recipe.toml -o toc in.pdf | |
| If you only need a readable format of the table of contents, | |
| use the -H flag | |
| $ pdftocgen -r recipe.toml -H in.pdf | |
| This format cannot be parsed by pdftocio, but it is slightly | |
| more readable. | |
| arguments | |
| doc.pdf path to the input PDF document | |
| options | |
| -h, --help show help | |
| -r, --recipe=recipe.toml path to the recipe file. if this flag is | |
| not specified, the default is stdin | |
| -H, --human-readable print the toc in a readable format | |
| -v, --vpos if this flag is set, the vertical position | |
| of each heading will be generated in the | |
| output | |
| -o, --out=file path to the output file. if this flag is | |
| not specified, the default is stdout | |
| -g, --debug enable debug mode | |
| -V, --version show version number | |
| [1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe | |
| """.strip() | |
def main():
    """Run the pdftocgen command line interface.

    Parses ``sys.argv``, loads a TOML recipe (from the ``-r`` file, or from
    stdin when ``-r`` is absent), generates the table of contents of the
    given PDF with ``gen_toc`` and writes it to the ``-o`` file, or to stdout
    when ``-o`` is absent.

    The recipe and output files are opened only after the whole command line
    has been validated, so an invalid invocation (or ``--help``/``--version``)
    never truncates an existing output file. Every file opened here is closed
    before returning; wrappers around stdin/stdout are flushed and detached
    instead, so the process-wide streams stay open.

    Exits with status 2 on malformed options, 1 on any other reported error
    and 0 after ``--help`` or ``--version``. With ``-g``/``--debug`` the
    original exception is re-raised instead of being summarised.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hr:Hvo:gV",
            ["help", "recipe=", "human-readable", "vpos", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    recipe_path = None  # None: read the recipe from stdin
    out_path = None     # None: write the toc to stdout
    readable: bool = False
    vpos: bool = False
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-v", "--vpos"):
            vpos = True
        elif o in ("-r", "--recipe"):
            # only remember the path; if the flag is repeated the last wins
            recipe_path = a
        elif o in ("-o", "--out"):
            # opening is deferred so that a bad command line cannot
            # truncate the target file
            out_path = a
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocgen", pdftocgen.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if not args:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    # done parsing arguments

    recipe_file = None
    out = None
    try:
        if recipe_path is None:
            recipe_file = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
        else:
            try:
                recipe_file = open(recipe_path, "r", encoding=get_file_encoding(recipe_path))
            except IOError as e:
                print("error: can't open file for reading", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)

        if out_path is None:
            out = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')
        else:
            try:
                out = open(out_path, "w", encoding='utf-8', errors='ignore')
            except IOError as e:
                print("error: can't open file for writing", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)

        try:
            with open_pdf(path_in) as doc:
                recipe = toml.load(recipe_file)
                toc = gen_toc(doc, recipe)
                if readable:
                    print(pprint_toc(toc), file=out)
                else:
                    print(dump_toc(toc, vpos), end="", file=out)
        except ValueError as e:
            if debug:
                raise  # bare raise keeps the original traceback
            print("error:", e, file=sys.stderr)
            sys.exit(1)
        except IOError as e:
            if debug:
                raise
            print("error: unable to open file", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except KeyboardInterrupt:
            if debug:
                raise
            print("error: interrupted", file=sys.stderr)
            sys.exit(1)
    finally:
        # Runs on normal return, on sys.exit() and on re-raised errors.
        # Files opened above are closed; the wrappers around the standard
        # streams are detached so that dropping them does not close
        # sys.stdin / sys.stdout for the rest of the process.
        if recipe_file is not None:
            if recipe_path is None:
                recipe_file.detach()
            else:
                recipe_file.close()
        if out is not None:
            if out_path is None:
                out.flush()
                out.detach()
            else:
                out.close()