Spaces:
Sleeping
Sleeping
| """The executable of pdftocio""" | |
| import sys | |
| import os.path | |
| import pdftocio | |
| import getopt | |
| import io | |
| from typing import Optional, TextIO | |
| from getopt import GetoptError | |
| from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding | |
| from .tocparser import parse_toc | |
| from .tocio import write_toc, read_toc | |
# Short synopsis of the command line. main() prints it to stderr after an
# option-parsing error or when no input PDF is given.
# NOTE(review): the second line was presumably indented to line up under the
# first "pdftocio"; leading whitespace inside this literal may have been lost
# -- confirm against the upstream source before relying on the exact layout.
usage_s = """
usage: pdftocio [options] in.pdf < toc
pdftocio [options] in.pdf
""".strip()
# Full help text, printed to stderr by main() for -h/--help.
# It is a raw string so that nothing in the text is treated as an escape
# sequence.
# NOTE(review): blank lines and indentation inside this literal appear to
# have been collapsed, and the trailing "[1]" link has no visible reference in
# the text -- confirm both against the upstream source.
help_s = r"""
usage: pdftocio [options] in.pdf < toc
pdftocio [options] in.pdf
Import/output the table of contents of a PDF file.
This command can operate in two ways: it can either be used
to extract the table of contents of a PDF, or import table
of contents to a PDF using the output of pdftocgen.
1. To extract the table of contents of a PDF for
modification, only supply an input file:
$ pdftocio in.pdf
or if you want to print it in a readable format, use the
-H flag:
$ pdftocio -H in.pdf
2. To import a table of contents to a PDF using the toc file
generated by pdftocgen, use input redirection,
$ pdftocio in.pdf < toc
pipes,
$ pdftocgen -r recipe.toml in.pdf | pdftocio in.pdf
or the -t flag
$ pdftocio -t toc in.pdf
to supply the toc file. If you want to specify an output
file name, use the -o option
$ pdftocio -t toc -o out.pdf in.pdf
arguments
in.pdf path to the input PDF document
options
-h, --help show help
-t, --toc=toc path to the table of contents generated by
pdftocgen. if this option is not given, the
default is stdin, but if no input is piped or
redirected to stdin, this program will instead
print the existing ToC of the PDF file
-v, --vpos if this flag is set, the vertical position of
each heading will be dumped to the output
-p, --print when flag is set, print the existing ToC in
the input PDF file. this flag is usually not
necessary, since it is the default behavior
when no input is given
-H, --human-readable print the toc in a readable format
-o, --out=file.pdf path to the output file. if this flag is not
specified, the default is {input}_out.pdf
-g, --debug enable debug mode
-V, --version show version number
[1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe
""".strip()
def _print_existing_toc(doc, readable: bool, vpos: bool) -> None:
    """Print the table of contents already stored in ``doc`` to stdout.

    The ToC is obtained with ``read_toc``. It is rendered with
    ``pprint_toc`` when ``readable`` is set, otherwise with
    ``dump_toc(toc, vpos)``.

    Exits with status 1, after a message on stderr, when the ToC is empty.
    """
    toc = read_toc(doc)
    if len(toc) == 0:
        print("error: no table of contents found", file=sys.stderr)
        sys.exit(1)

    # the output is always encoded as utf-8, whatever sys.stdout would use
    stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')
    if readable:
        print(pprint_toc(toc), file=stdout)
    else:
        print(dump_toc(toc, vpos), end="", file=stdout)

    # push the text out now instead of relying on the wrapper's finalizer,
    # then detach so that discarding the wrapper does not close the
    # process-wide sys.stdout.buffer
    stdout.flush()
    stdout.detach()


def _import_toc(doc, toc_file: TextIO, path_in: str, out: Optional[str]) -> None:
    """Parse a ToC from ``toc_file``, write it into ``doc`` and save the result.

    When ``out`` is None the output path is the input path with ``_out``
    inserted before the extension.
    """
    toc = parse_toc(toc_file)
    write_toc(doc, toc)

    if out is None:
        # add suffix to input name as output
        pfx, ext = os.path.splitext(path_in)
        out = f"{pfx}_out{ext}"
    doc.save(out)


def main():
    """Command-line entry point of pdftocio.

    Options are read from ``sys.argv`` (see ``help_s``). Depending on them the
    program either prints the existing table of contents of the input PDF
    (when ``-p`` is given or the ToC input is a terminal), or reads a ToC
    from the ``-t`` file or stdin and saves a copy of the PDF containing it.

    This function always ends by raising ``SystemExit`` on the paths that
    print or fail:
      * 2 -- invalid command-line options
      * 1 -- missing input PDF, unreadable ToC file, or an error while
             processing (unless ``-g`` is given, in which case the original
             exception is re-raised)
      * 0 -- ToC printed, or ``-h`` / ``-V`` handled
    After a successful import it simply returns.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hvt:pHo:gV",
            ["help", "vpos", "toc=", "print", "human-readable", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    # file opened for -t/--toc; this function owns it and must close it
    toc_from_opt: Optional[TextIO] = None
    print_toc: bool = False
    readable: bool = False
    out: Optional[str] = None
    vpos: bool = False
    debug: bool = False

    try:
        for o, a in opts:
            if o in ("-H", "--human-readable"):
                readable = True
            elif o in ("-p", "--print"):
                print_toc = True
            elif o in ("-v", "--vpos"):
                vpos = True
            elif o in ("-t", "--toc"):
                # the last -t wins; do not leak the handle of an earlier one
                if toc_from_opt is not None:
                    toc_from_opt.close()
                    toc_from_opt = None
                try:
                    toc_from_opt = open(a, "r", encoding=get_file_encoding(a))
                except IOError as e:
                    print("error: can't open file for reading", file=sys.stderr)
                    print(e, file=sys.stderr)
                    sys.exit(1)
            elif o in ("-o", "--out"):
                out = a
            elif o in ("-g", "--debug"):
                debug = True
            elif o in ("-V", "--version"):
                print("pdftocio", pdftocio.__version__, file=sys.stderr)
                sys.exit()
            elif o in ("-h", "--help"):
                print(help_s, file=sys.stderr)
                sys.exit()

        if not args:
            print("error: no input pdf is given", file=sys.stderr)
            print(usage_s, file=sys.stderr)
            sys.exit(1)

        path_in: str = args[0]
        # done parsing arguments

        toc_file: TextIO
        if toc_from_opt is not None:
            toc_file = toc_from_opt
        else:
            # only wrap stdin when it is really the ToC source: a wrapper
            # that is created and then dropped closes sys.stdin.buffer when
            # it is finalized
            toc_file = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')

        try:
            with open_pdf(path_in) as doc:
                if toc_file.isatty() or print_toc:
                    # no input from user, switch to output mode and extract
                    # the toc of pdf
                    _print_existing_toc(doc, readable, vpos)
                    sys.exit(0)

                # an input is given, so switch to input mode
                _import_toc(doc, toc_file, path_in, out)
        except ValueError as e:
            if debug:
                raise  # bare raise keeps the original traceback intact
            print("error:", e, file=sys.stderr)
            sys.exit(1)
        except IOError as e:
            if debug:
                raise
            print("error: unable to open file", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except IndexError as e:
            if debug:
                raise
            print("index error:", e, file=sys.stderr)
            sys.exit(1)
        except KeyboardInterrupt:
            if debug:
                raise
            print("error: interrupted", file=sys.stderr)
            sys.exit(1)
    finally:
        # runs on every path, including the SystemExit raised by sys.exit()
        if toc_from_opt is not None:
            toc_from_opt.close()