adelevett's picture
Upload 76 files
046e3b8 verified
"""The executable of pdftocio"""
import sys
import os.path
import pdftocio
import getopt
import io
from typing import Optional, TextIO
from getopt import GetoptError
from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding
from .tocparser import parse_toc
from .tocio import write_toc, read_toc
# Short usage synopsis. main() prints it to stderr when option parsing fails
# or when no input PDF is given. Surrounding whitespace of the literal is
# removed by .strip().
usage_s = """
usage: pdftocio [options] in.pdf < toc
pdftocio [options] in.pdf
""".strip()
# Full help text. main() prints it to stderr for -h/--help. It is a raw
# string, and surrounding whitespace of the literal is removed by .strip().
# NOTE(review): the trailing "[1]:" footnote has no matching "[1]" marker in
# the text visible here -- confirm whether it is still needed.
# NOTE(review): "only supply a input file" presumably should read "an input
# file"; left untouched because it is user-facing output.
help_s = r"""
usage: pdftocio [options] in.pdf < toc
pdftocio [options] in.pdf
Import/output the table of contents of a PDF file.
This command can operate in two ways: it can either be used
to extract the table of contents of a PDF, or import table
of contents to a PDF using the output of pdftocgen.
1. To extract the table of contents of a PDF for
modification, only supply a input file:
$ pdftocio in.pdf
or if you want to print it in a readable format, use the
-H flag:
$ pdftocio -H in.pdf
2. To import a table of contents to a PDF using the toc file
generated by pdftocgen, use input redirection,
$ pdftocio in.pdf < toc
pipes,
$ pdftocgen -r recipe.toml in.pdf | pdftocio in.pdf
or the -t flag
$ pdftocio -t toc in.pdf
to supply the toc file. If you want to specify an output
file name, use the -o option
$ pdftocio -t toc -o out.pdf in.pdf
arguments
in.pdf path to the input PDF document
options
-h, --help show help
-t, --toc=toc path to the table of contents generated by
pdftocgen. if this option is not given, the
default is stdin, but if no input is piped or
redirected to stdin, this program will instead
print the existing ToC of the PDF file
-v, --vpos if this flag is set, the vertical position of
each heading will be dumped to the output
-p, --print when flag is set, print the existing ToC in
the input PDF file. this flag is usually not
necessary, since it is the default behavior
when no input is given
-H, --human-readable print the toc in a readable format
-o, --out=file.pdf path to the output file. if this flag is not
specified, the default is {input}_out.pdf
-g, --debug enable debug mode
-V, --version show version number
[1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe
""".strip()
def main():
    """Entry point of the ``pdftocio`` command line tool.

    Parses ``sys.argv`` and then runs in one of two modes:

    * output mode -- when ``-p``/``--print`` is given, or the toc input
      stream is a terminal, the existing table of contents of the input
      PDF is written to stdout (``-H`` selects the readable format,
      ``-v`` is forwarded to the dump format);
    * input mode -- otherwise the toc is parsed from the ``-t`` file or
      from stdin, written into the document, and the document is saved
      to the ``-o`` path or to ``{input}_out{ext}``.

    The process is terminated through ``sys.exit``: status 2 when the
    options cannot be parsed, status 1 for every other reported error,
    and status 0 after printing the help text, the version, or the toc.
    A successful import returns normally.

    With ``-g``/``--debug`` the exceptions caught while processing the
    PDF are re-raised instead of being summarised on stderr.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hvt:pHo:gV",
            ["help", "vpos", "toc=", "print", "human-readable", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    # default toc source: stdin, re-wrapped so it is decoded as UTF-8 and
    # undecodable bytes are dropped instead of raising
    toc_file: TextIO = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
    # file opened for -t/--toc (if any); tracked separately so that it can be
    # closed on every exit path without touching the stdin wrapper
    toc_handle: Optional[TextIO] = None
    print_toc: bool = False
    readable: bool = False
    out: Optional[str] = None
    vpos: bool = False
    debug: bool = False

    try:
        for o, a in opts:
            if o in ("-H", "--human-readable"):
                readable = True
            elif o in ("-p", "--print"):
                print_toc = True
            elif o in ("-v", "--vpos"):
                vpos = True
            elif o in ("-t", "--toc"):
                # a repeated -t replaces the previous file; release it first
                if toc_handle is not None:
                    toc_handle.close()
                    toc_handle = None
                try:
                    toc_handle = open(a, "r", encoding=get_file_encoding(a))
                except IOError as e:
                    print("error: can't open file for reading", file=sys.stderr)
                    print(e, file=sys.stderr)
                    sys.exit(1)
                toc_file = toc_handle
            elif o in ("-o", "--out"):
                out = a
            elif o in ("-g", "--debug"):
                debug = True
            elif o in ("-V", "--version"):
                print("pdftocio", pdftocio.__version__, file=sys.stderr)
                sys.exit()
            elif o in ("-h", "--help"):
                print(help_s, file=sys.stderr)
                sys.exit()

        if len(args) < 1:
            print("error: no input pdf is given", file=sys.stderr)
            print(usage_s, file=sys.stderr)
            sys.exit(1)

        path_in: str = args[0]
        # done parsing arguments

        try:
            with open_pdf(path_in) as doc:
                if toc_file.isatty() or print_toc:
                    # no input from user, switch to output mode and extract
                    # the toc of pdf
                    toc = read_toc(doc)
                    if not toc:
                        print("error: no table of contents found", file=sys.stderr)
                        sys.exit(1)

                    stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')
                    if readable:
                        print(pprint_toc(toc), file=stdout)
                    else:
                        print(dump_toc(toc, vpos), end="", file=stdout)
                    # the wrapper buffers its own text; flush it explicitly
                    # rather than relying on finalization during exit
                    stdout.flush()
                    sys.exit(0)

                # an input is given, so switch to input mode
                toc = parse_toc(toc_file)
                write_toc(doc, toc)

                if out is None:
                    # add suffix to input name as output
                    pfx, ext = os.path.splitext(path_in)
                    out = f"{pfx}_out{ext}"
                doc.save(out)
        except ValueError as e:
            if debug:
                raise
            print("error:", e, file=sys.stderr)
            sys.exit(1)
        except IOError as e:
            if debug:
                raise
            print("error: unable to open file", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except IndexError as e:
            if debug:
                raise
            print("index error:", e, file=sys.stderr)
            sys.exit(1)
        except KeyboardInterrupt:
            if debug:
                raise
            print("error: interrupted", file=sys.stderr)
            sys.exit(1)
    finally:
        # runs for normal return, sys.exit and re-raised errors alike
        if toc_handle is not None:
            toc_handle.close()