adelevett's picture
Upload 6 files
d600971 verified
"""The executable of pdftocgen"""
import toml
import sys
import getopt
import pdftocgen
import io
from getopt import GetoptError
from typing import TextIO
from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding
from .tocgen import gen_toc
usage_s = """
usage: pdftocgen [options] doc.pdf < recipe.toml
""".strip()
help_s = """
usage: pdftocgen [options] doc.pdf < recipe.toml
Generate PDF table of contents from a recipe file.
This command automatically generates a table of contents for
doc.pdf based on the font attributes and position of
headings specified in a TOML recipe file. See [1] for an
introduction to recipe files.
To generate the table of contents for a pdf, use input
redirection or pipes to supply a recipe file
$ pdftocgen in.pdf < recipe.toml
or alternatively use the -r flag
$ pdftocgen -r recipe.toml in.pdf
The output of this command can be directly piped into
pdftocio to generate a new pdf file using the generated
table of contents
$ pdftocgen -r recipe.toml in.pdf | pdftocio -o out.pdf in.pdf
or you could save the output of this command to a file for
further tweaking using output redirection
$ pdftocgen -r recipe.toml in.pdf > toc
or the -o flag:
$ pdftocgen -r recipe.toml -o toc in.pdf
If you only need a readable format of the table of contents,
use the -H flag
$ pdftocgen -r recipe.toml -H in.pdf
This format cannot be parsed by pdftocio, but it is slightly
more readable.
arguments
doc.pdf path to the input PDF document
options
-h, --help show help
-r, --recipe=recipe.toml path to the recipe file. if this flag is
not specified, the default is stdin
-H, --human-readable print the toc in a readable format
-v, --vpos if this flag is set, the vertical position
of each heading will be generated in the
output
-o, --out=file path to the output file. if this flag is
not specified, the default is stdout
-g, --debug enable debug mode
-V, --version show version number
[1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe
""".strip()
def main():
# parse arguments
try:
opts, args = getopt.gnu_getopt(
sys.argv[1:],
"hr:Hvo:gV",
["help", "recipe=", "human-readable", "vpos", "out=", "debug", "version"]
)
except GetoptError as e:
print(e, file=sys.stderr)
print(usage_s, file=sys.stderr)
sys.exit(2)
recipe_file: TextIO = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
readable: bool = False
vpos: bool = False
out: TextIO = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')
debug: bool = False
for o, a in opts:
if o in ("-H", "--human-readable"):
readable = True
elif o in ("-v", "--vpos"):
vpos = True
elif o in ("-r", "--recipe"):
try:
recipe_file = open(a, "r", encoding=get_file_encoding(a))
except IOError as e:
print("error: can't open file for reading", file=sys.stderr)
print(e, file=sys.stderr)
sys.exit(1)
elif o in ("-o", "--out"):
try:
out = open(a, "w", encoding='utf-8', errors='ignore')
except IOError as e:
print("error: can't open file for writing", file=sys.stderr)
print(e, file=sys.stderr)
sys.exit(1)
elif o in ("-g", "--debug"):
debug = True
elif o in ("-V", "--version"):
print("pdftocgen", pdftocgen.__version__, file=sys.stderr)
sys.exit()
elif o in ("-h", "--help"):
print(help_s, file=sys.stderr)
sys.exit()
if len(args) < 1:
print("error: no input pdf is given", file=sys.stderr)
print(usage_s, file=sys.stderr)
sys.exit(1)
path_in: str = args[0]
# done parsing arguments
try:
with open_pdf(path_in) as doc:
recipe = toml.load(recipe_file)
toc = gen_toc(doc, recipe)
if readable:
print(pprint_toc(toc), file=out)
else:
print(dump_toc(toc, vpos), end="", file=out)
except ValueError as e:
if debug:
raise e
print("error:", e, file=sys.stderr)
sys.exit(1)
except IOError as e:
if debug:
raise e
print("error: unable to open file", file=sys.stderr)
print(e, file=sys.stderr)
sys.exit(1)
except KeyboardInterrupt as e:
if debug:
raise e
print("error: interrupted", file=sys.stderr)
sys.exit(1)