"""The executable of pdftocgen"""
import toml
import sys
import getopt
import pdftocgen
import io
from getopt import GetoptError
from typing import TextIO
from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding
from .tocgen import gen_toc
# One-line synopsis; main() prints it to stderr after an argument error
# (unknown option or missing input pdf). The surrounding newlines of the
# literal are removed by .strip().
usage_s = """
usage: pdftocgen [options] doc.pdf < recipe.toml
""".strip()
# Full help text; main() prints it to stderr for -h/--help. It repeats the
# synopsis above and documents every option accepted by the getopt spec in
# main(). The surrounding newlines of the literal are removed by .strip().
help_s = """
usage: pdftocgen [options] doc.pdf < recipe.toml
Generate PDF table of contents from a recipe file.
This command automatically generates a table of contents for
doc.pdf based on the font attributes and position of
headings specified in a TOML recipe file. See [1] for an
introduction to recipe files.
To generate the table of contents for a pdf, use input
redirection or pipes to supply a recipe file
$ pdftocgen in.pdf < recipe.toml
or alternatively use the -r flag
$ pdftocgen -r recipe.toml in.pdf
The output of this command can be directly piped into
pdftocio to generate a new pdf file using the generated
table of contents
$ pdftocgen -r recipe.toml in.pdf | pdftocio -o out.pdf in.pdf
or you could save the output of this command to a file for
further tweaking using output redirection
$ pdftocgen -r recipe.toml in.pdf > toc
or the -o flag:
$ pdftocgen -r recipe.toml -o toc in.pdf
If you only need a readable format of the table of contents,
use the -H flag
$ pdftocgen -r recipe.toml -H in.pdf
This format cannot be parsed by pdftocio, but it is slightly
more readable.
arguments
doc.pdf path to the input PDF document
options
-h, --help show help
-r, --recipe=recipe.toml path to the recipe file. if this flag is
not specified, the default is stdin
-H, --human-readable print the toc in a readable format
-v, --vpos if this flag is set, the vertical position
of each heading will be generated in the
output
-o, --out=file path to the output file. if this flag is
not specified, the default is stdout
-g, --debug enable debug mode
-V, --version show version number
[1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe
""".strip()
def _open_recipe(path) -> TextIO:
    """Return a readable text stream for the recipe.

    Arguments
      path: value of -r/--recipe, or None to read the recipe from stdin
    Returns
      the recipe file opened with the encoding reported by
      get_file_encoding, or stdin re-wrapped as UTF-8 text (undecodable
      bytes are ignored)

    Prints an error to stderr and exits with status 1 if the file can't
    be opened.
    """
    if path is None:
        return io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
    try:
        return open(path, "r", encoding=get_file_encoding(path))
    except IOError as e:
        print("error: can't open file for reading", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)


def _open_output(path) -> TextIO:
    """Return a writable UTF-8 text stream for the generated toc.

    Arguments
      path: value of -o/--out, or None to write to stdout
    Returns
      the output file opened for writing (truncating it), or stdout
      re-wrapped as UTF-8 text

    Prints an error to stderr and exits with status 1 if the file can't
    be opened.
    """
    if path is None:
        return io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')
    try:
        return open(path, "w", encoding='utf-8', errors='ignore')
    except IOError as e:
        print("error: can't open file for writing", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)


def _release(stream, is_std: bool) -> None:
    """Release a stream returned by _open_recipe or _open_output.

    A wrapper around stdin/stdout is detached instead of closed, because
    closing it would also close sys.stdin.buffer / sys.stdout.buffer for
    the rest of the process. Real files are closed.
    """
    try:
        if is_std:
            stream.detach()
        else:
            stream.close()
    except (OSError, ValueError):
        # Best-effort cleanup: main() flushes the output explicitly inside
        # its try block, so write failures have already been reported there.
        pass


def main():
    """Entry point of the pdftocgen command.

    Parses sys.argv, loads the TOML recipe (from -r or stdin), generates
    the table of contents of the input pdf and writes it to -o or stdout.

    Exit status: 0 after -h/--help or -V/--version, 2 for invalid
    options, 1 for any other error. With -g/--debug, exceptions raised
    while generating the toc are re-raised instead of being summarized.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hr:Hvo:gV",
            ["help", "recipe=", "human-readable", "vpos", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    # Only remember the paths here; the files are opened after all
    # arguments have been validated, so that a bad command line (or
    # -h/-V) never truncates the output file or leaks a handle.
    recipe_path = None
    out_path = None
    readable: bool = False
    vpos: bool = False
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-v", "--vpos"):
            vpos = True
        elif o in ("-r", "--recipe"):
            recipe_path = a
        elif o in ("-o", "--out"):
            out_path = a
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocgen", pdftocgen.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if not args:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)
    path_in: str = args[0]
    # done parsing arguments

    recipe_file = _open_recipe(recipe_path)
    out = None
    try:
        with open_pdf(path_in) as doc:
            recipe = toml.load(recipe_file)
            toc = gen_toc(doc, recipe)
            if readable:
                text, end = pprint_toc(toc), "\n"
            else:
                text, end = dump_toc(toc, vpos), ""
            # Open the output only once the text is ready, so a failed
            # run leaves an existing output file untouched.
            out = _open_output(out_path)
            print(text, end=end, file=out)
            # Flush here so write errors are reported by the handlers below.
            out.flush()
    except ValueError as e:
        if debug:
            raise
        print("error:", e, file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        if debug:
            raise
        print("error: unable to open file", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        if debug:
            raise
        print("error: interrupted", file=sys.stderr)
        sys.exit(1)
    finally:
        _release(recipe_file, recipe_path is None)
        if out is not None:
            _release(out, out_path is None)
|