adelevett committed on
Commit
d600971
·
verified ·
1 Parent(s): 92cc6a2

Upload 6 files

Browse files
Files changed (1) hide show
  1. pdftocgen/app.py +158 -0
pdftocgen/app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """The executable of pdftocgen"""
2
+
3
+ import toml
4
+ import sys
5
+ import getopt
6
+ import pdftocgen
7
+ import io
8
+
9
+ from getopt import GetoptError
10
+ from typing import TextIO
11
+ from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding
12
+ from .tocgen import gen_toc
13
+
14
+ usage_s = """
15
+ usage: pdftocgen [options] doc.pdf < recipe.toml
16
+ """.strip()
17
+
18
+ help_s = """
19
+ usage: pdftocgen [options] doc.pdf < recipe.toml
20
+
21
+ Generate PDF table of contents from a recipe file.
22
+
23
+ This command automatically generates a table of contents for
24
+ doc.pdf based on the font attributes and position of
25
+ headings specified in a TOML recipe file. See [1] for an
26
+ introduction to recipe files.
27
+
28
+ To generate the table of contents for a pdf, use input
29
+ redirection or pipes to supply a recipe file
30
+
31
+ $ pdftocgen in.pdf < recipe.toml
32
+
33
+ or alternatively use the -r flag
34
+
35
+ $ pdftocgen -r recipe.toml in.pdf
36
+
37
+ The output of this command can be directly piped into
38
+ pdftocio to generate a new pdf file using the generated
39
+ table of contents
40
+
41
+ $ pdftocgen -r recipe.toml in.pdf | pdftocio -o out.pdf in.pdf
42
+
43
+ or you could save the output of this command to a file for
44
+ further tweaking using output redirection
45
+
46
+ $ pdftocgen -r recipe.toml in.pdf > toc
47
+
48
+ or the -o flag:
49
+
50
+ $ pdftocgen -r recipe.toml -o toc in.pdf
51
+
52
+ If you only need a readable format of the table of contents,
53
+ use the -H flag
54
+
55
+ $ pdftocgen -r recipe.toml -H in.pdf
56
+
57
+ This format cannot be parsed by pdftocio, but it is slightly
58
+ more readable.
59
+
60
+ arguments
61
+ doc.pdf path to the input PDF document
62
+
63
+ options
64
+ -h, --help show help
65
+ -r, --recipe=recipe.toml path to the recipe file. if this flag is
66
+ not specified, the default is stdin
67
+ -H, --human-readable print the toc in a readable format
68
+ -v, --vpos if this flag is set, the vertical position
69
+ of each heading will be generated in the
70
+ output
71
+ -o, --out=file path to the output file. if this flag is
72
+ not specified, the default is stdout
73
+ -g, --debug enable debug mode
74
+ -V, --version show version number
75
+
76
+ [1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe
77
+ """.strip()
78
+
79
+
80
def main():
    """Run the pdftocgen command line interface.

    Options are read from ``sys.argv[1:]`` with ``getopt.gnu_getopt``.  The
    recipe is read from the file given with ``-r``/``--recipe`` or, when the
    flag is absent, from standard input decoded as UTF-8 (undecodable bytes
    are dropped).  The generated table of contents is written to the file
    given with ``-o``/``--out`` or, when the flag is absent, to standard
    output encoded as UTF-8.

    No file is opened until the whole command line has been validated, so
    a usage error, ``--help`` or ``--version`` never truncates the file
    named by ``-o``.  If ``-r`` or ``-o`` is repeated, the last value wins.

    Exits with status 2 on an unknown or malformed option, status 1 on any
    other reported error, and status 0 after ``--help`` or ``--version``.
    With ``-g``/``--debug`` the ``ValueError``, ``IOError`` and
    ``KeyboardInterrupt`` raised while generating the table of contents are
    re-raised instead of being turned into a one-line error message.
    """
    # Imported here so the function stays self-contained with respect to
    # the module's import block.
    from contextlib import ExitStack

    def _release(stream):
        # Flush and detach a wrapper around a standard stream.  Simply
        # dropping the wrapper would let its finalizer close the underlying
        # sys.stdin/sys.stdout buffer.  Cleanup is best-effort: a failure
        # here (e.g. a broken pipe) must not mask the real exit status.
        try:
            stream.detach()
        except (OSError, ValueError):
            pass

    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hr:Hvo:gV",
            ["help", "recipe=", "human-readable", "vpos", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    recipe_path = None  # None means: read the recipe from stdin
    out_path = None     # None means: write the result to stdout
    readable: bool = False
    vpos: bool = False
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-v", "--vpos"):
            vpos = True
        elif o in ("-r", "--recipe"):
            recipe_path = a
        elif o in ("-o", "--out"):
            out_path = a
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocgen", pdftocgen.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if len(args) < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    # done parsing arguments

    # The stack closes every file opened below on all exit paths,
    # including the sys.exit() calls.
    with ExitStack() as stack:
        recipe_file: TextIO
        if recipe_path is None:
            recipe_file = io.TextIOWrapper(
                sys.stdin.buffer, encoding='utf-8', errors='ignore'
            )
            stack.callback(_release, recipe_file)
        else:
            try:
                recipe_file = stack.enter_context(
                    open(recipe_path, "r", encoding=get_file_encoding(recipe_path))
                )
            except IOError as e:
                print("error: can't open file for reading", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)

        out: TextIO
        if out_path is None:
            out = io.TextIOWrapper(
                sys.stdout.buffer, encoding='utf-8', errors='ignore'
            )
            stack.callback(_release, out)
        else:
            try:
                out = stack.enter_context(
                    open(out_path, "w", encoding='utf-8', errors='ignore')
                )
            except IOError as e:
                print("error: can't open file for writing", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)

        try:
            with open_pdf(path_in) as doc:
                recipe = toml.load(recipe_file)
                toc = gen_toc(doc, recipe)
                if readable:
                    print(pprint_toc(toc), file=out)
                else:
                    print(dump_toc(toc, vpos), end="", file=out)
            # Flush inside the try so that a write failure is reported
            # through the IOError handler below rather than during cleanup.
            out.flush()
        except ValueError as e:
            if debug:
                raise
            print("error:", e, file=sys.stderr)
            sys.exit(1)
        except IOError as e:
            if debug:
                raise
            print("error: unable to open file", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except KeyboardInterrupt:
            if debug:
                raise
            print("error: interrupted", file=sys.stderr)
            sys.exit(1)