"""The executable of pdftocio"""
import sys
import os.path
import pdftocio
import getopt
import io
from typing import Optional, TextIO
from getopt import GetoptError
from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding
from .tocparser import parse_toc
from .tocio import write_toc, read_toc
# Short usage synopsis; main() prints it to stderr when option parsing fails
# or when no input PDF is given. Surrounding newlines are removed by strip().
usage_s = """
usage: pdftocio [options] in.pdf < toc
pdftocio [options] in.pdf
""".strip()
# Full help text; main() prints it to stderr for -h/--help and then exits.
# Surrounding newlines are removed by strip().
# NOTE(review): the trailing "[1]:" link is not referenced anywhere in the
# text above it -- confirm whether it is still needed.
help_s = r"""
usage: pdftocio [options] in.pdf < toc
pdftocio [options] in.pdf
Import/output the table of contents of a PDF file.
This command can operate in two ways: it can either be used
to extract the table of contents of a PDF, or import table
of contents to a PDF using the output of pdftocgen.
1. To extract the table of contents of a PDF for
modification, only supply a input file:
$ pdftocio in.pdf
or if you want to print it in a readable format, use the
-H flag:
$ pdftocio -H in.pdf
2. To import a table of contents to a PDF using the toc file
generated by pdftocgen, use input redirection,
$ pdftocio in.pdf < toc
pipes,
$ pdftocgen -r recipe.toml in.pdf | pdftocio in.pdf
or the -t flag
$ pdftocio -t toc in.pdf
to supply the toc file. If you want to specify an output
file name, use the -o option
$ pdftocio -t toc -o out.pdf in.pdf
arguments
in.pdf path to the input PDF document
options
-h, --help show help
-t, --toc=toc path to the table of contents generated by
pdftocgen. if this option is not given, the
default is stdin, but if no input is piped or
redirected to stdin, this program will instead
print the existing ToC of the PDF file
-v, --vpos if this flag is set, the vertical position of
each heading will be dumped to the output
-p, --print when flag is set, print the existing ToC in
the input PDF file. this flag is usually not
necessary, since it is the default behavior
when no input is given
-H, --human-readable print the toc in a readable format
-o, --out=file.pdf path to the output file. if this flag is not
specified, the default is {input}_out.pdf
-g, --debug enable debug mode
-V, --version show version number
[1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe
""".strip()
def _process_pdf(
    path_in: str,
    toc_file: TextIO,
    print_toc: bool,
    readable: bool,
    vpos: bool,
    out: Optional[str],
    debug: bool,
) -> None:
    """Print the ToC of the PDF at `path_in`, or import one into it.

    Output mode is chosen when `print_toc` is set or `toc_file` is a
    terminal: the ToC is read from the PDF and printed to stdout, through
    `pprint_toc` if `readable` is set and through `dump_toc(toc, vpos)`
    otherwise. The process then exits with status 0, or with status 1 if
    the ToC is empty.

    Otherwise the ToC is parsed from `toc_file`, written into the document,
    and the document is saved to `out`. If `out` is None, the output path is
    the input path with "_out" inserted before its extension.

    ValueError, IOError, IndexError and KeyboardInterrupt are reported on
    stderr and turned into exit status 1, unless `debug` is set, in which
    case they are re-raised unchanged.
    """
    try:
        with open_pdf(path_in) as doc:
            if toc_file.isatty() or print_toc:
                # no input from user, switch to output mode and extract the
                # toc of pdf
                toc = read_toc(doc)
                if not toc:
                    print("error: no table of contents found", file=sys.stderr)
                    sys.exit(1)

                # wrap the raw stdout buffer so the output is always encoded
                # as utf-8, dropping anything that can't be encoded
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')
                if readable:
                    print(pprint_toc(toc), file=stdout)
                else:
                    print(dump_toc(toc, vpos), end="", file=stdout)
                sys.exit(0)

            # an input is given, so switch to input mode
            toc = parse_toc(toc_file)
            write_toc(doc, toc)

            if out is None:
                # add suffix to input name as output
                pfx, ext = os.path.splitext(path_in)
                out = f"{pfx}_out{ext}"
            doc.save(out)
    except ValueError as e:
        if debug:
            # bare raise keeps the original traceback intact
            raise
        print("error:", e, file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        if debug:
            raise
        print("error: unable to open file", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)
    except IndexError as e:
        if debug:
            raise
        print("index error:", e, file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        if debug:
            raise
        print("error: interrupted", file=sys.stderr)
        sys.exit(1)


def main():
    """Entry point of the pdftocio command.

    Parses the options in `sys.argv[1:]` with `getopt.gnu_getopt`, then
    hands the first positional argument (the input PDF path) and the
    collected settings to `_process_pdf`.

    Exit status: 2 when option parsing fails; 1 when the file given to
    -t/--toc can't be opened or no input PDF is given; -V/--version and
    -h/--help print to stderr and exit through a bare `sys.exit()`.

    A file opened for -t/--toc is closed on every way out of this function.
    The wrapper around stdin is never closed here.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hvt:pHo:gV",
            ["help", "vpos", "toc=", "print", "human-readable", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    # default toc source is stdin, decoded as utf-8 with bad bytes dropped
    toc_file: TextIO = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
    # the file opened for -t, if any; tracked separately from toc_file so it
    # can be closed without ever closing the stdin wrapper
    opened_toc: Optional[TextIO] = None
    print_toc: bool = False
    readable: bool = False
    out: Optional[str] = None
    vpos: bool = False
    debug: bool = False

    try:
        for o, a in opts:
            if o in ("-H", "--human-readable"):
                readable = True
            elif o in ("-p", "--print"):
                print_toc = True
            elif o in ("-v", "--vpos"):
                vpos = True
            elif o in ("-t", "--toc"):
                if opened_toc is not None:
                    # -t was given more than once: the last one wins, so
                    # release the earlier file instead of leaking it
                    opened_toc.close()
                    opened_toc = None
                try:
                    opened_toc = open(a, "r", encoding=get_file_encoding(a))
                except IOError as e:
                    print("error: can't open file for reading", file=sys.stderr)
                    print(e, file=sys.stderr)
                    sys.exit(1)
                toc_file = opened_toc
            elif o in ("-o", "--out"):
                out = a
            elif o in ("-g", "--debug"):
                debug = True
            elif o in ("-V", "--version"):
                print("pdftocio", pdftocio.__version__, file=sys.stderr)
                sys.exit()
            elif o in ("-h", "--help"):
                print(help_s, file=sys.stderr)
                sys.exit()

        if not args:
            print("error: no input pdf is given", file=sys.stderr)
            print(usage_s, file=sys.stderr)
            sys.exit(1)

        path_in: str = args[0]
        # done parsing arguments

        _process_pdf(path_in, toc_file, print_toc, readable, vpos, out, debug)
    finally:
        # runs for normal return, errors and sys.exit() alike
        if opened_toc is not None:
            opened_toc.close()
|