File size: 5,981 Bytes
046e3b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
"""The executable of pdftocio"""

import sys
import os.path
import pdftocio
import getopt
import io

from typing import Optional, TextIO
from getopt import GetoptError
from fitzutils import open_pdf, dump_toc, pprint_toc, get_file_encoding
from .tocparser import parse_toc
from .tocio import write_toc, read_toc

# Short usage synopsis. main() prints it to stderr after an option-parsing
# error and when no input PDF is given. The surrounding blank lines inside the
# literal are removed by .strip(); the inner ones are part of the output.
usage_s = """

usage: pdftocio [options] in.pdf < toc

       pdftocio [options] in.pdf

""".strip()

# Full help text. main() prints it to stderr for -h/--help and then exits.
# It is a raw string, and .strip() removes the leading/trailing whitespace of
# the literal; everything in between is emitted verbatim.
# NOTE(review): the trailing "[1]: ..." link has no matching "[1]" marker
# anywhere in the text above -- looks like a leftover footnote; confirm whether
# it should be referenced or dropped.
# NOTE(review): "only supply a input file" reads like a typo for "an input
# file"; left untouched here because it is user-facing output.
help_s = r"""

usage: pdftocio [options] in.pdf < toc

       pdftocio [options] in.pdf



Import/output the table of contents of a PDF file.



This command can operate in two ways: it can either be used

to extract the table of contents of a PDF, or import table

of contents to a PDF using the output of pdftocgen.



1. To extract the table of contents of a PDF for

   modification, only supply a input file:



     $ pdftocio in.pdf



   or if you want to print it in a readable format, use the

   -H flag:



     $ pdftocio -H in.pdf



2. To import a table of contents to a PDF using the toc file

   generated by pdftocgen, use input redirection,



     $ pdftocio in.pdf < toc



   pipes,



     $ pdftocgen -r recipe.toml in.pdf | pdftocio in.pdf



   or the -t flag



     $ pdftocio -t toc in.pdf



   to supply the toc file. If you want to specify an output

   file name, use the -o option



     $ pdftocio -t toc -o out.pdf in.pdf



arguments

  in.pdf                path to the input PDF document



options

  -h, --help            show help

  -t, --toc=toc         path to the table of contents generated by

                        pdftocgen. if this option is not given, the

                        default is stdin, but if no input is piped or

                        redirected to stdin, this program will instead

                        print the existing ToC of the PDF file

  -v, --vpos            if this flag is set, the vertical position of

                        each heading will be dumped to the output

  -p, --print           when flag is set, print the existing ToC in

                        the input PDF file. this flag is usually not

                        necessary, since it is the default behavior

                        when no input is given

  -H, --human-readable  print the toc in a readable format

  -o, --out=file.pdf    path to the output file. if this flag is not

                        specified, the default is {input}_out.pdf

  -g, --debug           enable debug mode

  -V, --version         show version number



[1]: https://krasjet.com/voice/pdf.tocgen/#step-1-build-a-recipe

""".strip()


def _transfer_toc(
    path_in: str,
    toc_file: TextIO,
    out: Optional[str],
    print_toc: bool,
    readable: bool,
    vpos: bool,
) -> None:
    """Print the ToC of the PDF at `path_in`, or import one into it.

    Output mode is chosen when `toc_file` is a terminal (nothing was piped or
    redirected) or when `print_toc` is set; the existing ToC is written to
    stdout as UTF-8 and the process exits with status 0, or with status 1 if
    the document has no ToC.

    Otherwise the ToC is parsed from `toc_file`, written to the document, and
    the document is saved to `out`, which defaults to the input path with
    `_out` inserted before the extension.

    Exceptions raised by the helpers are not handled here; main() reports them.
    """
    with open_pdf(path_in) as doc:
        if toc_file.isatty() or print_toc:
            # no input from user, switch to output mode and extract the toc
            # of pdf
            toc = read_toc(doc)
            if len(toc) == 0:
                print("error: no table of contents found", file=sys.stderr)
                sys.exit(1)

            stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')

            if readable:
                print(pprint_toc(toc), file=stdout)
            else:
                print(dump_toc(toc, vpos), end="", file=stdout)
            # push the text out now instead of relying on the wrapper being
            # flushed when it is finalized during exit
            stdout.flush()
            sys.exit(0)

        # an input is given, so switch to input mode
        toc = parse_toc(toc_file)
        write_toc(doc, toc)

        if out is None:
            # add suffix to input name as output
            pfx, ext = os.path.splitext(path_in)
            out = f"{pfx}_out{ext}"
        doc.save(out)


def main():
    """Command-line entry point of pdftocio.

    Parses `sys.argv`, then either prints the table of contents of the input
    PDF or imports one into it (see `_transfer_toc`).

    Exit status: 2 for an invalid option, 1 for any reported error (toc file
    can't be opened, no input PDF, no ToC found, ValueError / IOError /
    IndexError / KeyboardInterrupt while processing), 0 after printing a ToC.
    -h/--help and -V/--version print to stderr and exit via a bare
    `sys.exit()`. After a successful import the function simply returns.

    With -g/--debug the exceptions caught while processing are re-raised
    instead of being reduced to a one-line message.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hvt:pHo:gV",
            ["help", "vpos", "toc=", "print", "human-readable", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    toc_file: TextIO = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
    # handle opened by us for -t/--toc; tracked separately from toc_file so
    # that only our own file (never the stdin wrapper) gets closed
    opened_toc: Optional[TextIO] = None
    print_toc: bool = False
    readable: bool = False
    out: Optional[str] = None
    vpos: bool = False
    debug: bool = False

    try:
        for o, a in opts:
            if o in ("-H", "--human-readable"):
                readable = True
            elif o in ("-p", "--print"):
                print_toc = True
            elif o in ("-v", "--vpos"):
                vpos = True
            elif o in ("-t", "--toc"):
                if opened_toc is not None:
                    # -t was repeated: the last one wins, release the earlier
                    # handle instead of leaking it
                    opened_toc.close()
                    opened_toc = None
                try:
                    opened_toc = open(a, "r", encoding=get_file_encoding(a))
                except IOError as e:
                    print("error: can't open file for reading", file=sys.stderr)
                    print(e, file=sys.stderr)
                    sys.exit(1)
                toc_file = opened_toc
            elif o in ("-o", "--out"):
                out = a
            elif o in ("-g", "--debug"):
                debug = True
            elif o in ("-V", "--version"):
                print("pdftocio", pdftocio.__version__, file=sys.stderr)
                sys.exit()
            elif o in ("-h", "--help"):
                print(help_s, file=sys.stderr)
                sys.exit()

        if len(args) < 1:
            print("error: no input pdf is given", file=sys.stderr)
            print(usage_s, file=sys.stderr)
            sys.exit(1)

        path_in: str = args[0]
        # done parsing arguments

        try:
            _transfer_toc(path_in, toc_file, out, print_toc, readable, vpos)
        except ValueError as e:
            if debug:
                raise
            print("error:", e, file=sys.stderr)
            sys.exit(1)
        except IOError as e:
            if debug:
                raise
            print("error: unable to open file", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except IndexError as e:
            if debug:
                raise
            print("index error:", e, file=sys.stderr)
            sys.exit(1)
        except KeyboardInterrupt:
            if debug:
                raise
            print("error: interrupted", file=sys.stderr)
            sys.exit(1)
    finally:
        # runs on every exit path, including the sys.exit() calls above
        if opened_toc is not None:
            opened_toc.close()