File size: 7,940 Bytes
046e3b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
import fitz
import toml

from mamba import description, it, before
from fitzutils import ToCEntry
from pdftocgen.tocgen import gen_toc

# Directory containing this spec; fixtures are read from its "files" folder.
dirpath = os.path.dirname(os.path.abspath(__file__))


def _fixture_path(filename):
    """Return the path of *filename* inside the ``files`` fixture folder."""
    return os.path.join(dirpath, "files", filename)


def _load_recipe(filename):
    """Parse the TOML recipe fixture *filename* and return the result.

    The file is opened in a ``with`` block so the handle is closed as soon
    as parsing is done instead of being left for the garbage collector.
    """
    # TOML documents are UTF-8 by specification; do not depend on the locale.
    with open(_fixture_path(filename), encoding="utf-8") as recipe_file:
        return toml.load(recipe_file)


with description("gen_toc") as self:
    with before.all:
        # --- level2: headings spread over several pages, two levels deep ---
        self.level2 = fitz.open(_fixture_path("level2.pdf"))
        self.level2_recipe = _load_recipe("level2_recipe.toml")
        self.level2_expect = [
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=237.6484375),
            ToCEntry(level=1, title='2 Section Two',
                     pagenum=1, vpos=567.3842163085938),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=2, vpos=452.56671142578125),
            ToCEntry(level=1,
                     title='3 Section Three, with looong loooong looong ti- tle',
                     pagenum=3, vpos=335.569580078125),
            ToCEntry(level=2, title='3.1 Subsection Three.One, '
                     'with even loooooooooooonger title, and probably even more',
                     pagenum=3, vpos=619.4886474609375),
            ToCEntry(level=2, title='3.2 Subsection Three.Two',
                     pagenum=4, vpos=512.3426513671875),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=5, vpos=125.79861450195312),
            ToCEntry(level=1, title='4 The End',
                     pagenum=5, vpos=366.62347412109375),
        ]

        # --- onepage: every heading sits on page 1 ---
        self.onepage = fitz.open(_fixture_path("onepage.pdf"))
        self.onepage_recipe = _load_recipe("onepage_recipe.toml")
        self.onepage_greedy = _load_recipe("onepage_greedy.toml")

        # Headings expected from both onepage recipes. The two expectations
        # below differ only by one leading false positive, so they are built
        # from this single list to keep them from drifting apart.
        onepage_headings = [
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=237.6484375),
            ToCEntry(level=1, title='2 Section Two',
                     pagenum=1, vpos=265.44744873046875),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=1, vpos=291.0536804199219),
            ToCEntry(level=2, title='2.2 Subsection Two.Two \xd7 2',
                     pagenum=1, vpos=311.1368103027344),
            ToCEntry(level=1,
                     title='3 Section Three, with looong loooong looong ti- tle',
                     pagenum=1, vpos=334.00946044921875),
            ToCEntry(level=2, title='3.1 Subsection Three.One, '
                     'with even loooooooooooonger title, and probably even more',
                     pagenum=1, vpos=377.5487060546875),
            ToCEntry(level=2, title='3.2 Subsection Three.Two',
                     pagenum=1, vpos=411.8786926269531),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=1, vpos=432.26068115234375),
            ToCEntry(level=3, title='3.3.1 Subsubsection Three.Three.One',
                     pagenum=1, vpos=452.1441345214844),
            ToCEntry(level=3, title='3.3.2 Subsubsection Three.Three.Two',
                     pagenum=1, vpos=470.53314208984375),
            ToCEntry(level=3, title='3.3.3 Subsubsection Three.Three.Three',
                     pagenum=1, vpos=488.9231262207031),
            ToCEntry(level=2, title='3.4 Subsection Three.Four',
                     pagenum=1, vpos=507.8106994628906),
            ToCEntry(level=2, title='3.5 Subsection Three.Five',
                     pagenum=1, vpos=528.191650390625),
            ToCEntry(level=1, title='4 The End',
                     pagenum=1, vpos=550.7654418945312),
        ]

        self.onepage_expect = [
            # false positive, but easy to remove in post-processing
            ToCEntry(level=2, title='krasjet',
                     pagenum=1, vpos=196.53366088867188),
        ] + onepage_headings

        # hooray, no more false positives
        self.onepage_greedy_expect = list(onepage_headings)

        # --- hardmode: headings containing math and broken-up text ---
        self.hardmode = fitz.open(_fixture_path("hardmode.pdf"))
        self.hardmode_recipe = _load_recipe("hardmode_recipe.toml")
        self.hardmode_expect = [
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=174.1232452392578),
            ToCEntry(level=1, title='2 Section 1 + 1 = 2',
                     pagenum=1, vpos=584.5831909179688),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=1, vpos=425.2061462402344),
            ToCEntry(level=1, title='e ln(3)',
                     pagenum=2, vpos=516.01708984375),
            ToCEntry(level=2, title='3.1 Subsection e ln(3) .1, '
                     'with looo- ooooooooong title',
                     pagenum=2, vpos=302.5021057128906),
            ToCEntry(level=2,
                     title='3.2 S ubsection Three.Two, another long title',
                     pagenum=3, vpos=396.212158203125),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=3, vpos=68.84815979003906),
            ToCEntry(level=1, title='4 The x → ∞ End',
                     pagenum=3, vpos=483.49920654296875),
        ]

    with it("generates 2-level toc correctly"):
        assert gen_toc(self.level2, self.level2_recipe) == self.level2_expect

    with it("handles headings on same page correctly"):
        assert gen_toc(
            self.onepage, self.onepage_recipe
        ) == self.onepage_expect

    # NOTE(review): this spec uses exactly the same document, recipe and
    # expectation as the spec above. The only math in that expectation is the
    # multiplication sign in '2.2 Subsection Two.Two \xd7 2'. Confirm whether a
    # dedicated fixture was intended here.
    with it("handles math in heading correctly"):
        assert gen_toc(
            self.onepage, self.onepage_recipe
        ) == self.onepage_expect

    with it("handles greedy filter correctly"):
        assert gen_toc(
            self.onepage, self.onepage_greedy
        ) == self.onepage_greedy_expect

    with it("passes the HARD MODE"):
        assert gen_toc(
            self.hardmode, self.hardmode_recipe
        ) == self.hardmode_expect