Spaces:
Sleeping
Sleeping
File size: 7,940 Bytes
046e3b8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | import os
import fitz
import toml
from mamba import description, it, before
from fitzutils import ToCEntry
from pdftocgen.tocgen import gen_toc
# Directory containing this spec; fixture paths below are resolved against it.
dirpath = os.path.dirname(os.path.abspath(__file__))


def _load_recipe(relpath):
    """Parse the TOML recipe at ``relpath`` (relative to this spec's directory).

    The file is opened inside a ``with`` block so the handle is closed as
    soon as parsing finishes, and it is read as UTF-8 rather than with the
    locale-dependent default encoding.
    """
    with open(os.path.join(dirpath, relpath), encoding="utf-8") as recipe_file:
        return toml.load(recipe_file)


with description("gen_toc") as self:
    with before.all:
        # Fixtures are built once and shared by every example below.

        # --- level2: headings spread across several pages ---
        self.level2 = fitz.open(os.path.join(dirpath, "files/level2.pdf"))
        self.level2_recipe = _load_recipe("files/level2_recipe.toml")
        self.level2_expect = [
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=237.6484375),
            ToCEntry(level=1, title='2 Section Two',
                     pagenum=1, vpos=567.3842163085938),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=2, vpos=452.56671142578125),
            ToCEntry(level=1,
                     title='3 Section Three, with looong loooong looong ti- tle',
                     pagenum=3, vpos=335.569580078125),
            ToCEntry(level=2, title='3.1 Subsection Three.One, '
                     'with even loooooooooooonger title, and probably even more',
                     pagenum=3, vpos=619.4886474609375),
            ToCEntry(level=2, title='3.2 Subsection Three.Two',
                     pagenum=4, vpos=512.3426513671875),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=5, vpos=125.79861450195312),
            ToCEntry(level=1, title='4 The End',
                     pagenum=5, vpos=366.62347412109375)
        ]

        # --- onepage: every heading sits on page 1; two recipes are used ---
        self.onepage = fitz.open(os.path.join(dirpath, "files/onepage.pdf"))
        self.onepage_recipe = _load_recipe("files/onepage_recipe.toml")
        self.onepage_greedy = _load_recipe("files/onepage_greedy.toml")
        self.onepage_expect = [
            # false positive, but easy to remove in post-processing
            ToCEntry(level=2, title='krasjet',
                     pagenum=1, vpos=196.53366088867188),
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=237.6484375),
            ToCEntry(level=1, title='2 Section Two',
                     pagenum=1, vpos=265.44744873046875),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=1, vpos=291.0536804199219),
            ToCEntry(level=2, title='2.2 Subsection Two.Two \xd7 2',
                     pagenum=1, vpos=311.1368103027344),
            ToCEntry(level=1, title='3 Section Three, with looong loooong looong ti- tle',
                     pagenum=1, vpos=334.00946044921875),
            ToCEntry(level=2, title='3.1 Subsection Three.One, '
                     'with even loooooooooooonger title, and probably even more',
                     pagenum=1, vpos=377.5487060546875),
            ToCEntry(level=2, title='3.2 Subsection Three.Two',
                     pagenum=1, vpos=411.8786926269531),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=1, vpos=432.26068115234375),
            ToCEntry(level=3, title='3.3.1 Subsubsection Three.Three.One',
                     pagenum=1, vpos=452.1441345214844),
            ToCEntry(level=3, title='3.3.2 Subsubsection Three.Three.Two',
                     pagenum=1, vpos=470.53314208984375),
            ToCEntry(level=3, title='3.3.3 Subsubsection Three.Three.Three',
                     pagenum=1, vpos=488.9231262207031),
            ToCEntry(level=2, title='3.4 Subsection Three.Four',
                     pagenum=1, vpos=507.8106994628906),
            ToCEntry(level=2, title='3.5 Subsection Three.Five',
                     pagenum=1, vpos=528.191650390625),
            ToCEntry(level=1, title='4 The End',
                     pagenum=1, vpos=550.7654418945312)
        ]
        # Same list as onepage_expect minus the leading 'krasjet' entry.
        self.onepage_greedy_expect = [
            # hooray, no more false positives
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=237.6484375),
            ToCEntry(level=1, title='2 Section Two',
                     pagenum=1, vpos=265.44744873046875),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=1, vpos=291.0536804199219),
            ToCEntry(level=2, title='2.2 Subsection Two.Two \xd7 2',
                     pagenum=1, vpos=311.1368103027344),
            ToCEntry(level=1, title='3 Section Three, with looong loooong looong ti- tle',
                     pagenum=1, vpos=334.00946044921875),
            ToCEntry(level=2, title='3.1 Subsection Three.One, '
                     'with even loooooooooooonger title, and probably even more',
                     pagenum=1, vpos=377.5487060546875),
            ToCEntry(level=2, title='3.2 Subsection Three.Two',
                     pagenum=1, vpos=411.8786926269531),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=1, vpos=432.26068115234375),
            ToCEntry(level=3, title='3.3.1 Subsubsection Three.Three.One',
                     pagenum=1, vpos=452.1441345214844),
            ToCEntry(level=3, title='3.3.2 Subsubsection Three.Three.Two',
                     pagenum=1, vpos=470.53314208984375),
            ToCEntry(level=3, title='3.3.3 Subsubsection Three.Three.Three',
                     pagenum=1, vpos=488.9231262207031),
            ToCEntry(level=2, title='3.4 Subsection Three.Four',
                     pagenum=1, vpos=507.8106994628906),
            ToCEntry(level=2, title='3.5 Subsection Three.Five',
                     pagenum=1, vpos=528.191650390625),
            ToCEntry(level=1, title='4 The End',
                     pagenum=1, vpos=550.7654418945312)
        ]

        # --- hardmode ---
        self.hardmode = fitz.open(os.path.join(dirpath, "files/hardmode.pdf"))
        self.hardmode_recipe = _load_recipe("files/hardmode_recipe.toml")
        self.hardmode_expect = [
            ToCEntry(level=1, title='1 Section One',
                     pagenum=1, vpos=174.1232452392578),
            ToCEntry(level=1, title='2 Section 1 + 1 = 2',
                     pagenum=1, vpos=584.5831909179688),
            ToCEntry(level=2, title='2.1 Subsection Two.One',
                     pagenum=1, vpos=425.2061462402344),
            ToCEntry(level=1, title='e ln(3)',
                     pagenum=2, vpos=516.01708984375),
            ToCEntry(level=2, title='3.1 Subsection e ln(3) .1, '
                     'with looo- ooooooooong title',
                     pagenum=2, vpos=302.5021057128906),
            ToCEntry(level=2, title='3.2 S ubsection Three.Two, another long title',
                     pagenum=3, vpos=396.212158203125),
            ToCEntry(level=2, title='3.3 Subsection Three.Three',
                     pagenum=3, vpos=68.84815979003906),
            ToCEntry(level=1, title='4 The x → ∞ End',
                     pagenum=3, vpos=483.49920654296875)
        ]

    with it("generates 2-level toc correctly"):
        assert gen_toc(self.level2, self.level2_recipe) == self.level2_expect

    with it("handles headings on same page correctly"):
        assert gen_toc(
            self.onepage, self.onepage_recipe
        ) == self.onepage_expect

    # NOTE(review): this example passes exactly the same document, recipe and
    # expectation as the one above, so it adds no extra coverage as written.
    # Confirm whether a different fixture was intended here.
    with it("handles math in heading correctly"):
        assert gen_toc(
            self.onepage, self.onepage_recipe
        ) == self.onepage_expect

    with it("handles greedy filter correctly"):
        assert gen_toc(
            self.onepage, self.onepage_greedy
        ) == self.onepage_greedy_expect

    with it("passes the HARD MODE"):
        assert gen_toc(
            self.hardmode, self.hardmode_recipe
        ) == self.hardmode_expect
|