import os import fitz import toml from mamba import description, it, before from fitzutils import ToCEntry from pdftocgen.tocgen import gen_toc dirpath = os.path.dirname(os.path.abspath(__file__)) with description("gen_toc") as self: with before.all: self.level2 = fitz.open(os.path.join(dirpath, "files/level2.pdf")) self.level2_recipe = toml.load( open(os.path.join(dirpath, "files/level2_recipe.toml")) ) self.level2_expect = [ ToCEntry(level=1, title='1 Section One', pagenum=1, vpos=237.6484375), ToCEntry(level=1, title='2 Section Two', pagenum=1, vpos=567.3842163085938), ToCEntry(level=2, title='2.1 Subsection Two.One', pagenum=2, vpos=452.56671142578125), ToCEntry(level=1, title='3 Section Three, with looong loooong looong ti- tle', pagenum=3, vpos=335.569580078125), ToCEntry(level=2, title='3.1 Subsection Three.One, ' 'with even loooooooooooonger title, and probably even more', pagenum=3, vpos=619.4886474609375), ToCEntry(level=2, title='3.2 Subsection Three.Two', pagenum=4, vpos=512.3426513671875), ToCEntry(level=2, title='3.3 Subsection Three.Three', pagenum=5, vpos=125.79861450195312), ToCEntry(level=1, title='4 The End', pagenum=5, vpos=366.62347412109375) ] self.onepage = fitz.open(os.path.join(dirpath, "files/onepage.pdf")) self.onepage_recipe = toml.load( open(os.path.join(dirpath, "files/onepage_recipe.toml")) ) self.onepage_greedy = toml.load( open(os.path.join(dirpath, "files/onepage_greedy.toml")) ) self.onepage_expect = [ # false positive, but easy to remove in post-processing ToCEntry(level=2, title='krasjet', pagenum=1, vpos=196.53366088867188), ToCEntry(level=1, title='1 Section One', pagenum=1, vpos=237.6484375), ToCEntry(level=1, title='2 Section Two', pagenum=1, vpos=265.44744873046875), ToCEntry(level=2, title='2.1 Subsection Two.One', pagenum=1, vpos=291.0536804199219), ToCEntry(level=2, title='2.2 Subsection Two.Two \xd7 2', pagenum=1, vpos=311.1368103027344), ToCEntry(level=1, title='3 Section Three, with looong loooong looong ti- tle', pagenum=1, vpos=334.00946044921875), ToCEntry(level=2, title='3.1 Subsection Three.One, ' 'with even loooooooooooonger title, and probably even more', pagenum=1, vpos=377.5487060546875), ToCEntry(level=2, title='3.2 Subsection Three.Two', pagenum=1, vpos=411.8786926269531), ToCEntry(level=2, title='3.3 Subsection Three.Three', pagenum=1, vpos=432.26068115234375), ToCEntry(level=3, title='3.3.1 Subsubsection Three.Three.One', pagenum=1, vpos=452.1441345214844), ToCEntry(level=3, title='3.3.2 Subsubsection Three.Three.Two', pagenum=1, vpos=470.53314208984375), ToCEntry(level=3, title='3.3.3 Subsubsection Three.Three.Three', pagenum=1, vpos=488.9231262207031), ToCEntry(level=2, title='3.4 Subsection Three.Four', pagenum=1, vpos=507.8106994628906), ToCEntry(level=2, title='3.5 Subsection Three.Five', pagenum=1, vpos=528.191650390625), ToCEntry(level=1, title='4 The End', pagenum=1, vpos=550.7654418945312) ] self.onepage_greedy_expect = [ # hooray, no more false positives ToCEntry(level=1, title='1 Section One', pagenum=1, vpos=237.6484375), ToCEntry(level=1, title='2 Section Two', pagenum=1, vpos=265.44744873046875), ToCEntry(level=2, title='2.1 Subsection Two.One', pagenum=1, vpos=291.0536804199219), ToCEntry(level=2, title='2.2 Subsection Two.Two \xd7 2', pagenum=1, vpos=311.1368103027344), ToCEntry(level=1, title='3 Section Three, with looong loooong looong ti- tle', pagenum=1, vpos=334.00946044921875), ToCEntry(level=2, title='3.1 Subsection Three.One, ' 'with even loooooooooooonger title, and probably even more', pagenum=1, vpos=377.5487060546875), ToCEntry(level=2, title='3.2 Subsection Three.Two', pagenum=1, vpos=411.8786926269531), ToCEntry(level=2, title='3.3 Subsection Three.Three', pagenum=1, vpos=432.26068115234375), ToCEntry(level=3, title='3.3.1 Subsubsection Three.Three.One', pagenum=1, vpos=452.1441345214844), ToCEntry(level=3, title='3.3.2 Subsubsection Three.Three.Two', pagenum=1, vpos=470.53314208984375), ToCEntry(level=3, title='3.3.3 Subsubsection Three.Three.Three', pagenum=1, vpos=488.9231262207031), ToCEntry(level=2, title='3.4 Subsection Three.Four', pagenum=1, vpos=507.8106994628906), ToCEntry(level=2, title='3.5 Subsection Three.Five', pagenum=1, vpos=528.191650390625), ToCEntry(level=1, title='4 The End', pagenum=1, vpos=550.7654418945312) ] self.hardmode = fitz.open(os.path.join(dirpath, "files/hardmode.pdf")) self.hardmode_recipe = toml.load( open(os.path.join(dirpath, "files/hardmode_recipe.toml")) ) self.hardmode_expect = [ ToCEntry(level=1, title='1 Section One', pagenum=1, vpos=174.1232452392578), ToCEntry(level=1, title='2 Section 1 + 1 = 2', pagenum=1, vpos=584.5831909179688), ToCEntry(level=2, title='2.1 Subsection Two.One', pagenum=1, vpos=425.2061462402344), ToCEntry(level=1, title='e ln(3)', pagenum=2, vpos=516.01708984375), ToCEntry(level=2, title='3.1 Subsection e ln(3) .1, ' 'with looo- ooooooooong title', pagenum=2, vpos=302.5021057128906), ToCEntry(level=2, title='3.2 S ubsection Three.Two, another long title', pagenum=3, vpos=396.212158203125), ToCEntry(level=2, title='3.3 Subsection Three.Three', pagenum=3, vpos=68.84815979003906), ToCEntry(level=1, title='4 The x → ∞ End', pagenum=3, vpos=483.49920654296875) ] with it("generates 2-level toc correctly"): assert gen_toc(self.level2, self.level2_recipe) == self.level2_expect with it("handles headings on same page correctly"): assert gen_toc( self.onepage, self.onepage_recipe ) == self.onepage_expect with it("handles math in heading correctly"): assert gen_toc( self.onepage, self.onepage_recipe ) == self.onepage_expect with it("handles greedy filter correctly"): assert gen_toc( self.onepage, self.onepage_greedy ) == self.onepage_greedy_expect with it("passes the HARD MODE"): assert gen_toc( self.hardmode, self.hardmode_recipe ) == self.hardmode_expect