File size: 2,313 Bytes
046e3b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import io

from mamba import description, it, before
from fitzutils import (
    dump_toc,
    ToCEntry
)
from pdftocio.tocparser import parse_toc

dirpath = os.path.dirname(os.path.abspath(__file__))

valid_file = os.path.join(dirpath, "files/level2.pdf")
invalid_file = os.path.join(dirpath, "files/nothing.pdf")

with description("parse_toc") as self:
    with before.all:
        self.toc = [
            ToCEntry(level=1, title="title1", pagenum=1, vpos=100.0),
            ToCEntry(level=2, title="title2", pagenum=1, vpos=150.0),
            ToCEntry(level=3, title="title3", pagenum=2, vpos=90.0),
            ToCEntry(level=2, title="title4", pagenum=2, vpos=150.0),
            ToCEntry(level=2, title="title5", pagenum=3, vpos=0.0),
            ToCEntry(level=1, title="title6", pagenum=5, vpos=200.0)
        ]

        self.toc_novpos = [
            ToCEntry(level=1, title="title1", pagenum=1),
            ToCEntry(level=2, title="title2", pagenum=1),
            ToCEntry(level=3, title="title3", pagenum=2),
            ToCEntry(level=2, title="title4", pagenum=2),
            ToCEntry(level=2, title="title5", pagenum=3),
            ToCEntry(level=1, title="title6", pagenum=5)
        ]


    with it("can recover the result from dump_toc"):
        toc_s = dump_toc(self.toc, True)
        f = io.StringIO(toc_s)
        assert parse_toc(f) == self.toc
        assert parse_toc(f) != self.toc_novpos

        toc_s = dump_toc(self.toc_novpos, False)
        f = io.StringIO(toc_s)
        assert parse_toc(f) == self.toc_novpos
        assert parse_toc(f) != self.toc

    with it("escapes quotations correctly"):
        quoted = '"a ""quoted"" title" 2\n    "a single \'quoted\' title" 4'
        expect = [
            ToCEntry(level=1, title='a "quoted" title', pagenum=2),
            ToCEntry(level=2, title="a single 'quoted' title", pagenum=4)
        ]
        f = io.StringIO(quoted)
        assert parse_toc(f) == expect

    with it("raises error when toc entry is invalid"):
        malformed = '"entry" 1\n    "error entry"'
        f = io.StringIO(malformed)
        try:
            parse_toc(f)
        except IndexError:
            pass
        else:
            assert False, "must raise error"