Spaces:
Sleeping
Sleeping
File size: 1,312 Bytes
"""Parser for table of contents csv file"""
import csv
import sys
from typing import IO, List
from fitzutils import ToCEntry
from itertools import takewhile
def parse_entry(entry: List) -> ToCEntry:
    """Build a ToCEntry from a single row of the toc csv.

    After its leading empty-string fields, the row must hold the heading
    followed by the page number. Any fields beyond those two are forwarded
    to ToCEntry as extra positional arguments.

    Raises:
        IndexError: the row is too short to contain both a heading and a
            page number; a diagnostic is written to stderr before the
            exception is re-raised.
    """
    # Leading spaces in a row arrive here as empty '' fields, so the number
    # of leading '' fields is the indentation width of the line.
    indent = 0
    for field in entry:
        if field == '':
            indent += 1
        else:
            break

    try:
        level = indent // 4 + 1  # 4 spaces = 1 level
        heading = entry[indent]
        pagenum = int(entry[indent + 1])
        extras = entry[indent + 2:]  # vpos
        return ToCEntry(level, heading, pagenum, *extras)
    except IndexError as err:
        print(f"Unable to parse toc entry {entry};",
              f"Need at least {indent + 2} parts but only have {len(entry)}.",
              "Make sure the page number is present.",
              file=sys.stderr)
        raise err
def parse_toc(file: IO) -> List[ToCEntry]:
    """Read a toc file and return its rows as toc entries, in file order.

    The file is read as space-delimited csv with QUOTE_NONNUMERIC quoting,
    and every row is converted with parse_entry. Any exception raised while
    parsing a row propagates to the caller.
    """
    rows = csv.reader(
        file,
        lineterminator='\n',
        delimiter=' ',
        quoting=csv.QUOTE_NONNUMERIC,
    )
    return [parse_entry(row) for row in rows]
|