Marthee committed on
Commit
7b38140
·
verified ·
1 Parent(s): 7013b67

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +2 -1
InitialMarkups.py CHANGED
@@ -74,7 +74,7 @@ def get_toc_page_numbers(doc, max_pages_to_check=15):
74
  # 2. NEW: Title Pattern (looking for specific headers)
75
  # ^ and $ ensure the line is JUST that word (ignoring "The contents of the bag...")
76
  # re.IGNORECASE makes it match "CONTENTS", "Contents", "Index", etc.
77
- title_pattern = re.compile(r"^\s*(table of contents|contents|index)\s*$", re.IGNORECASE)
78
 
79
  for page_num in range(min(len(doc), max_pages_to_check)):
80
  page = doc.load_page(page_num)
@@ -108,6 +108,7 @@ def get_toc_page_numbers(doc, max_pages_to_check=15):
108
  # If we found TOC pages (e.g., [2, 3]), we return [0, 1, 2, 3]
109
  # This covers the cover page, inside cover, and the TOC itself.
110
  if toc_pages:
 
111
  last_toc_page = toc_pages[-1]
112
  return list(range(0, last_toc_page + 1))
113
 
 
74
  # 2. NEW: Title Pattern (looking for specific headers)
75
  # ^ and $ ensure the line is JUST that word (ignoring "The contents of the bag...")
76
  # re.IGNORECASE makes it match "CONTENTS", "Contents", "Index", etc.
77
+ title_pattern = re.compile(r"^\s*(table of contents|contents|index|content)\s*$", re.IGNORECASE)
78
 
79
  for page_num in range(min(len(doc), max_pages_to_check)):
80
  page = doc.load_page(page_num)
 
108
  # If we found TOC pages (e.g., [2, 3]), we return [0, 1, 2, 3]
109
  # This covers the cover page, inside cover, and the TOC itself.
110
  if toc_pages:
111
+ print('toccc',toc_pages)
112
  last_toc_page = toc_pages[-1]
113
  return list(range(0, last_toc_page + 1))
114