Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files
- InitialMarkups.py +2 -1
InitialMarkups.py
CHANGED
|
@@ -74,7 +74,7 @@ def get_toc_page_numbers(doc, max_pages_to_check=15):
|
|
| 74 |
# 2. NEW: Title Pattern (looking for specific headers)
|
| 75 |
# ^ and $ ensure the line is JUST that word (ignoring "The contents of the bag...")
|
| 76 |
# re.IGNORECASE makes it match "CONTENTS", "Contents", "Index", etc.
|
| 77 |
-
title_pattern = re.compile(r"^\s*(table of contents|contents|index)\s*$", re.IGNORECASE)
|
| 78 |
|
| 79 |
for page_num in range(min(len(doc), max_pages_to_check)):
|
| 80 |
page = doc.load_page(page_num)
|
|
@@ -108,6 +108,7 @@ def get_toc_page_numbers(doc, max_pages_to_check=15):
|
|
| 108 |
# If we found TOC pages (e.g., [2, 3]), we return [0, 1, 2, 3]
|
| 109 |
# This covers the cover page, inside cover, and the TOC itself.
|
| 110 |
if toc_pages:
|
|
|
|
| 111 |
last_toc_page = toc_pages[-1]
|
| 112 |
return list(range(0, last_toc_page + 1))
|
| 113 |
|
|
|
|
| 74 |
# 2. NEW: Title Pattern (looking for specific headers)
|
| 75 |
# ^ and $ ensure the line is JUST that word (ignoring "The contents of the bag...")
|
| 76 |
# re.IGNORECASE makes it match "CONTENTS", "Contents", "Index", etc.
|
| 77 |
+
title_pattern = re.compile(r"^\s*(table of contents|contents|index|content)\s*$", re.IGNORECASE)
|
| 78 |
|
| 79 |
for page_num in range(min(len(doc), max_pages_to_check)):
|
| 80 |
page = doc.load_page(page_num)
|
|
|
|
| 108 |
# If we found TOC pages (e.g., [2, 3]), we return [0, 1, 2, 3]
|
| 109 |
# This covers the cover page, inside cover, and the TOC itself.
|
| 110 |
if toc_pages:
|
| 111 |
+
print('toccc',toc_pages)
|
| 112 |
last_toc_page = toc_pages[-1]
|
| 113 |
return list(range(0, last_toc_page + 1))
|
| 114 |
|