Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +16 -3
InitialMarkups.py
CHANGED
|
@@ -997,12 +997,25 @@ def extract_section_under_header(multiplePDF_Paths):
|
|
| 997 |
i += 2
|
| 998 |
continue
|
| 999 |
if collecting:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1000 |
try:
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
except (ParserError, ValueError, OverflowError):
|
|
|
|
| 1005 |
pass
|
|
|
|
| 1006 |
norm_line = normalize_text(line_text)
|
| 1007 |
|
| 1008 |
# Optimized URL check
|
|
|
|
| 997 |
i += 2
|
| 998 |
continue
|
| 999 |
if collecting:
|
| 1000 |
+
# ----------------------------------------------------
|
| 1001 |
+
# ADD THIS BLOCK IN ITS PLACE
|
| 1002 |
+
# ----------------------------------------------------
|
| 1003 |
+
# NEW: Check if the line is a date, and if so, stop collecting
|
| 1004 |
try:
|
| 1005 |
+
# NOTE(review): the call below parses 'line_text', not 'line_text_raw' — confirm which variable is intended
|
| 1006 |
+
dateparse(line_text, fuzzy=True)
|
| 1007 |
+
|
| 1008 |
+
# --- Date Found: Stop Collecting ---
|
| 1009 |
+
print(f"🛑 Stop at date: '{line_text}'")
|
| 1010 |
+
collecting = False
|
| 1011 |
+
done = True # Mark this header as finished
|
| 1012 |
+
break_collecting = True # Signal outer loops to stop
|
| 1013 |
+
break # Break this 'while' loop
|
| 1014 |
+
|
| 1015 |
except (ParserError, ValueError, OverflowError):
|
| 1016 |
+
# No date found, continue normally to process this line
|
| 1017 |
pass
|
| 1018 |
+
# ----------------------------------------------------
|
| 1019 |
norm_line = normalize_text(line_text)
|
| 1020 |
|
| 1021 |
# Optimized URL check
|