InitialMarkups

Runtime error

Marthee committed on Nov 7, 2025

Commit

4221eea

verified ·

1 Parent(s): 9e1773c

Update InitialMarkups.py

Files changed (1) hide show

InitialMarkups.py CHANGED Viewed

@@ -997,12 +997,25 @@ def extract_section_under_header(multiplePDF_Paths):
                                         i += 2
                                         continue
                         if collecting:
                             try:
-                                if dateparse(line_text, fuzzy=True):
-                                    collecting=False
-                                    break
                             except (ParserError, ValueError, OverflowError):
                                 pass
                             norm_line = normalize_text(line_text)
                             # Optimized URL check

                                         i += 2
                                         continue
                         if collecting:
+                            # ----------------------------------------------------
+                            # Date-based stop condition (replaces the earlier `if dateparse(...)` check)
+                            # ----------------------------------------------------
+                            # NEW: Check if the line is a date, and if so, stop collecting
                             try:
+                                # Fuzzy-parse 'line_text'; if no exception is raised, the line is treated as a date
+                                dateparse(line_text, fuzzy=True)
+                                # --- Date Found: Stop Collecting ---
+                                print(f"🛑 Stop at date: '{line_text}'")
+                                collecting = False
+                                done = True # Mark this header as finished
+                                break_collecting = True # Signal outer loops to stop
+                                break # Break this 'while' loop
                             except (ParserError, ValueError, OverflowError):
+                                # No date found, continue normally to process this line
                                 pass
+                            # ----------------------------------------------------
                             norm_line = normalize_text(line_text)
                             # Optimized URL check