Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +3 -23
InitialMarkups.py
CHANGED
|
@@ -13,8 +13,7 @@ tobebilledonlyLink='https://adr.trevorsadd.co.uk/api/view-pdf-tobebilled?'
|
|
| 13 |
|
| 14 |
|
| 15 |
|
| 16 |
-
|
| 17 |
-
from dateutil.parser._parser import ParserError # More specific error
|
| 18 |
from urllib.parse import urlparse, unquote
|
| 19 |
import os
|
| 20 |
from io import BytesIO
|
|
@@ -43,7 +42,7 @@ import urllib.parse
|
|
| 43 |
|
| 44 |
def changepdflinks(json_data, pdf_path):
|
| 45 |
print('ll , ' ,json_data,pdf_path)
|
| 46 |
-
|
| 47 |
|
| 48 |
updated_json = []
|
| 49 |
for entry in json_data:
|
|
@@ -55,7 +54,7 @@ def changepdflinks(json_data, pdf_path):
|
|
| 55 |
encoded_pdf_link = urllib.parse.quote(pdf_path, safe='')
|
| 56 |
|
| 57 |
# Construct the final link
|
| 58 |
-
final_url = f"{
|
| 59 |
|
| 60 |
# Replace the old NBSLink value with the full URL
|
| 61 |
entry["NBSLink"] = final_url
|
|
@@ -997,25 +996,6 @@ def extract_section_under_header(multiplePDF_Paths):
|
|
| 997 |
i += 2
|
| 998 |
continue
|
| 999 |
if collecting:
|
| 1000 |
-
# ----------------------------------------------------
|
| 1001 |
-
# ADD THIS BLOCK IN ITS PLACE
|
| 1002 |
-
# ----------------------------------------------------
|
| 1003 |
-
# NEW: Check if the line is a date, and if so, stop collecting
|
| 1004 |
-
try:
|
| 1005 |
-
# Use the 'line_text_raw' we defined earlier for an accurate parse
|
| 1006 |
-
dateparse(line_text, fuzzy=True)
|
| 1007 |
-
|
| 1008 |
-
# --- Date Found: Stop Collecting ---
|
| 1009 |
-
print(f"🛑 Stop at date: '{line_text}'")
|
| 1010 |
-
collecting = False
|
| 1011 |
-
done = True # Mark this header as finished
|
| 1012 |
-
break_collecting = True # Signal outer loops to stop
|
| 1013 |
-
break # Break this 'while' loop
|
| 1014 |
-
|
| 1015 |
-
except (ParserError, ValueError, OverflowError):
|
| 1016 |
-
# No date found, continue normally to process this line
|
| 1017 |
-
pass
|
| 1018 |
-
# ----------------------------------------------------
|
| 1019 |
norm_line = normalize_text(line_text)
|
| 1020 |
|
| 1021 |
# Optimized URL check
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
|
| 16 |
+
|
|
|
|
| 17 |
from urllib.parse import urlparse, unquote
|
| 18 |
import os
|
| 19 |
from io import BytesIO
|
|
|
|
| 42 |
|
| 43 |
def changepdflinks(json_data, pdf_path):
|
| 44 |
print('ll , ' ,json_data,pdf_path)
|
| 45 |
+
base_viewer_link = "https://findconsole-initialmarkups.hf.space/view-pdf?"
|
| 46 |
|
| 47 |
updated_json = []
|
| 48 |
for entry in json_data:
|
|
|
|
| 54 |
encoded_pdf_link = urllib.parse.quote(pdf_path, safe='')
|
| 55 |
|
| 56 |
# Construct the final link
|
| 57 |
+
final_url = f"{base_viewer_link}pdfLink={encoded_pdf_link}#page={str(page_str)}&zoom={zoom_str}"
|
| 58 |
|
| 59 |
# Replace the old NBSLink value with the full URL
|
| 60 |
entry["NBSLink"] = final_url
|
|
|
|
| 996 |
i += 2
|
| 997 |
continue
|
| 998 |
if collecting:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 999 |
norm_line = normalize_text(line_text)
|
| 1000 |
|
| 1001 |
# Optimized URL check
|