Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +12 -6
InitialMarkups.py
CHANGED
|
@@ -1040,29 +1040,35 @@ def extract_section_under_header(pdf_path):
|
|
| 1040 |
json_output=changepdflinks(json_output,pdflink)
|
| 1041 |
return pdf_bytes.getvalue(), docHighlights , json_output
|
| 1042 |
|
| 1043 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1044 |
# Loop through all entries and update their NBSLink
|
| 1045 |
for entry in data_list_JSON:
|
| 1046 |
-
old_url = entry
|
| 1047 |
|
| 1048 |
-
# Parse
|
| 1049 |
parsed = urllib.parse.urlparse(old_url)
|
| 1050 |
query = urllib.parse.parse_qs(parsed.query)
|
| 1051 |
|
| 1052 |
-
# Replace only the 'pdfLink' parameter
|
| 1053 |
if "pdfLink" in query:
|
| 1054 |
query["pdfLink"] = [pdflink]
|
| 1055 |
|
| 1056 |
-
# Rebuild
|
| 1057 |
new_query = urllib.parse.urlencode(query, doseq=True)
|
| 1058 |
|
| 1059 |
-
# Rebuild
|
| 1060 |
new_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}?{new_query}"
|
| 1061 |
if parsed.fragment:
|
| 1062 |
new_url += f"#{parsed.fragment}"
|
| 1063 |
|
| 1064 |
# Update the entry
|
| 1065 |
entry["NBSLink"] = new_url
|
|
|
|
| 1066 |
return data_list_JSON
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1040 |
json_output=changepdflinks(json_output,pdflink)
|
| 1041 |
return pdf_bytes.getvalue(), docHighlights , json_output
|
| 1042 |
|
| 1043 |
+
|
| 1044 |
+
def changepdflinks(data_list_JSON, pdflink):
    """Point the ``pdfLink`` query parameter of each entry's ``NBSLink`` at *pdflink*.

    Args:
        data_list_JSON: An iterable of dict-like entries, or a JSON string
            that decodes to one. Entries are modified in place.
        pdflink: The value to store in the ``pdfLink`` query parameter.

    Returns:
        The (decoded) collection of entries. Entries whose ``NBSLink`` is
        missing, empty, or has no ``pdfLink`` parameter are left untouched.
    """
    # If the input is a JSON string, convert it to a Python object first.
    if isinstance(data_list_JSON, str):
        data_list_JSON = json.loads(data_list_JSON)

    for entry in data_list_JSON:
        old_url = entry.get("NBSLink", "")
        if not old_url:
            # Nothing to rewrite; rebuilding an empty URL would store "://?".
            continue

        parsed = urllib.parse.urlparse(old_url)
        # keep_blank_values=True so parameters such as "flag=" survive the
        # parse/encode round trip instead of being silently dropped.
        query = urllib.parse.parse_qs(parsed.query, keep_blank_values=True)
        if "pdfLink" not in query:
            # Leave URLs without a pdfLink parameter byte-for-byte unchanged.
            continue

        # Replace only the 'pdfLink' parameter; every other one is kept.
        query["pdfLink"] = [pdflink]
        new_query = urllib.parse.urlencode(query, doseq=True)

        # urlunparse keeps scheme, netloc, path, params and the fragment
        # (page/zoom) intact, and copes with scheme-less/relative URLs.
        entry["NBSLink"] = urllib.parse.urlunparse(
            parsed._replace(query=new_query)
        )

    return data_list_JSON
|
| 1073 |
|
| 1074 |
|