Marthee committed on
Commit
6b0a48b
·
verified ·
1 Parent(s): 2802276

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +12 -6
InitialMarkups.py CHANGED
@@ -1040,29 +1040,35 @@ def extract_section_under_header(pdf_path):
1040
  json_output=changepdflinks(json_output,pdflink)
1041
  return pdf_bytes.getvalue(), docHighlights , json_output
1042
 
1043
def changepdflinks(data_list_JSON, pdflink):
    """Replace the ``pdfLink`` query parameter in every entry's ``NBSLink``.

    Args:
        data_list_JSON: Iterable of dict entries. Entries are modified in
            place.
        pdflink: New value for the ``pdfLink`` query parameter. It is
            URL-encoded when the query string is rebuilt.

    Returns:
        The same ``data_list_JSON`` object that was passed in.

    Entries with no ``NBSLink`` (or an empty one), and links whose query
    string has no ``pdfLink`` parameter, are left untouched.
    """
    # Loop through all entries and update their NBSLink
    for entry in data_list_JSON:
        old_url = entry.get("NBSLink")
        if not old_url:
            # No link to rewrite; skip instead of raising KeyError.
            continue

        # Parse the URL and query params. keep_blank_values=True so that
        # parameters such as "tag=" survive the round trip.
        parsed = urllib.parse.urlparse(old_url)
        query = urllib.parse.parse_qs(parsed.query, keep_blank_values=True)

        # Only links that actually carry a 'pdfLink' parameter are rewritten;
        # rebuilding the others would merely append a stray "?".
        if "pdfLink" not in query:
            continue
        query["pdfLink"] = [pdflink]

        # Rebuild the query string
        new_query = urllib.parse.urlencode(query, doseq=True)

        # Swap only the query component so scheme, host, path and the
        # fragment are carried over unchanged.
        entry["NBSLink"] = urllib.parse.urlunparse(
            parsed._replace(query=new_query)
        )

    return data_list_JSON
1067
 
1068
 
 
1040
  json_output=changepdflinks(json_output,pdflink)
1041
  return pdf_bytes.getvalue(), docHighlights , json_output
1042
 
1043
+
1044
def changepdflinks(data_list_JSON, pdflink):
    """Replace the ``pdfLink`` query parameter in every entry's ``NBSLink``.

    Args:
        data_list_JSON: Iterable of dict entries, or a JSON string that
            decodes to one. Entries are modified in place.
        pdflink: New value for the ``pdfLink`` query parameter. It is
            URL-encoded when the query string is rebuilt.

    Returns:
        The list of entries. When a JSON string was passed in, this is the
        decoded Python object, not a re-serialised string.

    Entries with no ``NBSLink`` (or an empty one), and links whose query
    string has no ``pdfLink`` parameter, are left untouched.
    """
    # If the input is a JSON string, convert it to a Python list
    if isinstance(data_list_JSON, str):
        data_list_JSON = json.loads(data_list_JSON)

    # Loop through all entries and update their NBSLink
    for entry in data_list_JSON:
        old_url = entry.get("NBSLink")
        if not old_url:
            # Rebuilding an empty URL would store the bogus value "://?".
            continue

        # Parse URL and query params. keep_blank_values=True so that
        # parameters such as "tag=" survive the round trip.
        parsed = urllib.parse.urlparse(old_url)
        query = urllib.parse.parse_qs(parsed.query, keep_blank_values=True)

        # Only links that actually carry a 'pdfLink' parameter are rewritten;
        # rebuilding the others would merely append a stray "?".
        if "pdfLink" not in query:
            continue
        query["pdfLink"] = [pdflink]

        # Rebuild query string
        new_query = urllib.parse.urlencode(query, doseq=True)

        # Swap only the query component so scheme, host, path and the
        # fragment (page/zoom) are carried over unchanged.
        entry["NBSLink"] = urllib.parse.urlunparse(
            parsed._replace(query=new_query)
        )

    return data_list_JSON
1073
 
1074