Marthee committed on
Commit
2bf08bc
·
verified ·
1 Parent(s): 88906a0

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +32 -30
InitialMarkups.py CHANGED
@@ -36,7 +36,38 @@ def filteredJsons(pdf_path,filteredjsonsfromrawan):
36
 
37
 
38
 
39
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def get_regular_font_size_and_color(doc):
41
  font_sizes = []
42
  colors = []
@@ -1041,35 +1072,6 @@ def extract_section_under_header(pdf_path):
1041
  return pdf_bytes.getvalue(), docHighlights , json_output
1042
 
1043
 
1044
- def changepdflinks(data_list_JSON, pdflink):
1045
- # If the input is a JSON string, convert it to a Python list
1046
- if isinstance(data_list_JSON, str):
1047
- data_list_JSON = json.loads(data_list_JSON)
1048
-
1049
- # Loop through all entries and update their NBSLink
1050
- for entry in data_list_JSON:
1051
- old_url = entry.get("NBSLink", "")
1052
-
1053
- # Parse URL and query params
1054
- parsed = urllib.parse.urlparse(old_url)
1055
- query = urllib.parse.parse_qs(parsed.query)
1056
-
1057
- # Replace only the 'pdfLink' parameter if present
1058
- if "pdfLink" in query:
1059
- query["pdfLink"] = [pdflink]
1060
-
1061
- # Rebuild query string
1062
- new_query = urllib.parse.urlencode(query, doseq=True)
1063
-
1064
- # Rebuild full URL with the same fragment (page/zoom)
1065
- new_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}?{new_query}"
1066
- if parsed.fragment:
1067
- new_url += f"#{parsed.fragment}"
1068
-
1069
- # Update the entry
1070
- entry["NBSLink"] = new_url
1071
-
1072
- return data_list_JSON
1073
 
1074
 
1075
 
 
36
 
37
 
38
 
39
+
40
+ def changepdflinks(data_list_JSON, pdflink):
41
+ print('henaaaa weee')
42
+ # If the input is a JSON string, convert it to a Python list
43
+ if isinstance(data_list_JSON, str):
44
+ data_list_JSON = json.loads(data_list_JSON)
45
+
46
+ # Loop through all entries and update their NBSLink
47
+ for entry in data_list_JSON:
48
+ old_url = entry.get("NBSLink", "")
49
+
50
+ # Parse URL and query params
51
+ parsed = urllib.parse.urlparse(old_url)
52
+ query = urllib.parse.parse_qs(parsed.query)
53
+
54
+ # Replace only the 'pdfLink' parameter if present
55
+ if "pdfLink" in query:
56
+ query["pdfLink"] = [pdflink]
57
+
58
+ # Rebuild query string
59
+ new_query = urllib.parse.urlencode(query, doseq=True)
60
+
61
+ # Rebuild full URL with the same fragment (page/zoom)
62
+ new_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}?{new_query}"
63
+ if parsed.fragment:
64
+ new_url += f"#{parsed.fragment}"
65
+
66
+ # Update the entry
67
+ entry["NBSLink"] = new_url
68
+
69
+ return data_list_JSON
70
+
71
  def get_regular_font_size_and_color(doc):
72
  font_sizes = []
73
  colors = []
 
1072
  return pdf_bytes.getvalue(), docHighlights , json_output
1073
 
1074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1075
 
1076
 
1077