Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +32 -30
InitialMarkups.py
CHANGED
|
@@ -36,7 +36,38 @@ def filteredJsons(pdf_path,filteredjsonsfromrawan):
|
|
| 36 |
|
| 37 |
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def get_regular_font_size_and_color(doc):
|
| 41 |
font_sizes = []
|
| 42 |
colors = []
|
|
@@ -1041,35 +1072,6 @@ def extract_section_under_header(pdf_path):
|
|
| 1041 |
return pdf_bytes.getvalue(), docHighlights , json_output
|
| 1042 |
|
| 1043 |
|
| 1044 |
-
def changepdflinks(data_list_JSON, pdflink):
|
| 1045 |
-
# If the input is a JSON string, convert it to a Python list
|
| 1046 |
-
if isinstance(data_list_JSON, str):
|
| 1047 |
-
data_list_JSON = json.loads(data_list_JSON)
|
| 1048 |
-
|
| 1049 |
-
# Loop through all entries and update their NBSLink
|
| 1050 |
-
for entry in data_list_JSON:
|
| 1051 |
-
old_url = entry.get("NBSLink", "")
|
| 1052 |
-
|
| 1053 |
-
# Parse URL and query params
|
| 1054 |
-
parsed = urllib.parse.urlparse(old_url)
|
| 1055 |
-
query = urllib.parse.parse_qs(parsed.query)
|
| 1056 |
-
|
| 1057 |
-
# Replace only the 'pdfLink' parameter if present
|
| 1058 |
-
if "pdfLink" in query:
|
| 1059 |
-
query["pdfLink"] = [pdflink]
|
| 1060 |
-
|
| 1061 |
-
# Rebuild query string
|
| 1062 |
-
new_query = urllib.parse.urlencode(query, doseq=True)
|
| 1063 |
-
|
| 1064 |
-
# Rebuild full URL with the same fragment (page/zoom)
|
| 1065 |
-
new_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}?{new_query}"
|
| 1066 |
-
if parsed.fragment:
|
| 1067 |
-
new_url += f"#{parsed.fragment}"
|
| 1068 |
-
|
| 1069 |
-
# Update the entry
|
| 1070 |
-
entry["NBSLink"] = new_url
|
| 1071 |
-
|
| 1072 |
-
return data_list_JSON
|
| 1073 |
|
| 1074 |
|
| 1075 |
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
|
| 39 |
+
|
| 40 |
+
def changepdflinks(data_list_JSON, pdflink):
    """Point the ``pdfLink`` query parameter of every entry's ``NBSLink`` at *pdflink*.

    Args:
        data_list_JSON: A list of dict entries, or a JSON string that decodes
            to such a list. Each entry may carry an ``"NBSLink"`` URL.
        pdflink: The new value for the ``pdfLink`` query parameter.

    Returns:
        The list of entries. Entries are updated in place, so when a list is
        passed in, the same list object is returned.

    Entries are left untouched when ``NBSLink`` is missing, empty, not a
    string, or has no ``pdfLink`` parameter. Previously such entries were
    overwritten with a rebuilt URL (``"://?"`` for a missing link).
    """
    # If the input is a JSON string, convert it to a Python list.
    if isinstance(data_list_JSON, str):
        data_list_JSON = json.loads(data_list_JSON)

    for entry in data_list_JSON:
        old_url = entry.get("NBSLink")
        if not old_url or not isinstance(old_url, str):
            # Nothing to rewrite; do not invent a URL for this entry.
            continue

        parsed = urllib.parse.urlparse(old_url)
        # keep_blank_values=True so parameters such as "x=" survive the round trip.
        query = urllib.parse.parse_qs(parsed.query, keep_blank_values=True)

        # Replace only the 'pdfLink' parameter, and only if it is present.
        if "pdfLink" not in query:
            continue
        query["pdfLink"] = [pdflink]

        new_query = urllib.parse.urlencode(query, doseq=True)

        # _replace + geturl keeps every other URL component (path params and
        # the fragment carrying page/zoom) exactly as it was.
        entry["NBSLink"] = parsed._replace(query=new_query).geturl()

    return data_list_JSON
|
| 70 |
+
|
| 71 |
def get_regular_font_size_and_color(doc):
|
| 72 |
font_sizes = []
|
| 73 |
colors = []
|
|
|
|
| 1072 |
return pdf_bytes.getvalue(), docHighlights , json_output
|
| 1073 |
|
| 1074 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1075 |
|
| 1076 |
|
| 1077 |
|