Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +19 -13
InitialMarkups.py
CHANGED
|
@@ -34,11 +34,12 @@ from fuzzywuzzy import fuzz
|
|
| 34 |
import copy
|
| 35 |
import tsadropboxretrieval
|
| 36 |
|
|
|
|
| 37 |
|
| 38 |
def changepdflinks(data_list_JSON, pdflink):
|
| 39 |
print('Received JSON:', data_list_JSON)
|
| 40 |
|
| 41 |
-
# Ensure
|
| 42 |
if isinstance(data_list_JSON, str):
|
| 43 |
try:
|
| 44 |
data_list_JSON = json.loads(data_list_JSON)
|
|
@@ -47,29 +48,34 @@ def changepdflinks(data_list_JSON, pdflink):
|
|
| 47 |
elif not isinstance(data_list_JSON, list):
|
| 48 |
raise ValueError(f"Input must be JSON string or list, got {type(data_list_JSON)}")
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
for entry in data_list_JSON:
|
| 51 |
old_url = entry.get("NBSLink", "")
|
| 52 |
if not old_url:
|
| 53 |
continue
|
| 54 |
|
| 55 |
parsed = urllib.parse.urlparse(old_url)
|
| 56 |
-
query = urllib.parse.parse_qs(parsed.query)
|
| 57 |
-
|
| 58 |
-
# Replace only if present
|
| 59 |
-
if "pdfLink" in query:
|
| 60 |
-
# Decode old link for readability
|
| 61 |
-
decoded_pdf_link = urllib.parse.unquote(query["pdfLink"][0])
|
| 62 |
-
|
| 63 |
-
# Assign the new one (encode once)
|
| 64 |
-
query["pdfLink"] = [urllib.parse.quote(pdflink, safe=":/")]
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
return data_list_JSON
|
| 71 |
|
| 72 |
|
|
|
|
| 73 |
def get_regular_font_size_and_color(doc):
|
| 74 |
font_sizes = []
|
| 75 |
colors = []
|
|
|
|
| 34 |
import copy
|
| 35 |
import tsadropboxretrieval
|
| 36 |
|
| 37 |
+
import json, urllib.parse, copy
|
| 38 |
|
| 39 |
def changepdflinks(data_list_JSON, pdflink):
|
| 40 |
print('Received JSON:', data_list_JSON)
|
| 41 |
|
| 42 |
+
# Ensure list of dicts
|
| 43 |
if isinstance(data_list_JSON, str):
|
| 44 |
try:
|
| 45 |
data_list_JSON = json.loads(data_list_JSON)
|
|
|
|
| 48 |
elif not isinstance(data_list_JSON, list):
|
| 49 |
raise ValueError(f"Input must be JSON string or list, got {type(data_list_JSON)}")
|
| 50 |
|
| 51 |
+
# Work on a safe copy
|
| 52 |
+
data_list_JSON = copy.deepcopy(data_list_JSON)
|
| 53 |
+
|
| 54 |
for entry in data_list_JSON:
|
| 55 |
old_url = entry.get("NBSLink", "")
|
| 56 |
if not old_url:
|
| 57 |
continue
|
| 58 |
|
| 59 |
parsed = urllib.parse.urlparse(old_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
# Extract page/zoom fragment (if present)
|
| 62 |
+
fragment = parsed.fragment # e.g. "page=3&zoom=150"
|
| 63 |
+
print(fragment)
|
| 64 |
+
# Encode the new pdf link safely
|
| 65 |
+
encoded_pdf = urllib.parse.quote(pdflink, safe=":/?=&")
|
| 66 |
|
| 67 |
+
# Construct the new final link: encoded pdf link + old fragment
|
| 68 |
+
if fragment:
|
| 69 |
+
new_url = f"{encoded_pdf}#{fragment}"
|
| 70 |
+
else:
|
| 71 |
+
new_url = encoded_pdf
|
| 72 |
+
print(new_url)
|
| 73 |
+
entry["NBSLink"] = 'https://findconsole-initialmarkups.hf.space/view-pdf?pdfLink='+new_url
|
| 74 |
+
print(entry["NBSLink"] )
|
| 75 |
return data_list_JSON
|
| 76 |
|
| 77 |
|
| 78 |
+
|
| 79 |
def get_regular_font_size_and_color(doc):
|
| 80 |
font_sizes = []
|
| 81 |
colors = []
|