InitialMarkups

Runtime error

App Files Files Community

Marthee committed on Oct 29, 2025

Commit

242d2f6

verified ·

1 Parent(s): 4a2eef2

Update InitialMarkups.py

Browse files

Files changed (1) hide show

InitialMarkups.py +42 -55

InitialMarkups.py CHANGED Viewed

@@ -39,43 +39,30 @@ import json
 import copy
 import urllib.parse
-def changepdflinks(data_list_JSON, pdflink):
-    print('Received JSON:', data_list_JSON,pdflink)
-    # Ensure list of dicts
-    if isinstance(data_list_JSON, str):
-        try:
-            data_list_JSON = json.loads(data_list_JSON)
-        except json.JSONDecodeError:
-            raise ValueError(f"Invalid JSON string passed: {data_list_JSON[:200]}")
-    elif not isinstance(data_list_JSON, list):
-        raise ValueError(f"Input must be JSON string or list, got {type(data_list_JSON)}")
-    data_list_JSON = copy.deepcopy(data_list_JSON)
-    for entry in data_list_JSON:
-        old_url = entry.get("NBSLink", "")
-        print('old_url',old_url)
-        if not old_url:
-            continue
-        parsed = urllib.parse.urlparse(old_url)
-        fragment = parsed.fragment  # e.g. "page=3&zoom=150"
-        print("Fragment:", fragment)
-        # Encode the provided PDF link for safe use in query string
-        encoded_pdf = urllib.parse.quote(pdflink, safe='')
-        # Build final view link
-        if fragment:
-            new_url = f"https://findconsole-initialmarkups.hf.space/view-pdf?pdfLink={encoded_pdf}#{fragment}"
-        else:
-            new_url = f"https://findconsole-initialmarkups.hf.space/view-pdf?pdfLink={encoded_pdf}"
-        print('urlsent:',pdflink)
-        print("New URL:", new_url)
-        entry["NBSLink"] = new_url
-    return data_list_JSON
@@ -875,19 +862,19 @@ def extract_section_under_header(multiplePDF_Paths):
                                     pageNumberFound = page_num + 1
                                 # Build the query parameters
-                                    params = {
-                                        'pdfLink': pdf_path,  # Your PDF link
-                                        'keyword': heading_to_search,  # Your keyword (could be a string or list)
-                                    }
-                                    # URL encode each parameter
-                                    encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
-                                    # Construct the final encoded link
-                                    encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
-                                    # Correctly construct the final URL with page and zoom
-                                    final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
                                     # Get current date and time
                                     now = datetime.now()
@@ -898,7 +885,7 @@ def extract_section_under_header(multiplePDF_Paths):
                                     data_entry = {
-                                            "NBSLink": final_url,
                                             "Subject": heading_to_search,
                                             "Page": str(pageNumberFound),
                                             "Author": "ADR",
@@ -969,19 +956,19 @@ def extract_section_under_header(multiplePDF_Paths):
                                         pageNumberFound = page_num + 1
                                     # Build the query parameters
-                                        params = {
-                                            'pdfLink': pdf_path,  # Your PDF link
-                                            'keyword': heading_to_search,  # Your keyword (could be a string or list)
-                                        }
-                                        # URL encode each parameter
-                                        encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
-                                        # Construct the final encoded link
-                                        encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
-                                        # Correctly construct the final URL with page and zoom
-                                        final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
                                         # Get current date and time
                                         now = datetime.now()
@@ -992,7 +979,7 @@ def extract_section_under_header(multiplePDF_Paths):
                                         data_entry = {
-                                                "NBSLink": final_url,
                                                 "Subject": heading_to_search,
                                                 "Page": str(pageNumberFound),
                                                 "Author": "ADR",

 import copy
 import urllib.parse
+import urllib.parse
def setpdflinks(json_data, pdf_path):
    """Return copies of the entries with "NBSLink" expanded to a viewer URL.

    Each entry's current "NBSLink" value is used verbatim as the URL
    fragment (the part after ``#``) of a link to the hosted PDF viewer,
    with ``pdf_path`` percent-encoded into the ``pdfLink`` query parameter.

    Args:
        json_data: Iterable of dict entries. The "NBSLink" key may be
            missing, empty or None, in which case the link carries no
            fragment.
        pdf_path: Link to the PDF; every character outside the unreserved
            set is percent-encoded so it is safe inside a query string.

    Returns:
        A new list of new dicts. The input entries are left untouched, so
        calling this twice on the same data cannot nest an already-built
        URL inside the fragment.
    """
    base_viewer_link = "https://findconsole-initialmarkups.hf.space/view-pdf?"
    # The PDF link is identical for every entry, so encode it only once.
    encoded_pdf_link = urllib.parse.quote(pdf_path, safe='')
    viewer_url = f"{base_viewer_link}pdfLink={encoded_pdf_link}"

    updated_json = []
    for entry in json_data:
        # NOTE(review): the stored value is appended as-is after '#'. If the
        # producer stores only a zoom value, the viewer will not receive
        # "page=" / "zoom=" keys - confirm against the code that fills NBSLink.
        fragment = entry.get("NBSLink") or ""
        # Skip the '#' entirely when there is nothing to put after it.
        final_url = f"{viewer_url}#{fragment}" if fragment else viewer_url
        # Shallow copy is enough: only the top-level "NBSLink" key changes.
        updated_json.append({**entry, "NBSLink": final_url})
    return updated_json
                                     pageNumberFound = page_num + 1
                                 # Build the query parameters
+                                    # params = {
+                                    #     'pdfLink': pdf_path,  # Your PDF link
+                                    #     'keyword': heading_to_search,  # Your keyword (could be a string or list)
+                                    # }
+                                    # # URL encode each parameter
+                                    # encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
+                                    # # Construct the final encoded link
+                                    # encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
+                                    # # Correctly construct the final URL with page and zoom
+                                    # final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
                                     # Get current date and time
                                     now = datetime.now()
                                     data_entry = {
+                                            "NBSLink": zoom_str,
                                             "Subject": heading_to_search,
                                             "Page": str(pageNumberFound),
                                             "Author": "ADR",
                                         pageNumberFound = page_num + 1
                                     # Build the query parameters
+                                        # params = {
+                                        #     'pdfLink': pdf_path,  # Your PDF link
+                                        #     'keyword': heading_to_search,  # Your keyword (could be a string or list)
+                                        # }
+                                        # # URL encode each parameter
+                                        # encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
+                                        # # Construct the final encoded link
+                                        # encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
+                                        # # Correctly construct the final URL with page and zoom
+                                        # final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
                                         # Get current date and time
                                         now = datetime.now()
                                         data_entry = {
+                                                "NBSLink": zoom_str,
                                                 "Subject": heading_to_search,
                                                 "Page": str(pageNumberFound),
                                                 "Author": "ADR",