Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +48 -1
InitialMarkups.py
CHANGED
|
@@ -6,7 +6,7 @@ Automatically generated by Colab.
|
|
| 6 |
Original file is located at
|
| 7 |
https://colab.research.google.com/drive/12XfVkmKmN3oVjHhLVE0_GgkftgArFEK2
|
| 8 |
"""
|
| 9 |
-
baselink='https://
|
| 10 |
|
| 11 |
|
| 12 |
|
|
@@ -898,6 +898,53 @@ def extract_section_under_header(pdf_path):
|
|
| 898 |
current_bbox[page_num] = header_bbox
|
| 899 |
|
| 900 |
last_y1s[page_num] = header_bbox[3]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 901 |
i += 2
|
| 902 |
continue
|
| 903 |
if collecting:
|
|
|
|
| 6 |
Original file is located at
|
| 7 |
https://colab.research.google.com/drive/12XfVkmKmN3oVjHhLVE0_GgkftgArFEK2
|
| 8 |
"""
|
| 9 |
+
baselink='https://findconsole-initialmarkups.hf.space/view-pdf?'
|
| 10 |
|
| 11 |
|
| 12 |
|
|
|
|
| 898 |
current_bbox[page_num] = header_bbox
|
| 899 |
|
| 900 |
last_y1s[page_num] = header_bbox[3]
|
| 901 |
+
x0, y0, x1, y1 = header_bbox
|
| 902 |
+
zoom = 200
|
| 903 |
+
left = int(x0)
|
| 904 |
+
top = int(y0)
|
| 905 |
+
zoom_str = f"{zoom},{left},{top}"
|
| 906 |
+
pageNumberFound = page_num + 1
|
| 907 |
+
|
| 908 |
+
# Build the query parameters
|
| 909 |
+
params = {
|
| 910 |
+
'pdfLink': pdf_path, # Your PDF link
|
| 911 |
+
'keyword': heading_to_search, # Your keyword (could be a string or list)
|
| 912 |
+
}
|
| 913 |
+
|
| 914 |
+
# URL encode each parameter
|
| 915 |
+
encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
|
| 916 |
+
|
| 917 |
+
# Construct the final encoded link
|
| 918 |
+
encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
|
| 919 |
+
|
| 920 |
+
# Correctly construct the final URL with page and zoom
|
| 921 |
+
final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
|
| 922 |
+
|
| 923 |
+
# Get current date and time
|
| 924 |
+
now = datetime.now()
|
| 925 |
+
|
| 926 |
+
# Format the output
|
| 927 |
+
formatted_time = now.strftime("%d/%m/%Y %I:%M:%S %p")
|
| 928 |
+
# Build the markup record (link, subject, page, timestamps, parent headings) and collect it in data_list_JSON
|
| 929 |
+
|
| 930 |
+
|
| 931 |
+
data_entry = {
|
| 932 |
+
"NBSLink": final_url,
|
| 933 |
+
"Subject": heading_to_search,
|
| 934 |
+
"Page": str(pageNumberFound),
|
| 935 |
+
"Author": "ADR",
|
| 936 |
+
"Creation Date": formatted_time,
|
| 937 |
+
"Layer": "Initial",
|
| 938 |
+
"Code": "to be added",
|
| 939 |
+
"head above 1": paths[-2],
|
| 940 |
+
"head above 2": paths[0]
|
| 941 |
+
}
|
| 942 |
+
data_list_JSON.append(data_entry)
|
| 943 |
+
|
| 944 |
+
# Convert list to JSON
|
| 945 |
+
json_output = json.dumps(data_list_JSON, indent=4)
|
| 946 |
+
|
| 947 |
+
print("Final URL:", final_url)
|
| 948 |
i += 2
|
| 949 |
continue
|
| 950 |
if collecting:
|