Marthee committed on
Commit
09f4b44
·
verified ·
1 Parent(s): a5d06df

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +48 -1
InitialMarkups.py CHANGED
@@ -6,7 +6,7 @@ Automatically generated by Colab.
6
  Original file is located at
7
  https://colab.research.google.com/drive/12XfVkmKmN3oVjHhLVE0_GgkftgArFEK2
8
  """
9
- baselink='https://find-initialmarkups.hf.space/view-pdf?'
10
 
11
 
12
 
@@ -898,6 +898,53 @@ def extract_section_under_header(pdf_path):
898
  current_bbox[page_num] = header_bbox
899
 
900
  last_y1s[page_num] = header_bbox[3]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
  i += 2
902
  continue
903
  if collecting:
 
6
  Original file is located at
7
  https://colab.research.google.com/drive/12XfVkmKmN3oVjHhLVE0_GgkftgArFEK2
8
  """
9
+ baselink='https://findconsole-initialmarkups.hf.space/view-pdf?'
10
 
11
 
12
 
 
898
  current_bbox[page_num] = header_bbox
899
 
900
  last_y1s[page_num] = header_bbox[3]
901
+ x0, y0, x1, y1 = header_bbox
902
+ zoom = 200
903
+ left = int(x0)
904
+ top = int(y0)
905
+ zoom_str = f"{zoom},{left},{top}"
906
+ pageNumberFound = page_num + 1
907
+
908
+ # Build the query parameters
909
+ params = {
910
+ 'pdfLink': pdf_path, # Your PDF link
911
+ 'keyword': heading_to_search, # Your keyword (could be a string or list)
912
+ }
913
+
914
+ # URL encode each parameter
915
+ encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
916
+
917
+ # Construct the final encoded link
918
+ encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
919
+
920
+ # Correctly construct the final URL with page and zoom
921
+ final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
922
+
923
+ # Get current date and time
924
+ now = datetime.now()
925
+
926
+ # Format the output
927
+ formatted_time = now.strftime("%d/%m/%Y %I:%M:%S %p")
928
+ # Optionally, add the URL to a DataFrame
929
+
930
+
931
+ data_entry = {
932
+ "NBSLink": final_url,
933
+ "Subject": heading_to_search,
934
+ "Page": str(pageNumberFound),
935
+ "Author": "ADR",
936
+ "Creation Date": formatted_time,
937
+ "Layer": "Initial",
938
+ "Code": "to be added",
939
+ "head above 1": paths[-2],
940
+ "head above 2": paths[0]
941
+ }
942
+ data_list_JSON.append(data_entry)
943
+
944
+ # Convert list to JSON
945
+ json_output = json.dumps(data_list_JSON, indent=4)
946
+
947
+ print("Final URL:", final_url)
948
  i += 2
949
  continue
950
  if collecting: