Marthee committed on
Commit
58e3f42
·
verified ·
1 Parent(s): 4221eea

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +3 -23
InitialMarkups.py CHANGED
@@ -13,8 +13,7 @@ tobebilledonlyLink='https://adr.trevorsadd.co.uk/api/view-pdf-tobebilled?'
13
 
14
 
15
 
16
- from dateutil.parser import parse as dateparse
17
- from dateutil.parser._parser import ParserError # More specific error
18
  from urllib.parse import urlparse, unquote
19
  import os
20
  from io import BytesIO
@@ -43,7 +42,7 @@ import urllib.parse
43
 
44
  def changepdflinks(json_data, pdf_path):
45
  print('ll , ' ,json_data,pdf_path)
46
- # base_viewer_link = "https://findconsole-initialmarkups.hf.space/view-pdf?"
47
 
48
  updated_json = []
49
  for entry in json_data:
@@ -55,7 +54,7 @@ def changepdflinks(json_data, pdf_path):
55
  encoded_pdf_link = urllib.parse.quote(pdf_path, safe='')
56
 
57
  # Construct the final link
58
- final_url = f"{baselink}pdfLink={encoded_pdf_link}#page={str(page_str)}&zoom={zoom_str}"
59
 
60
  # Replace the old NBSLink value with the full URL
61
  entry["NBSLink"] = final_url
@@ -997,25 +996,6 @@ def extract_section_under_header(multiplePDF_Paths):
997
  i += 2
998
  continue
999
  if collecting:
1000
- # ----------------------------------------------------
1001
- # ADD THIS BLOCK IN ITS PLACE
1002
- # ----------------------------------------------------
1003
- # NEW: Check if the line is a date, and if so, stop collecting
1004
- try:
1005
- # Use the 'line_text_raw' we defined earlier for an accurate parse
1006
- dateparse(line_text, fuzzy=True)
1007
-
1008
- # --- Date Found: Stop Collecting ---
1009
- print(f"🛑 Stop at date: '{line_text}'")
1010
- collecting = False
1011
- done = True # Mark this header as finished
1012
- break_collecting = True # Signal outer loops to stop
1013
- break # Break this 'while' loop
1014
-
1015
- except (ParserError, ValueError, OverflowError):
1016
- # No date found, continue normally to process this line
1017
- pass
1018
- # ----------------------------------------------------
1019
  norm_line = normalize_text(line_text)
1020
 
1021
  # Optimized URL check
 
13
 
14
 
15
 
16
+
 
17
  from urllib.parse import urlparse, unquote
18
  import os
19
  from io import BytesIO
 
42
 
43
  def changepdflinks(json_data, pdf_path):
44
  print('ll , ' ,json_data,pdf_path)
45
+ base_viewer_link = "https://findconsole-initialmarkups.hf.space/view-pdf?"
46
 
47
  updated_json = []
48
  for entry in json_data:
 
54
  encoded_pdf_link = urllib.parse.quote(pdf_path, safe='')
55
 
56
  # Construct the final link
57
+ final_url = f"{base_viewer_link}pdfLink={encoded_pdf_link}#page={str(page_str)}&zoom={zoom_str}"
58
 
59
  # Replace the old NBSLink value with the full URL
60
  entry["NBSLink"] = final_url
 
996
  i += 2
997
  continue
998
  if collecting:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
999
  norm_line = normalize_text(line_text)
1000
 
1001
  # Optimized URL check