Marthee committed on
Commit
625f65d
·
verified ·
1 Parent(s): 3a4acfd

Update Find_Hyperlinking_text.py

Browse files
Files changed (1) hide show
  1. Find_Hyperlinking_text.py +9 -8
Find_Hyperlinking_text.py CHANGED
@@ -265,18 +265,21 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
265
  annot.update()
266
  groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
267
 
268
- # Build the query parameters
269
  params = {
270
  'pdfLink': link, # Your PDF link
271
  'keyword': NBS_heading, # Your keyword (could be a string or list)
272
  }
 
 
273
  encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
 
274
  # Construct the final encoded link
275
  encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
276
-
277
- # Combine with the base link
278
- final_url = baselink + encoded_link+'#page='+str(pageNumberFound)+'&zoom=',str(zoom_str)
279
-
280
  # Optionally, add the URL to a DataFrame
281
  if len(groupmainheadingFromArray) > 0:
282
  df = pd.concat([df, pd.DataFrame([{
@@ -285,10 +288,8 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
285
  'head above 1': header2,
286
  "head above 2": groupmainheadingFromArray[0]
287
  }])], ignore_index=True)
288
-
289
  print("Final URL:", final_url)
290
- # if len(groupmainheadingFromArray) > 0:
291
- # df = pd.concat([df, pd.DataFrame([{"NBSLink":baselink+encoded_link,"NBS": NBS_heading, 'head above 1': header2, "head above 2": groupmainheadingFromArray[0]}])], ignore_index=True)
292
 
293
  if collecting_text:
294
  annot = page.add_highlight_annot(highlight_rect)
 
265
  annot.update()
266
  groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
267
 
268
+ # Build the query parameters
269
  params = {
270
  'pdfLink': link, # Your PDF link
271
  'keyword': NBS_heading, # Your keyword (could be a string or list)
272
  }
273
+
274
+ # URL encode each parameter
275
  encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
276
+
277
  # Construct the final encoded link
278
  encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
279
+
280
+ # Correctly construct the final URL with page and zoom
281
+ final_url = f"{baselink}{encoded_link}&page={str(pageNumberFound)}&zoom={zoom_str}"
282
+
283
  # Optionally, add the URL to a DataFrame
284
  if len(groupmainheadingFromArray) > 0:
285
  df = pd.concat([df, pd.DataFrame([{
 
288
  'head above 1': header2,
289
  "head above 2": groupmainheadingFromArray[0]
290
  }])], ignore_index=True)
291
+
292
  print("Final URL:", final_url)
 
 
293
 
294
  if collecting_text:
295
  annot = page.add_highlight_annot(highlight_rect)