Marthee committed on
Commit
1f03351
·
verified ·
1 Parent(s): f5d33ec

Update Find_Hyperlinking_text.py

Browse files
Files changed (1) hide show
  1. Find_Hyperlinking_text.py +29 -5
Find_Hyperlinking_text.py CHANGED
@@ -265,12 +265,36 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
265
  annot.update()
266
  groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
267
 
268
- NBSlinkeach='pdfLink='+link+'&keyword='+NBS_heading+'#page='+str(pageNumberFound)+'&zoom='+str(highlight_rect)
269
- encoded_link = urllib.parse.quote(NBSlinkeach, safe='')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  if len(groupmainheadingFromArray) > 0:
271
- print('LINKLINK:',baselink+encoded_link)
272
- df = pd.concat([df, pd.DataFrame([{"NBSLink":baselink+encoded_link,"NBS": NBS_heading, 'head above 1': header2, "head above 2": groupmainheadingFromArray[0]}])], ignore_index=True)
273
- # Highlight the text
 
 
 
 
 
 
 
 
274
  if collecting_text:
275
  annot = page.add_highlight_annot(highlight_rect)
276
  annot.update()
 
265
  annot.update()
266
  groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
267
 
268
+ # Build the query parameters
269
+ params = {
270
+ 'pdfLink': link, # Your PDF link
271
+ 'keyword': NBS_heading, # Your keyword (could be a string or list)
272
+ 'page': str(pageNumberFound),
273
+ 'zoom': str(highlight_rect)
274
+ }
275
+
276
+ # Encode each parameter individually
277
+ encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
278
+
279
+ # Construct the final encoded link
280
+ encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
281
+
282
+ # Combine with the base link
283
+ final_url = baselink + encoded_link
284
+
285
+ # Optionally, add the URL to a DataFrame
286
  if len(groupmainheadingFromArray) > 0:
287
+ df = pd.concat([df, pd.DataFrame([{
288
+ "NBSLink": final_url,
289
+ "NBS": NBS_heading,
290
+ 'head above 1': header2,
291
+ "head above 2": groupmainheadingFromArray[0]
292
+ }])], ignore_index=True)
293
+
294
+ print("Final URL:", final_url)
295
+ # if len(groupmainheadingFromArray) > 0:
296
+ # df = pd.concat([df, pd.DataFrame([{"NBSLink":baselink+encoded_link,"NBS": NBS_heading, 'head above 1': header2, "head above 2": groupmainheadingFromArray[0]}])], ignore_index=True)
297
+
298
  if collecting_text:
299
  annot = page.add_highlight_annot(highlight_rect)
300
  annot.update()