InitialMarkups2

Sleeping

App Files Community

Marthee committed on Feb 7, 2025

Commit

1f03351

verified ·

1 Parent(s): f5d33ec

Update Find_Hyperlinking_text.py

Browse files

Files changed (1) hide show

Find_Hyperlinking_text.py +29 -5

Find_Hyperlinking_text.py CHANGED Viewed

@@ -265,12 +265,36 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
                                             annot.update()
                                             groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
-                                            NBSlinkeach='pdfLink='+link+'&keyword='+NBS_heading+'#page='+str(pageNumberFound)+'&zoom='+str(highlight_rect)
-                                            encoded_link = urllib.parse.quote(NBSlinkeach, safe='')
                                             if len(groupmainheadingFromArray) > 0:
-                                                print('LINKLINK:',baselink+encoded_link)
-                                                df = pd.concat([df, pd.DataFrame([{"NBSLink":baselink+encoded_link,"NBS": NBS_heading, 'head above 1': header2, "head above 2": groupmainheadingFromArray[0]}])], ignore_index=True)
-                                        # Highlight the text
                             if collecting_text:
                                 annot = page.add_highlight_annot(highlight_rect)
                                 annot.update()

                                             annot.update()
                                             groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
+                                            # Build the query parameters
+                                            params = {
+                                                'pdfLink': link,  # Your PDF link
+                                                'keyword': NBS_heading,  # Your keyword (could be a string or list)
+                                                'page': str(pageNumberFound),
+                                                'zoom': str(highlight_rect)
+                                            }
+                                            # Encode each parameter individually
+                                            encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
+                                            # Construct the final encoded link
+                                            encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
+                                            # Combine with the base link
+                                            final_url = baselink + encoded_link
+                                            # Optionally, add the URL to a DataFrame
                                             if len(groupmainheadingFromArray) > 0:
+                                                df = pd.concat([df, pd.DataFrame([{
+                                                    "NBSLink": final_url,
+                                                    "NBS": NBS_heading,
+                                                    'head above 1': header2,
+                                                    "head above 2": groupmainheadingFromArray[0]
+                                                }])], ignore_index=True)
+                                            print("Final URL:", final_url)
+                                            # if len(groupmainheadingFromArray) > 0:
+                                            #     df = pd.concat([df, pd.DataFrame([{"NBSLink":baselink+encoded_link,"NBS": NBS_heading, 'head above 1': header2, "head above 2": groupmainheadingFromArray[0]}])], ignore_index=True)
                             if collecting_text:
                                 annot = page.add_highlight_annot(highlight_rect)
                                 annot.update()