InitialMarkups

Runtime error

App Files Files Community

Marthee committed on Feb 7, 2025

Commit

625f65d

verified ·

1 Parent(s): 3a4acfd

Update Find_Hyperlinking_text.py

Browse files

Files changed (1) hide show

Find_Hyperlinking_text.py +9 -8

Find_Hyperlinking_text.py CHANGED Viewed

@@ -265,18 +265,21 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
                                             annot.update()
                                             groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
-                                            # Build the query parameters
                                             params = {
                                                 'pdfLink': link,  # Your PDF link
                                                 'keyword': NBS_heading,  # Your keyword (could be a string or list)
                                             }
                                             encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
                                             # Construct the final encoded link
                                             encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
-                                            # Combine with the base link
-                                            final_url = baselink + encoded_link+'#page='+str(pageNumberFound)+'&zoom=',str(zoom_str)
                                             # Optionally, add the URL to a DataFrame
                                             if len(groupmainheadingFromArray) > 0:
                                                 df = pd.concat([df, pd.DataFrame([{
@@ -285,10 +288,8 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
                                                     'head above 1': header2,
                                                     "head above 2": groupmainheadingFromArray[0]
                                                 }])], ignore_index=True)
                                             print("Final URL:", final_url)
-                                            # if len(groupmainheadingFromArray) > 0:
-                                            #     df = pd.concat([df, pd.DataFrame([{"NBSLink":baselink+encoded_link,"NBS": NBS_heading, 'head above 1': header2, "head above 2": groupmainheadingFromArray[0]}])], ignore_index=True)
                             if collecting_text:
                                 annot = page.add_highlight_annot(highlight_rect)

                                             annot.update()
                                             groupmainheadingFromArray = [item for item in merged_groupheadings if previous_header in item]
+                                                        # Build the query parameters
                                             params = {
                                                 'pdfLink': link,  # Your PDF link
                                                 'keyword': NBS_heading,  # Your keyword (could be a string or list)
                                             }
+                                            # URL encode each parameter
                                             encoded_params = {key: urllib.parse.quote(value, safe='') for key, value in params.items()}
                                             # Construct the final encoded link
                                             encoded_link = '&'.join([f"{key}={value}" for key, value in encoded_params.items()])
+                                            # Correctly construct the final URL with page and zoom
+                                            final_url = f"{baselink}{encoded_link}&page={str(pageNumberFound)}&zoom={zoom_str}"
                                             # Optionally, add the URL to a DataFrame
                                             if len(groupmainheadingFromArray) > 0:
                                                 df = pd.concat([df, pd.DataFrame([{
                                                     'head above 1': header2,
                                                     "head above 2": groupmainheadingFromArray[0]
                                                 }])], ignore_index=True)
                                             print("Final URL:", final_url)
                             if collecting_text:
                                 annot = page.add_highlight_annot(highlight_rect)