InitialMarkups2

Sleeping

App Files Files Community

Marthee committed on Apr 29, 2025

Commit

47356e3

verified ·

1 Parent(s): 5e8acd5

Update findspecsv1.py

Browse files

Files changed (1) hide show

findspecsv1.py +3 -3

findspecsv1.py CHANGED Viewed

@@ -459,7 +459,7 @@ def extract_section_under_header(pdf_path, target_header_LIST):
                             if header_font_sizes:
                                 matched_header_font_size = max(header_font_sizes)
                             print(f"📥 Start collecting after header: {combined_line} (Font size: {matched_header_font_size})")
                             # Collect the header line text and bbox too!
                             collected_lines.append(line_text)
@@ -527,7 +527,7 @@ def extract_section_under_header(pdf_path, target_header_LIST):
                                     zoom = 200
                                     zoom_str = f"{zoom},{left},{top}"
                                     print('zoooom',zoom_str)
-                                    pageNumberFound = page_num +1
                                     params = {
                                         'pdfLink': pdf_path,  # Your PDF link
                                         'keyword': heading_to_search,  # Your keyword (could be a string or list)
@@ -541,7 +541,7 @@ def extract_section_under_header(pdf_path, target_header_LIST):
                                     # Correctly construct the final URL with page and zoom
                                     zoom_str = f"{zoom},{left},{top}"
-                                    final_url = f"{baselink}{encoded_link}#page={str(page_num)}&zoom={zoom_str}"
                                     print(final_url)
                                     # Get current date and time
                                     now = datetime.now()

                             if header_font_sizes:
                                 matched_header_font_size = max(header_font_sizes)
                             print(f"📥 Start collecting after header: {combined_line} (Font size: {matched_header_font_size})")
+                            pageNumberFound = page_num +1
                             # Collect the header line text and bbox too!
                             collected_lines.append(line_text)
                                     zoom = 200
                                     zoom_str = f"{zoom},{left},{top}"
                                     print('zoooom',zoom_str)
                                     params = {
                                         'pdfLink': pdf_path,  # Your PDF link
                                         'keyword': heading_to_search,  # Your keyword (could be a string or list)
                                     # Correctly construct the final URL with page and zoom
                                     zoom_str = f"{zoom},{left},{top}"
+                                    final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
                                     print(final_url)
                                     # Get current date and time
                                     now = datetime.now()