Spaces:
Sleeping
Sleeping
Update findspecsv1.py
Browse files - findspecsv1.py +3 -3
findspecsv1.py
CHANGED
|
@@ -459,7 +459,7 @@ def extract_section_under_header(pdf_path, target_header_LIST):
|
|
| 459 |
if header_font_sizes:
|
| 460 |
matched_header_font_size = max(header_font_sizes)
|
| 461 |
print(f"📥 Start collecting after header: {combined_line} (Font size: {matched_header_font_size})")
|
| 462 |
-
|
| 463 |
# Collect the header line text and bbox too!
|
| 464 |
collected_lines.append(line_text)
|
| 465 |
|
|
@@ -527,7 +527,7 @@ def extract_section_under_header(pdf_path, target_header_LIST):
|
|
| 527 |
zoom = 200
|
| 528 |
zoom_str = f"{zoom},{left},{top}"
|
| 529 |
print('zoooom',zoom_str)
|
| 530 |
-
|
| 531 |
params = {
|
| 532 |
'pdfLink': pdf_path, # Your PDF link
|
| 533 |
'keyword': heading_to_search, # Your keyword (could be a string or list)
|
|
@@ -541,7 +541,7 @@ def extract_section_under_header(pdf_path, target_header_LIST):
|
|
| 541 |
|
| 542 |
# Correctly construct the final URL with page and zoom
|
| 543 |
zoom_str = f"{zoom},{left},{top}"
|
| 544 |
-
final_url = f"{baselink}{encoded_link}#page={str(
|
| 545 |
print(final_url)
|
| 546 |
# Get current date and time
|
| 547 |
now = datetime.now()
|
|
|
|
| 459 |
if header_font_sizes:
|
| 460 |
matched_header_font_size = max(header_font_sizes)
|
| 461 |
print(f"📥 Start collecting after header: {combined_line} (Font size: {matched_header_font_size})")
|
| 462 |
+
pageNumberFound = page_num +1
|
| 463 |
# Collect the header line text and bbox too!
|
| 464 |
collected_lines.append(line_text)
|
| 465 |
|
|
|
|
| 527 |
zoom = 200
|
| 528 |
zoom_str = f"{zoom},{left},{top}"
|
| 529 |
print('zoooom',zoom_str)
|
| 530 |
+
|
| 531 |
params = {
|
| 532 |
'pdfLink': pdf_path, # Your PDF link
|
| 533 |
'keyword': heading_to_search, # Your keyword (could be a string or list)
|
|
|
|
| 541 |
|
| 542 |
# Correctly construct the final URL with page and zoom
|
| 543 |
zoom_str = f"{zoom},{left},{top}"
|
| 544 |
+
final_url = f"{baselink}{encoded_link}#page={str(pageNumberFound)}&zoom={zoom_str}"
|
| 545 |
print(final_url)
|
| 546 |
# Get current date and time
|
| 547 |
now = datetime.now()
|