InitialMarkups2

Sleeping

Marthee committed on Oct 4, 2024

Commit

ad8236d

verified ·

1 Parent(s): 550a176

Update pdftotext.py

Files changed (1) hide show

pdftotext.py CHANGED Viewed

@@ -2,8 +2,7 @@ import fitz  # PyMuPDF
 from io import BytesIO
 import re
 import requests
-def texts_from_pdf(pdfshareablelink, heading_to_search):
     print('intexts')
     pdf_content = None
@@ -87,6 +86,7 @@ def texts_from_pdf(pdfshareablelink, heading_to_search):
                                 if heading_pattern.match(span_text) and span_text != heading_to_search:
                                     print(f"Ending collection at heading: {span_text}")
                                     collecting_text = False  # Stop collecting
                                     return all_text.strip()  # Return collected text
                         # If we're collecting text, add it to the output
@@ -112,7 +112,8 @@ def texts_from_pdf(pdfshareablelink, heading_to_search):
             all_text += current_line.strip() + '\n'
             current_line = ""  # Reset for the next line
-    return all_text.strip() if f10_count == 2 else "Second heading not found"
 # import fitz

 from io import BytesIO
 import re
 import requests
+def texts_from_pdf(pdfshareablelink, heading_to_search):
     print('intexts')
     pdf_content = None
                                 if heading_pattern.match(span_text) and span_text != heading_to_search:
                                     print(f"Ending collection at heading: {span_text}")
                                     collecting_text = False  # Stop collecting
                                     return all_text.strip()  # Return collected text
                         # If we're collecting text, add it to the output
             all_text += current_line.strip() + '\n'
             current_line = ""  # Reset for the next line
+    # print(f"\nCollected Text:\n{all_text.strip()}")
+    return all_text.strip() if f10_count > 1 else "Heading not found"
 # import fitz