Marthee committed on
Commit
ad8236d
·
verified ·
1 Parent(s): 550a176

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +4 -3
pdftotext.py CHANGED
@@ -2,8 +2,7 @@ import fitz # PyMuPDF
2
  from io import BytesIO
3
  import re
4
  import requests
5
-
6
- def texts_from_pdf(pdfshareablelink, heading_to_search):
7
  print('intexts')
8
 
9
  pdf_content = None
@@ -87,6 +86,7 @@ def texts_from_pdf(pdfshareablelink, heading_to_search):
87
  if heading_pattern.match(span_text) and span_text != heading_to_search:
88
  print(f"Ending collection at heading: {span_text}")
89
  collecting_text = False # Stop collecting
 
90
  return all_text.strip() # Return collected text
91
 
92
  # If we're collecting text, add it to the output
@@ -112,7 +112,8 @@ def texts_from_pdf(pdfshareablelink, heading_to_search):
112
  all_text += current_line.strip() + '\n'
113
  current_line = "" # Reset for the next line
114
 
115
- return all_text.strip() if f10_count == 2 else "Second heading not found"
 
116
 
117
  # import fitz
118
 
 
2
  from io import BytesIO
3
  import re
4
  import requests
5
+ def texts_from_pdf(pdfshareablelink, heading_to_search):
 
6
  print('intexts')
7
 
8
  pdf_content = None
 
86
  if heading_pattern.match(span_text) and span_text != heading_to_search:
87
  print(f"Ending collection at heading: {span_text}")
88
  collecting_text = False # Stop collecting
89
+
90
  return all_text.strip() # Return collected text
91
 
92
  # If we're collecting text, add it to the output
 
112
  all_text += current_line.strip() + '\n'
113
  current_line = "" # Reset for the next line
114
 
115
+ # print(f"\nCollected Text:\n{all_text.strip()}")
116
+ return all_text.strip() if f10_count > 1 else "Heading not found"
117
 
118
  # import fitz
119