Marthee committed on
Commit
4ee4eb2
·
verified ·
1 Parent(s): 0aea4e4

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +14 -0
pdftotext.py CHANGED
@@ -139,6 +139,20 @@ def apiFiltering(apitext):
139
  "bqcode": detail.get('bqcodelibrary', {}).get('bqcode')
140
  })
141
  return filtered_items
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  # import fitz
143
 
144
  # import tsadropboxretrieval
 
139
  "bqcode": detail.get('bqcodelibrary', {}).get('bqcode')
140
  })
141
  return filtered_items
142
+
143
+
144
+
145
+ import fitz
146
+
147
def texts_from_pdfAllText(input_pdf_data):
    """Return the plain text of every page of an in-memory PDF.

    Each page's text is extracted with ``page.get_text()`` and the results
    are concatenated in page order, so the returned string covers the whole
    document rather than a single page.

    Args:
        input_pdf_data: The PDF content handed to ``fitz.open`` as its
            stream argument (presumably raw ``bytes`` -- confirm against
            the caller).

    Returns:
        The concatenated text of all pages; an empty string when the
        document has no pages.
    """
    # 'pdf' is passed in the filename position together with a stream,
    # which PyMuPDF uses as the file-type hint for in-memory data.
    # The ``with`` block guarantees the document is closed even if text
    # extraction raises.
    with fitz.open('pdf', input_pdf_data) as pdf_document:
        all_text = "".join(page.get_text() for page in pdf_document)

    # Kept from the original implementation: echo the extracted text.
    print(all_text)
    return all_text
156
  # import fitz
157
 
158
  # import tsadropboxretrieval