Marthee committed on
Commit
1cbecc5
·
verified ·
1 Parent(s): e78e8fc

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +11 -0
pdftotext.py CHANGED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz
2
+
3
def texts_from_pdf(input_pdf_data) -> str:
    """Extract the plain text of every page of an in-memory PDF.

    Args:
        input_pdf_data: The PDF content held in memory. It is handed to
            ``fitz.open`` as the ``stream`` argument.
            NOTE(review): presumably ``bytes`` read from an upload or
            download -- confirm against the callers.

    Returns:
        The text of all pages concatenated in page order. For a
        single-page document this is exactly that page's
        ``page.get_text()`` result; a document without pages yields ``""``.
    """
    # Keyword form of the original positional call fitz.open('pdf', data):
    # the data is the stream and "pdf" is the file-type hint.
    pdf_document = fitz.open(stream=input_pdf_data, filetype="pdf")
    try:
        # Collect the text of *every* page. Previously each iteration
        # overwrote the result, so only one page's text survived, and a
        # document with no pages left the result name unbound.
        text_instances = "".join(page.get_text() for page in pdf_document)
    finally:
        # Release the document even if text extraction fails part-way.
        pdf_document.close()

    # Kept from the original implementation: echo the extracted text to
    # stdout so existing log output is unchanged.
    print(text_instances)
    return text_instances