Spaces:
Running
Running
Update pdftotext.py
Browse files- pdftotext.py +11 -0
pdftotext.py
CHANGED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fitz
|
| 2 |
+
|
| 3 |
+
def texts_from_pdf(input_pdf_data):
    """Extract the plain text of every page of an in-memory PDF.

    Args:
        input_pdf_data: Raw PDF content handed to ``fitz.open`` as its
            ``stream`` argument (presumably ``bytes`` -- confirm with callers).

    Returns:
        The text of all pages concatenated in page order, or an empty
        string when the document contains no pages.
    """
    pdf_document = fitz.open(stream=input_pdf_data, filetype='pdf')
    try:
        page_texts = []
        for page in pdf_document:
            page_text = page.get_text()
            # Kept from the original implementation: each page's text is
            # echoed to stdout as it is extracted.
            print(page_text)
            page_texts.append(page_text)
    finally:
        # Always release the document, even if extraction fails midway.
        pdf_document.close()

    # Joining (instead of returning the loop variable) returns every page
    # rather than only the last one, and yields "" for a page-less document
    # instead of raising UnboundLocalError.
    return "".join(page_texts)
|