Spaces:
Paused
Paused
Update pdftotext.py
Browse files- pdftotext.py +5 -3
pdftotext.py
CHANGED
|
@@ -2,10 +2,12 @@ import fitz
|
|
| 2 |
|
| 3 |
def texts_from_pdf(input_pdf_data):
|
| 4 |
pdf_document = fitz.open('pdf',input_pdf_data)
|
| 5 |
-
|
| 6 |
for page_num in range(pdf_document.page_count):
|
| 7 |
page = pdf_document[page_num]
|
| 8 |
text_instances = page.get_text()
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
| 2 |
|
| 3 |
def texts_from_pdf(input_pdf_data):
    """Extract the text of every page of an in-memory PDF.

    Args:
        input_pdf_data: PDF content handed to ``fitz.open`` as its stream
            argument (presumably ``bytes`` -- confirm against the caller).

    Returns:
        A list holding one ``page.get_text()`` result per page, in page
        order.
    """
    pdf_document = fitz.open('pdf', input_pdf_data)
    try:
        return [page.get_text() for page in pdf_document]
    finally:
        # Release the document handle even if extraction fails part-way.
        pdf_document.close()
|