Spaces:
Runtime error
Runtime error
Update pdftotext.py
Browse files- pdftotext.py +5 -5
pdftotext.py
CHANGED
|
@@ -2,12 +2,12 @@ import fitz
|
|
| 2 |
|
| 3 |
def texts_from_pdf(input_pdf_data):
|
| 4 |
pdf_document = fitz.open('pdf',input_pdf_data)
|
| 5 |
-
alltexts=
|
| 6 |
for page_num in range(pdf_document.page_count):
|
| 7 |
page = pdf_document[page_num]
|
| 8 |
text_instances = page.get_text()
|
| 9 |
-
alltexts
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
print(alltexts)
|
| 13 |
return alltexts
|
|
|
|
|
|
| 2 |
|
| 3 |
def texts_from_pdf(input_pdf_data):
    """Extract the text of every page of an in-memory PDF as one string.

    Args:
        input_pdf_data: The PDF content handed to ``fitz.open`` as its
            second (stream) argument -- presumably ``bytes``; confirm
            against the caller.

    Returns:
        The text of all pages concatenated in page order, with every
        newline character replaced by a single space.
    """
    # Open the document from memory; the 'pdf' argument is kept exactly as
    # the original call passed it. Using the document as a context manager
    # guarantees it is closed even if text extraction raises.
    with fitz.open('pdf', input_pdf_data) as pdf_document:
        page_texts = [page.get_text() for page in pdf_document]
    # Join once instead of growing a string with += for every page.
    return ''.join(page_texts).replace('\n', ' ')
|
| 13 |
+
|