Spaces:
Runtime error
Runtime error
Update pdftotext.py
Browse files- pdftotext.py +8 -3
pdftotext.py
CHANGED
|
@@ -1,7 +1,13 @@
|
|
| 1 |
import fitz
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
alltexts=''
|
| 6 |
for page_num in range(pdf_document.page_count):
|
| 7 |
page = pdf_document[page_num]
|
|
@@ -10,4 +16,3 @@ def texts_from_pdf(input_pdf_data):
|
|
| 10 |
|
| 11 |
# alltexts = alltexts.replace('\n', ' ')
|
| 12 |
return alltexts
|
| 13 |
-
|
|
|
|
| 1 |
import fitz
|
| 2 |
|
| 3 |
+
import tsadropboxretrieval
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def texts_from_pdf(dbpdfpath):
|
| 7 |
+
dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
|
| 8 |
+
md, res =dbxTeam.files_download(path=dbpdfpath)
|
| 9 |
+
dataDoc = res.content
|
| 10 |
+
pdf_document = fitz.open('pdf',dataDoc)
|
| 11 |
alltexts=''
|
| 12 |
for page_num in range(pdf_document.page_count):
|
| 13 |
page = pdf_document[page_num]
|
|
|
|
| 16 |
|
| 17 |
# alltexts = alltexts.replace('\n', ' ')
|
| 18 |
return alltexts
|
|
|