Spaces: Sleeping
Update pdftotext.py
Browse files - pdftotext.py +5 -0
pdftotext.py
CHANGED
|
@@ -4,10 +4,15 @@ import tsadropboxretrieval
|
|
| 4 |
|
| 5 |
|
| 6 |
def texts_from_pdf(dbpdfpath):
|
|
|
|
| 7 |
dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
|
|
|
|
| 8 |
md, res =dbxTeam.files_download(path=dbpdfpath)
|
|
|
|
| 9 |
dataDoc = res.content
|
|
|
|
| 10 |
pdf_document = fitz.open('pdf',dataDoc)
|
|
|
|
| 11 |
alltexts=''
|
| 12 |
for page_num in range(pdf_document.page_count):
|
| 13 |
page = pdf_document[page_num]
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def texts_from_pdf(dbpdfpath):
|
| 7 |
+
print('intexts')
|
| 8 |
dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
|
| 9 |
+
print('dbdone')
|
| 10 |
md, res =dbxTeam.files_download(path=dbpdfpath)
|
| 11 |
+
print('downloaded')
|
| 12 |
dataDoc = res.content
|
| 13 |
+
print('l')
|
| 14 |
pdf_document = fitz.open('pdf',dataDoc)
|
| 15 |
+
print('k')
|
| 16 |
alltexts=''
|
| 17 |
for page_num in range(pdf_document.page_count):
|
| 18 |
page = pdf_document[page_num]
|