Marthee committed on
Commit
1cd28b5
·
verified ·
1 Parent(s): e074023

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +5 -0
pdftotext.py CHANGED
@@ -4,10 +4,15 @@ import tsadropboxretrieval
4
 
5
 
6
  def texts_from_pdf(dbpdfpath):
 
7
  dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
 
8
  md, res =dbxTeam.files_download(path=dbpdfpath)
 
9
  dataDoc = res.content
 
10
  pdf_document = fitz.open('pdf',dataDoc)
 
11
  alltexts=''
12
  for page_num in range(pdf_document.page_count):
13
  page = pdf_document[page_num]
 
4
 
5
 
6
  def texts_from_pdf(dbpdfpath):
7
+ print('intexts')
8
  dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
9
+ print('dbdone')
10
  md, res =dbxTeam.files_download(path=dbpdfpath)
11
+ print('downloaded')
12
  dataDoc = res.content
13
+ print('l')
14
  pdf_document = fitz.open('pdf',dataDoc)
15
+ print('k')
16
  alltexts=''
17
  for page_num in range(pdf_document.page_count):
18
  page = pdf_document[page_num]