Marthee committed on
Commit
82952d5
·
verified ·
1 Parent(s): 63cb90c

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +8 -3
pdftotext.py CHANGED
@@ -1,7 +1,13 @@
1
  import fitz
2
 
3
- def texts_from_pdf(input_pdf_data):
4
- pdf_document = fitz.open('pdf',input_pdf_data)
 
 
 
 
 
 
5
  alltexts=''
6
  for page_num in range(pdf_document.page_count):
7
  page = pdf_document[page_num]
@@ -10,4 +16,3 @@ def texts_from_pdf(input_pdf_data):
10
 
11
  # alltexts = alltexts.replace('\n', ' ')
12
  return alltexts
13
-
 
1
  import fitz
2
 
3
+ import tsadropboxretrieval
4
+
5
+
6
+ def texts_from_pdf(dbpdfpath):
7
+ dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
8
+ md, res =dbxTeam.files_download(path=dbpdfpath)
9
+ dataDoc = res.content
10
+ pdf_document = fitz.open('pdf',dataDoc)
11
  alltexts=''
12
  for page_num in range(pdf_document.page_count):
13
  page = pdf_document[page_num]
 
16
 
17
  # alltexts = alltexts.replace('\n', ' ')
18
  return alltexts