Marthee committed on
Commit
b88bc67
·
verified ·
1 Parent(s): 2d44306

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +5 -5
pdftotext.py CHANGED
@@ -2,12 +2,12 @@ import fitz
2
 
3
  def texts_from_pdf(input_pdf_data):
4
  pdf_document = fitz.open('pdf',input_pdf_data)
5
- alltexts=[]
6
  for page_num in range(pdf_document.page_count):
7
  page = pdf_document[page_num]
8
  text_instances = page.get_text()
9
- alltexts.append(text_instances)
10
-
11
- page.apply_redactions()
12
- print(alltexts)
13
  return alltexts
 
 
2
 
3
  def texts_from_pdf(input_pdf_data):
4
  pdf_document = fitz.open('pdf',input_pdf_data)
5
+ alltexts=''
6
  for page_num in range(pdf_document.page_count):
7
  page = pdf_document[page_num]
8
  text_instances = page.get_text()
9
+ alltexts+=text_instances
10
+
11
+ alltexts = alltexts.replace('\n', ' ')
 
12
  return alltexts
13
+