neerajkalyank committed on
Commit
152d5f0
·
verified ·
1 Parent(s): 87f6a59

Update ingestion.py

Browse files
Files changed (1) hide show
  1. ingestion.py +1 -1
ingestion.py CHANGED
@@ -4,7 +4,7 @@ import docx2txt
4
  def read_file(file):
5
  if file.name.endswith(".pdf"):
6
  reader = PdfReader(file)
7
- return " ".join(p.extract_text() for p in reader.pages)
8
  if file.name.endswith(".docx"):
9
  return docx2txt.process(file)
10
  return file.read().decode("utf-8")
 
4
  def read_file(file):
5
  if file.name.endswith(".pdf"):
6
  reader = PdfReader(file)
7
+ return " ".join(p.extract_text() or "" for p in reader.pages)
8
  if file.name.endswith(".docx"):
9
  return docx2txt.process(file)
10
  return file.read().decode("utf-8")