SQuAD / utils /file_loader.py
tnp554's picture
feat: deploy SQuAD backend with all AI models
09daf0b
import PyPDF2
import docx
def load_txt(file_path):
    """Return the entire contents of a text file as a single string.

    The file is opened in text mode and decoded as UTF-8.

    Args:
        file_path: Location of the file to read; passed straight to ``open``.

    Returns:
        The decoded text of the whole file.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
def load_pdf(file_path):
    """Return the text extracted from every page of a PDF file.

    The file is opened in binary mode and read with ``PyPDF2.PdfReader``.
    Each page's text is extracted once; pages whose extraction result is
    falsy (for example an empty string) are skipped. The remaining page
    texts are concatenated in page order with no separator between them.

    Args:
        file_path: Location of the PDF file; passed straight to ``open``.

    Returns:
        The concatenated page text, or ``""`` when no page yields any text.
    """
    page_texts = []
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            # Extract once per page: text extraction is the expensive step,
            # so reuse the result for both the emptiness check and the output.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(page_texts)
def load_docx(file_path):
    """Return the paragraph text of a .docx file, one paragraph per line.

    The file is opened with ``docx.Document`` and the ``text`` of each entry
    in its ``paragraphs`` collection is joined with newline characters.

    Args:
        file_path: Location of the document; passed straight to
            ``docx.Document``.

    Returns:
        The paragraph texts joined by ``"\\n"``.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)