File size: 396 Bytes
fa396c8
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from PyPDF2 import PdfReader
from docx import Document

def load_document(path: str) -> str:
    """Return the plain text of a PDF or DOCX file.

    The file type is chosen from the extension of *path*. The extension is
    compared case-insensitively, so names such as ``REPORT.PDF`` or
    ``Notes.Docx`` are accepted.

    Args:
        path: Location of the document to read.

    Returns:
        For ``.pdf`` files, the extracted text of every page joined with
        single spaces; a page that yields no text contributes an empty
        string. For ``.docx`` files, the text of every entry in
        ``Document(path).paragraphs`` joined with single spaces.

    Raises:
        ValueError: If *path* ends with neither ``.pdf`` nor ``.docx``.
    """
    # Lower-case only the copy used for matching; the original path is what
    # gets passed to the parsers so the file is still found on
    # case-sensitive filesystems.
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        reader = PdfReader(path)
        # `or ""` keeps the join safe when a page produces no text.
        return " ".join(page.extract_text() or "" for page in reader.pages)

    if lowered.endswith(".docx"):
        doc = Document(path)
        return " ".join(p.text for p in doc.paragraphs)

    raise ValueError("Unsupported file type")