File size: 536 Bytes
a9a45af
bf38aae
a9a45af
 
 
d4ce689
a9a45af
bf38aae
a9a45af
bf38aae
 
 
a9a45af
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import os
import fitz  # PyMuPDF

def load_text(path) -> str:
    """Return the text content of a PDF or plain-text file.

    The file type is chosen from the extension of *path*, compared
    case-insensitively.

    Args:
        path: Location of the file to read. Must end in ``.pdf`` or ``.txt``.

    Returns:
        For ``.pdf`` files, the text of every page joined with newlines.
        For ``.txt`` files, the full file content decoded as UTF-8.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.txt``.
    """
    file_extension = os.path.splitext(path)[1].lower()

    if file_extension == ".pdf":
        # Open the document as a context manager so it is closed (and its
        # file handle released) even if text extraction raises midway.
        with fitz.open(path) as doc:
            return "\n".join(page.get_text() for page in doc)

    if file_extension == ".txt":
        with open(path, "r", encoding="utf-8") as file:
            return file.read()

    raise ValueError(f"Unsupported file type: {file_extension}. Only PDF and TXT are supported.")