File size: 448 Bytes
7780d69
8922fbb
7780d69
 
8922fbb
 
 
 
 
 
0d76c8b
7780d69
8922fbb
 
0d76c8b
 
8922fbb
7780d69
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import fitz  # PyMuPDF
import os

def extract_text(file):
    """Return the plain text contained in a ``.pdf`` or ``.txt`` file.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload-style object exposing the path through a ``name``
            attribute (e.g. a tempfile wrapper). Falsy values such as
            ``None`` or ``""`` are treated as "no file supplied".

    Returns:
        - ``""`` when ``file`` is falsy.
        - For ``.pdf``: the text of every page, joined with newlines.
        - For ``.txt``: the file content decoded as UTF-8.
        - For any other extension: the literal string
          ``"Unsupported file type"`` (kept as a return value, not an
          exception, so existing callers that display the result keep
          working).

    Raises:
        UnicodeDecodeError: if a ``.txt`` file is not valid UTF-8.
        OSError: if the file cannot be opened.
    """
    if not file:
        return ""

    # Paths are checked first on purpose: ``pathlib.Path.name`` is only the
    # final path component, so reading ``.name`` from a Path would drop the
    # directory and open the wrong file.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # Extension comparison is case-insensitive (".PDF" == ".pdf").
    file_ext = os.path.splitext(path)[1].lower()

    if file_ext == ".pdf":
        with fitz.open(path) as doc:
            return "\n".join(page.get_text() for page in doc)

    if file_ext == ".txt":
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    return "Unsupported file type"