File size: 1,231 Bytes
1595f22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# %%writefile file_loader.py
from PyPDF2 import PdfReader
from docx import Document


def load_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF, Word, or text file.

    The file type is taken from the extension of ``uploaded_file.name``
    (case-insensitive).

    Args:
        uploaded_file: A file-like object exposing a ``name`` attribute and,
            for ``.txt`` files, a ``read()`` method. For PDF and Word files
            the object is handed directly to ``PdfReader`` / ``Document``.

    Returns:
        The extracted text. PDF pages and Word paragraphs are joined with
        newlines; text files are returned decoded.

    Raises:
        ValueError: If the name has no extension, the extension is not
            supported, or the underlying parser fails.
        ImportError: If a parser reports a missing dependency at call time.
    """
    _, dot, extension = uploaded_file.name.rpartition(".")
    if not dot:
        raise ValueError("File has no extension")

    file_type = extension.lower()

    # Reject unknown types before entering the try block so the message is
    # not re-wrapped as "Error processing file: Unsupported file type: ...".
    if file_type not in ("pdf", "docx", "doc", "txt"):
        raise ValueError(f"Unsupported file type: {file_type}")

    try:
        if file_type == "pdf":
            reader = PdfReader(uploaded_file)
            # Defensive: treat a page that yields None/empty as an empty
            # string so the join cannot fail with a TypeError.
            return "\n".join(page.extract_text() or "" for page in reader.pages)

        if file_type in ("docx", "doc"):
            # NOTE(review): legacy binary .doc files are presumably not
            # readable by python-docx and will surface as ValueError below;
            # confirm whether ".doc" should be accepted at all.
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)

        # file_type == "txt"
        content = uploaded_file.read()
        if isinstance(content, str):
            # Already decoded (file opened in text mode).
            return content

        # "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM.
        # cp1252 must be tried before latin-1: latin-1 accepts every byte
        # sequence, so anything listed after it would never be reached.
        for encoding in ("utf-8-sig", "cp1252"):
            try:
                return content.decode(encoding)
            except UnicodeDecodeError:
                continue
        # Last resort: latin-1 maps every byte to a code point, never fails.
        return content.decode("latin-1")

    except ImportError as e:
        raise ImportError(f"Required library not installed: {e}") from e
    except Exception as e:
        raise ValueError(f"Error processing file: {e}") from e