File size: 1,680 Bytes
1bc3f18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from config import get_settings
import os

def get_file_extension(file_id: str):
    """Return the extension of *file_id*, including the leading dot.

    The split is done by ``os.path.splitext``, so only the last suffix is
    returned (``"a.tar.gz"`` gives ``".gz"``) and a name without a suffix
    gives an empty string. The case of the extension is left unchanged.
    """
    _stem, suffix = os.path.splitext(file_id)
    return suffix


def load_file(file_path: str):
    """Load the document at *file_path* with the loader matching its extension.

    The ``CustomLoaders`` setting returned by ``get_settings()`` selects the
    backend: when it is truthy the project's ``ingestion.loaders`` functions
    are used, otherwise the ``langchain_community`` document loaders are used.
    Supported extensions in both modes are ``.pdf``, ``.docx``, ``.md`` and
    ``.txt``; matching is case-insensitive.

    Args:
        file_path: Path of the file to load.

    Returns:
        Whatever the selected loader returns (presumably a list of Document
        objects -- confirm against the ``ingestion.loaders`` implementations).
        With custom loaders enabled, an unsupported extension yields ``[]``.

    Raises:
        ValueError: If custom loaders are disabled and the extension is not
            supported.
    """
    # Read the setting once so the whole call is decided by a single value.
    # Truthiness is used instead of ``== True`` / ``== False`` so that a value
    # that is neither (e.g. None) cannot fall through and return None silently.
    use_custom_loaders = get_settings().CustomLoaders

    # Match on the lower-cased extension in both modes so "report.PDF" is
    # treated the same as "report.pdf" whichever backend is selected.
    raw_ext = os.path.splitext(file_path)[1]
    ext = raw_ext.lower()

    if use_custom_loaders:
        # Imported inside the branch: these modules are only imported when the
        # custom backend is actually selected.
        from ingestion.loaders.pdf_loader import load_pdf
        from ingestion.loaders.txt_loader import load_txt
        from ingestion.loaders.md_loader import load_md
        from ingestion.loaders.docx_loader import load_docx

        # Dispatcher: extension -> loader function.
        custom_loaders = {
            ".pdf": load_pdf,
            ".docx": load_docx,
            ".md": load_md,
            ".txt": load_txt,
        }
        loader = custom_loaders.get(ext)
        if loader is None:
            # Existing contract of this mode: report and return an empty list
            # rather than raising.
            print(f"Unsupported file type: {ext}")
            return []

        # Return the loader's result as-is.
        return loader(file_path)

    # Imported inside the branch: langchain_community is only imported when
    # the custom backend is not selected.
    from langchain_community.document_loaders import (
        TextLoader,
        Docx2txtLoader,
        UnstructuredMarkdownLoader,
        PyMuPDFLoader,
    )

    if ext == ".txt":
        return TextLoader(file_path, encoding="utf8").load()
    if ext == ".docx":
        return Docx2txtLoader(file_path).load()
    if ext == ".md":
        return UnstructuredMarkdownLoader(file_path).load()
    if ext == ".pdf":
        return PyMuPDFLoader(file_path).load()

    # Existing contract of this mode: unsupported types raise. The message
    # keeps the extension exactly as it appears in the path.
    raise ValueError(f"Unsupported file extension: {raw_ext}")