import os

from config import get_settings


def get_file_extension(file_id: str) -> str:
    """Return the extension of *file_id*, including the leading dot.

    The extension is returned exactly as written (case is preserved). An
    empty string is returned when the name has no extension.
    """
    return os.path.splitext(file_id)[-1]


def load_file(file_path: str):
    """Load a document from *file_path* using the loader for its extension.

    Supported extensions are ``.pdf``, ``.docx``, ``.md`` and ``.txt``,
    matched case-insensitively. The ``CustomLoaders`` setting selects the
    back end: the project's own loaders under ``ingestion.loaders`` when it
    is truthy, otherwise the ``langchain_community`` document loaders.

    Args:
        file_path: Path of the file to load.

    Returns:
        Whatever the selected loader returns (expected to be a list of
        Document objects). With the custom loaders, an unsupported extension
        yields an empty list after printing a message.

    Raises:
        ValueError: If the LangChain back end is selected and the extension
            is not supported.
    """
    extension = get_file_extension(file_path)
    # Compare case-insensitively for both back ends so that "REPORT.PDF" is
    # handled the same way regardless of which loader set is configured.
    ext = extension.lower()

    # Read the setting once and branch on truthiness: a value that is neither
    # exactly True nor exactly False must not make this function fall through
    # and silently return None.
    if get_settings().CustomLoaders:
        # Imported lazily so the custom loaders (and their dependencies) are
        # only required when this back end is actually selected.
        from ingestion.loaders.pdf_loader import load_pdf
        from ingestion.loaders.txt_loader import load_txt
        from ingestion.loaders.md_loader import load_md
        from ingestion.loaders.docx_loader import load_docx

        if ext == ".pdf":
            return load_pdf(file_path)
        if ext == ".docx":
            return load_docx(file_path)
        if ext == ".md":
            return load_md(file_path)
        if ext == ".txt":
            return load_txt(file_path)

        # Best-effort contract of the custom back end: report and return an
        # empty result rather than raising.
        print(f"Unsupported file type: {ext}")
        return []

    # Imported lazily so langchain_community is only required when the
    # LangChain back end is selected.
    from langchain_community.document_loaders import (
        TextLoader,
        Docx2txtLoader,
        UnstructuredMarkdownLoader,
        PyMuPDFLoader,
    )

    if ext == ".txt":
        return TextLoader(file_path, encoding="utf8").load()
    if ext == ".docx":
        return Docx2txtLoader(file_path).load()
    if ext == ".md":
        return UnstructuredMarkdownLoader(file_path).load()
    if ext == ".pdf":
        return PyMuPDFLoader(file_path).load()

    # Report the extension exactly as it appeared in the path.
    raise ValueError(f"Unsupported file extension: {extension}")