Spaces:
Paused
Paused
| from config import get_settings | |
| import os | |
def get_file_extension(file_id: str):
    """Return the extension of ``file_id``, including its leading dot.

    The split is performed by ``os.path.splitext``, so the result is an
    empty string when the last path component has no extension. The
    extension's case is returned unchanged.
    """
    _stem, suffix = os.path.splitext(file_id)
    return suffix
def load_file(file_path: str):
    """Load a document file with the loader matching its extension.

    The loader family is selected by the ``CustomLoaders`` setting: when it
    is truthy the project's ``ingestion.loaders`` functions are used,
    otherwise the ``langchain_community`` document loaders are used. The
    concrete loader is chosen from the file extension, which is compared
    case-insensitively in both branches (".PDF" is treated like ".pdf").

    Supported extensions: ".pdf", ".docx", ".md", ".txt".

    Args:
        file_path: Path of the file to load.

    Returns:
        Whatever the selected loader returns. With the custom loaders, an
        unsupported extension prints a message and yields an empty list.

    Raises:
        ValueError: If the extension is unsupported while the
            ``langchain_community`` loaders are in use.
    """
    # Normalise the extension once so both branches agree on case handling.
    extension = os.path.splitext(file_path)[1].lower()

    # The setting is read a single time and tested for truthiness, so a value
    # that is neither True nor False can no longer skip both branches and make
    # the function return None implicitly.
    if get_settings().CustomLoaders:
        # Imports stay inside the branch so the unused loader family is never
        # imported.
        from ingestion.loaders.pdf_loader import load_pdf
        from ingestion.loaders.txt_loader import load_txt
        from ingestion.loaders.md_loader import load_md
        from ingestion.loaders.docx_loader import load_docx

        # Dispatcher: extension -> loader function.
        custom_loaders = {
            ".pdf": load_pdf,
            ".docx": load_docx,
            ".md": load_md,
            ".txt": load_txt,
        }
        loader = custom_loaders.get(extension)
        if loader is None:
            # This branch reports the problem and returns an empty result
            # rather than raising; kept as-is for existing callers.
            print(f"Unsupported file type: {extension}")
            return []
        # Return the loader's result as-is.
        return loader(file_path)

    from langchain_community.document_loaders import (
        TextLoader,
        Docx2txtLoader,
        UnstructuredMarkdownLoader,
        PyMuPDFLoader,
    )

    if extension == ".txt":
        return TextLoader(file_path, encoding="utf8").load()
    if extension == ".docx":
        return Docx2txtLoader(file_path).load()
    if extension == ".md":
        return UnstructuredMarkdownLoader(file_path).load()
    if extension == ".pdf":
        return PyMuPDFLoader(file_path).load()
    raise ValueError(f"Unsupported file extension: {extension}")