Spaces:
Running
Running
| import os | |
| from langchain_community.document_loaders import UnstructuredMarkdownLoader, PyPDFLoader | |
| from fastapi import UploadFile | |
| import shutil | |
async def process_file(file: UploadFile) -> str:
    """Extract the text content of an uploaded file.

    The upload is copied to a uniquely named temporary file, parsed
    according to the extension of the client-supplied filename, and the
    temporary file is removed before returning.

    Args:
        file: The uploaded file. Only the extension of ``file.filename`` is
            used (case-insensitively) to choose a loader: ``.md`` uses
            ``UnstructuredMarkdownLoader``, ``.pdf`` uses ``PyPDFLoader``,
            and anything else is read as UTF-8 text.

    Returns:
        The extracted text, with the loader's documents joined by blank
        lines, or the literal string ``"Error processing file."`` if
        loading or decoding fails.

    Raises:
        OSError: If the upload cannot be copied to the temporary file.
            Copy failures are not converted to the error string.
    """
    import tempfile

    # The client-supplied filename is untrusted: it may be None, contain
    # path separators, or collide with a concurrent upload. It is therefore
    # never used as a path -- only its extension picks the loader.
    original_name = (file.filename or "").lower()
    if original_name.endswith(".md"):
        suffix = ".md"
    elif original_name.endswith(".pdf"):
        suffix = ".pdf"
    else:
        suffix = ""

    temp_filename = None
    content = ""
    try:
        # delete=False because the loaders reopen the file by path after
        # this handle is closed; removal happens in the ``finally`` below.
        with tempfile.NamedTemporaryFile(
            mode="wb", prefix="temp_", suffix=suffix, delete=False
        ) as buffer:
            temp_filename = buffer.name
            shutil.copyfileobj(file.file, buffer)

        try:
            if suffix == ".md":
                docs = UnstructuredMarkdownLoader(temp_filename).load()
                content = "\n\n".join(d.page_content for d in docs)
            elif suffix == ".pdf":
                docs = PyPDFLoader(temp_filename).load()
                content = "\n\n".join(d.page_content for d in docs)
            else:
                # Fallback for text files
                with open(temp_filename, "r", encoding="utf-8") as f:
                    content = f.read()
        except Exception as e:
            # Best-effort boundary: callers receive a sentinel string
            # instead of an exception when parsing fails.
            print(f"Error processing file: {e}")
            content = "Error processing file."
    finally:
        # Runs even when the copy itself fails, so a partially written
        # temp file is never left behind.
        if temp_filename is not None:
            try:
                os.remove(temp_filename)
            except FileNotFoundError:
                pass
    return content