import asyncio
import contextlib
import logging
import os
import shutil
import tempfile

from fastapi import UploadFile
from langchain_community.document_loaders import UnstructuredMarkdownLoader, PyPDFLoader

logger = logging.getLogger(__name__)

# Extensions that need a dedicated loader; anything else is read as UTF-8 text.
_LOADERS_BY_EXTENSION = {
    ".md": UnstructuredMarkdownLoader,
    ".pdf": PyPDFLoader,
}


def _load_text(path: str, extension: str) -> str:
    """Return the text of the file at *path*, choosing a loader by *extension*."""
    loader_cls = _LOADERS_BY_EXTENSION.get(extension)
    if loader_cls is None:
        # Fallback for plain-text files.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    docs = loader_cls(path).load()
    return "\n\n".join(d.page_content for d in docs)


def _extract_text(stream, extension: str) -> str:
    """Spool *stream* to a private temp file, extract its text, then delete it.

    Failures while writing the temp file propagate to the caller; failures
    while parsing are logged and reported as the sentinel string
    ``"Error processing file."``.
    """
    # Only whitelisted suffixes ever reach the filesystem, so nothing taken
    # from the client-supplied filename can influence the temp path.
    suffix = extension if extension in _LOADERS_BY_EXTENSION else ".txt"
    fd, temp_path = tempfile.mkstemp(prefix="temp_", suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as buffer:
            shutil.copyfileobj(stream, buffer)
        try:
            return _load_text(temp_path, extension)
        except Exception as exc:
            logger.exception("Error processing file: %s", exc)
            return "Error processing file."
    finally:
        # Always clean up, including when the write step itself failed.
        with contextlib.suppress(FileNotFoundError):
            os.remove(temp_path)


async def process_file(file: UploadFile) -> str:
    """Extract the text content of an uploaded file.

    The upload is copied to a uniquely named temporary file, parsed according
    to its (case-insensitive) extension -- ``.md`` via
    ``UnstructuredMarkdownLoader``, ``.pdf`` via ``PyPDFLoader``, anything
    else as UTF-8 text -- and the temporary file is removed afterwards.

    Args:
        file: The uploaded file; its ``filename`` is used only to determine
            the extension and its ``file`` object is read from the current
            position.

    Returns:
        The extracted text (documents joined by blank lines), or the string
        ``"Error processing file."`` if loading or decoding fails.
    """
    # ``filename`` may be None; treat that as "no extension" (plain text).
    extension = os.path.splitext(file.filename or "")[1].lower()
    # Copying and parsing are blocking; run them off the event loop so other
    # requests are not stalled while a large document is processed.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _extract_text, file.file, extension)