import os
import tempfile
from typing import List

from fastapi import UploadFile

from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader


class FileProcessor:
    """Convert an uploaded file into a list of text chunks.

    The upload is spooled to a temporary file on disk, handed to a loader
    chosen from the original filename (``PDFLoader`` for ``.pdf``, otherwise
    ``TextFileLoader``), and the loaded documents are split with a
    ``CharacterTextSplitter``.
    """

    def __init__(self):
        self.text_splitter = CharacterTextSplitter()

    async def process_file(self, file: UploadFile) -> List[str]:
        """Process an uploaded file and return text chunks.

        Args:
            file: The uploaded file. Its ``filename`` is only used to pick a
                loader and a temp-file extension; a missing filename is
                treated as an empty string.

        Returns:
            The result of ``self.text_splitter.split_texts`` applied to the
            documents returned by the loader's ``load_documents()``.

        Raises:
            Whatever the selected loader or the splitter raises; the
            temporary file is removed in every case.
        """
        print(f"Processing file: {file.filename}")

        # The filename is client-controlled and may be absent altogether.
        original_filename = file.filename or ""
        suffix = self._temp_suffix(original_filename)

        # Read the upload before creating anything on disk, so a failed read
        # cannot leave a stray temporary file behind.
        content = await file.read()

        # delete=False: the loader is handed the path rather than this
        # handle, so the file has to outlive the handle. We remove it
        # ourselves in the ``finally`` below.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        temp_path = temp_file.name
        try:
            # Close the handle (which also flushes) before anyone else
            # touches the path; an open handle blocks unlink on Windows.
            with temp_file:
                temp_file.write(content)

            print(f"Created temporary file at: {temp_path}")

            # Create appropriate loader based on file type
            loader = self._get_loader(temp_path, original_filename)

            # Load and process the documents
            documents = loader.load_documents()
            return self.text_splitter.split_texts(documents)
        finally:
            # Best-effort cleanup: a failure to delete must not mask the
            # result or the original exception.
            try:
                os.unlink(temp_path)
            except OSError as e:
                print(f"Error cleaning up temporary file: {e}")

    @staticmethod
    def _temp_suffix(filename: str) -> str:
        """Return a temp-file suffix (e.g. ``".pdf"``) derived from *filename*.

        Only the extension of the final path component is kept, so directory
        parts of an untrusted filename never reach ``NamedTemporaryFile``.
        A name without an extension yields an empty suffix.
        """
        return os.path.splitext(os.path.basename(filename))[1]

    def _get_loader(self, file_path: str, original_filename: str):
        """Get the appropriate loader based on file type.

        Args:
            file_path: Path of the file the loader should read.
            original_filename: Client-supplied name; matched
                case-insensitively against ``.pdf``. ``None`` or an empty
                string falls through to the text loader.

        Returns:
            A ``PDFLoader`` for ``.pdf`` names, otherwise a ``TextFileLoader``.
        """
        if (original_filename or "").lower().endswith(".pdf"):
            return PDFLoader(file_path)
        return TextFileLoader(file_path)