AIE-RAG / backend /file_processor.py
mbudisic's picture
Added FastAPI for backend and kept chainlit as the frontend.
8b3bb10
import os
import tempfile
from typing import List
from fastapi import UploadFile
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
class FileProcessor:
    """Turn an uploaded file into text chunks.

    The upload is spooled to a temporary file on disk, read back through
    ``PDFLoader`` or ``TextFileLoader`` (chosen by the original filename),
    and split with a ``CharacterTextSplitter``.
    """

    def __init__(self):
        # Splitter is created with its default settings and reused for
        # every processed file.
        self.text_splitter = CharacterTextSplitter()

    async def process_file(self, file: UploadFile) -> List[str]:
        """Process an uploaded file and return text chunks.

        Args:
            file: The uploaded file. Its ``filename`` is client-supplied and
                may be missing; it is used only to pick a temp-file suffix
                and a loader.

        Returns:
            The result of ``self.text_splitter.split_texts`` applied to the
            documents produced by the loader.

        Raises:
            Whatever the upload read, the temp-file write, the loader, or
            the splitter raise. The temporary file is removed in every case.
        """
        print(f"Processing file: {file.filename}")
        filename = file.filename or ""

        # Read the upload before touching the disk so that a failed read
        # cannot leave an orphaned temporary file behind.
        content = await file.read()

        # delete=False because the loader reopens the file by path; removal
        # is handled explicitly in the ``finally`` below.
        temp_file = tempfile.NamedTemporaryFile(
            delete=False, suffix=self._temp_suffix(filename)
        )
        temp_path = temp_file.name
        try:
            # Close the handle before loading: the data is then fully on
            # disk, and the path can be reopened on platforms that forbid
            # opening a file that is still held open.
            with temp_file:
                temp_file.write(content)
            print(f"Created temporary file at: {temp_path}")

            # Create appropriate loader based on file type
            loader = self._get_loader(temp_path, filename)

            # Load and process the documents
            documents = loader.load_documents()
            return self.text_splitter.split_texts(documents)
        finally:
            # Best-effort cleanup: a failure to delete is reported but must
            # not mask the result or the original exception.
            try:
                os.unlink(temp_path)
            except OSError as e:
                print(f"Error cleaning up temporary file: {e}")

    @staticmethod
    def _temp_suffix(filename):
        """Return a safe temp-file suffix (e.g. ``".pdf"``) for *filename*.

        Only the extension of the final path component is kept, so directory
        separators in a client-supplied name can never end up in the suffix
        handed to ``tempfile``. Returns ``""`` when the name is missing or
        has no extension.
        """
        if not filename:
            return ""
        # Treat both separator styles as separators regardless of host OS.
        base_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
        return os.path.splitext(base_name)[1]

    def _get_loader(self, file_path: str, original_filename: str):
        """Get the appropriate loader based on file type.

        Args:
            file_path: Path of the file the loader should read.
            original_filename: Name the file was uploaded under; only its
                extension is inspected (case-insensitively).

        Returns:
            A ``PDFLoader`` for names ending in ``.pdf``; a
            ``TextFileLoader`` for everything else, including a missing name.
        """
        if (original_filename or "").lower().endswith(".pdf"):
            return PDFLoader(file_path)
        return TextFileLoader(file_path)