""" api.py — FastAPI REST endpoint for DocMind AI (Multi-File + Memory) Runs on port 7861 alongside the Streamlit app (port 7860). v4 changes: - GET /documents → list all loaded documents - DELETE /documents/{filename} → remove a specific file - Upload now supports up to 5 files (additive) - Reset clears all files """ from fastapi import FastAPI, UploadFile, File, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import List import os import io app = FastAPI(title="DocMind AI API", version="3.0") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Shared RAG engine instance _rag_engine = None def get_rag(): global _rag_engine if _rag_engine is None: from rag_engine import RAGEngine _rag_engine = RAGEngine() return _rag_engine # ── Models ─────────────────────────────────────────────────────────────────── class QueryRequest(BaseModel): question: str class DocumentInfo(BaseModel): name: str file_type: str chunk_count: int class QueryResponse(BaseModel): answer: str sources: List[str] success: bool error: str = "" memory_count: int = 0 class UploadResponse(BaseModel): success: bool filename: str chunks: int file_type: str message: str file_count: int # how many files are now loaded total_chunks: int # total chunks across all files class DocumentsResponse(BaseModel): file_count: int max_files: int total_chunks: int documents: List[DocumentInfo] class MemoryResponse(BaseModel): exchanges: int messages: List[dict] # ── Routes ─────────────────────────────────────────────────────────────────── @app.get("/health") def health(): return {"status": "ok", "service": "DocMind AI API", "version": "3.0"} @app.get("/documents", response_model=DocumentsResponse) def list_documents(): """List all currently loaded documents.""" rag = get_rag() docs = rag.get_documents() return DocumentsResponse( file_count=len(docs), max_files=5, total_chunks=rag.get_total_chunks(), documents=[ DocumentInfo( name=d["name"], file_type=d["type"], chunk_count=d["chunk_count"], ) for d in docs ], ) @app.delete("/documents/{filename}") def remove_document(filename: str): """Remove a specific document by filename.""" rag = get_rag() removed = rag.remove_file(filename) if not removed: raise HTTPException(status_code=404, detail=f"Document '{filename}' not found.") return { "success": True, "message": f"Removed '{filename}'.", "file_count": rag.get_file_count(), "total_chunks": rag.get_total_chunks(), } @app.post("/upload", response_model=UploadResponse) async def upload_document(file: UploadFile = File(...)): """ Upload and ingest a document (additive — up to 5 files). Supported: PDF, TXT, DOCX, CSV, XLSX, JPG, PNG, WEBP """ filename = file.filename suffix = os.path.splitext(filename)[-1].lower() SUPPORTED = {".pdf", ".txt", ".docx", ".doc", ".csv", ".xlsx", ".xls", ".jpg", ".jpeg", ".png", ".webp"} if suffix not in SUPPORTED: raise HTTPException( status_code=400, detail=f"Unsupported file type: {suffix}. Supported: {', '.join(sorted(SUPPORTED))}" ) try: rag = get_rag() content = await file.read() buf = io.BytesIO(content) buf.name = filename buf.filename = filename chunks = rag.ingest_file(buf) type_labels = { ".pdf": "PDF Document", ".txt": "Text File", ".docx": "Word Document", ".doc": "Word Document", ".csv": "CSV Spreadsheet", ".xlsx": "Excel Spreadsheet", ".xls": "Excel Spreadsheet", ".jpg": "Image", ".jpeg": "Image", ".png": "Image", ".webp": "Image", } return UploadResponse( success=True, filename=filename, chunks=chunks, file_type=type_labels.get(suffix, suffix), message=f"Successfully indexed {chunks} chunks from {filename}", file_count=rag.get_file_count(), total_chunks=rag.get_total_chunks(), ) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @app.post("/query", response_model=QueryResponse) async def query_document(req: QueryRequest): """Ask a question across all loaded documents. Uses conversation memory.""" if not req.question.strip(): raise HTTPException(status_code=400, detail="Question cannot be empty.") try: rag = get_rag() answer, sources = rag.query(req.question) return QueryResponse( answer=answer, sources=sources, success=True, memory_count=rag.get_memory_count() ) except Exception as e: return QueryResponse(answer="", sources=[], success=False, error=str(e)) @app.get("/memory", response_model=MemoryResponse) def get_memory(): """Return current conversation history.""" rag = get_rag() return MemoryResponse( exchanges=rag.get_memory_count(), messages=rag.get_memory_messages() ) @app.post("/memory/clear") def clear_memory(): """Clear conversation history without removing documents.""" rag = get_rag() rag.clear_memory() return {"success": True, "message": "Conversation memory cleared."} @app.post("/reset") def reset(): """Reset everything — all documents and memory.""" rag = get_rag() rag.reset() return {"success": True, "message": "All documents and memory cleared."}