Spaces:
Running
Running
| """ | |
| api.py — FastAPI REST endpoint for DocMind AI (Multi-File + Memory) | |
| Runs on port 7861 alongside the Streamlit app (port 7860). | |
| v4 changes: | |
| - GET /documents → list all loaded documents | |
| - DELETE /documents/{filename} → remove a specific file | |
| - Upload now supports up to 5 files (additive) | |
| - Reset clears all files | |
| """ | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import List | |
| import os | |
| import io | |
# Application object; every endpoint in this module hangs off it.
app = FastAPI(version="3.0", title="DocMind AI API")

# Permissive CORS: any origin, method and header is accepted.
# NOTE(review): the FastAPI CORS documentation advises against combining
# wildcard origins with allow_credentials=True — confirm this is intended
# before exposing the API beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
# Process-wide RAG engine, created on first use by get_rag().
_rag_engine = None


def get_rag():
    """Return the shared RAGEngine, constructing it on the first call.

    The ``rag_engine`` import is deferred to the first call, so merely
    importing this module does not import it.
    """
    global _rag_engine
    if _rag_engine is not None:
        return _rag_engine

    from rag_engine import RAGEngine

    _rag_engine = RAGEngine()
    return _rag_engine
| # ── Models ─────────────────────────────────────────────────────────────────── | |
# Request body for a question: one free-text field.
class QueryRequest(BaseModel):
    question: str  # text of the question to answer
# One loaded document as reported by the document listing.
class DocumentInfo(BaseModel):
    name: str  # document name as stored by the engine
    file_type: str  # type string taken from the engine's record
    chunk_count: int  # number of chunks indexed for this document
# Result of a query; failures are reported through `success`/`error`
# rather than through the answer text.
class QueryResponse(BaseModel):
    answer: str  # generated answer ("" when the query failed)
    sources: List[str]  # source references returned with the answer
    success: bool  # False when the query raised
    error: str = ""  # error message, empty on success
    memory_count: int = 0  # conversation exchanges held after this query
# Result of ingesting one uploaded file, plus the totals after ingestion.
class UploadResponse(BaseModel):
    success: bool
    filename: str  # name of the file that was ingested
    chunks: int  # chunks produced from this file
    file_type: str  # human-readable label for the file's extension
    message: str  # summary sentence for display
    file_count: int  # how many files are now loaded
    total_chunks: int  # total chunks across all files
# Listing of every loaded document together with aggregate counters.
class DocumentsResponse(BaseModel):
    file_count: int  # number of documents currently loaded
    max_files: int  # upper limit on simultaneously loaded documents
    total_chunks: int  # chunks across all loaded documents
    documents: List[DocumentInfo]  # one entry per loaded document
# Snapshot of the conversation memory held by the engine.
class MemoryResponse(BaseModel):
    exchanges: int  # value of the engine's memory counter
    messages: List[dict]  # stored messages, passed through from the engine
| # ── Routes ─────────────────────────────────────────────────────────────────── | |
# Liveness probe: returns a fixed payload and never touches the RAG engine.
def health():
    return dict(status="ok", service="DocMind AI API", version="3.0")
def list_documents():
    """List all currently loaded documents."""
    engine = get_rag()
    loaded = engine.get_documents()
    loaded_count = len(loaded)
    chunk_total = engine.get_total_chunks()

    # Each engine record is read through its "name", "type" and
    # "chunk_count" keys and converted to the API representation.
    entries = []
    for record in loaded:
        entries.append(
            DocumentInfo(
                name=record["name"],
                file_type=record["type"],
                chunk_count=record["chunk_count"],
            )
        )

    return DocumentsResponse(
        file_count=loaded_count,
        max_files=5,
        total_chunks=chunk_total,
        documents=entries,
    )
def remove_document(filename: str):
    """Remove a specific document by filename."""
    engine = get_rag()

    # remove_file() reports whether anything was removed; a falsy result
    # is turned into a 404 below.
    if engine.remove_file(filename):
        return {
            "success": True,
            "message": f"Removed '{filename}'.",
            "file_count": engine.get_file_count(),
            "total_chunks": engine.get_total_chunks(),
        }

    raise HTTPException(status_code=404, detail=f"Document '{filename}' not found.")
# Accepted upload extensions mapped to the label reported back to the client.
# Single source of truth for both validation and the `file_type` field.
_UPLOAD_TYPE_LABELS = {
    ".pdf": "PDF Document",
    ".txt": "Text File",
    ".docx": "Word Document",
    ".doc": "Word Document",
    ".csv": "CSV Spreadsheet",
    ".xlsx": "Excel Spreadsheet",
    ".xls": "Excel Spreadsheet",
    ".jpg": "Image",
    ".jpeg": "Image",
    ".png": "Image",
    ".webp": "Image",
}


async def upload_document(file: UploadFile = File(...)):
    """
    Upload and ingest a document (additive — up to 5 files).
    Supported: PDF, TXT, DOCX, CSV, XLSX, JPG, PNG, WEBP

    Raises HTTPException 400 when the upload has no filename, when its
    extension is not supported, or when ingestion raises ValueError; raises
    HTTPException 500 for any other failure while reading or ingesting.
    """
    filename = file.filename
    if not filename:
        # UploadFile.filename is optional. Without this guard,
        # os.path.splitext(None) raises TypeError outside the try block and
        # the client only sees an opaque 500.
        raise HTTPException(status_code=400, detail="Uploaded file has no filename.")

    suffix = os.path.splitext(filename)[-1].lower()
    if suffix not in _UPLOAD_TYPE_LABELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {suffix}. Supported: {', '.join(sorted(_UPLOAD_TYPE_LABELS))}"
        )

    try:
        rag = get_rag()
        content = await file.read()

        # Hand the engine an in-memory file object that carries the original
        # name under both attribute spellings.
        buf = io.BytesIO(content)
        buf.name = filename
        buf.filename = filename
        chunks = rag.ingest_file(buf)

        return UploadResponse(
            success=True,
            filename=filename,
            chunks=chunks,
            file_type=_UPLOAD_TYPE_LABELS[suffix],
            message=f"Successfully indexed {chunks} chunks from {filename}",
            file_count=rag.get_file_count(),
            total_chunks=rag.get_total_chunks(),
        )
    except HTTPException:
        # Keep the status code of an HTTP error raised further down instead
        # of letting the generic handler below rewrap it as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
async def query_document(req: QueryRequest):
    """Ask a question across all loaded documents. Uses conversation memory."""
    question = req.question
    if question.strip() == "":
        raise HTTPException(status_code=400, detail="Question cannot be empty.")

    # Any failure from the engine is reported in-band (success=False plus
    # the error text) instead of as an HTTP error status.
    try:
        engine = get_rag()
        reply, cited = engine.query(question)
        remembered = engine.get_memory_count()
        return QueryResponse(
            answer=reply,
            sources=cited,
            success=True,
            memory_count=remembered,
        )
    except Exception as exc:
        return QueryResponse(answer="", sources=[], success=False, error=str(exc))
def get_memory():
    """Return current conversation history."""
    engine = get_rag()
    exchange_total = engine.get_memory_count()
    history = engine.get_memory_messages()
    return MemoryResponse(exchanges=exchange_total, messages=history)
def clear_memory():
    """Clear conversation history without removing documents."""
    get_rag().clear_memory()
    outcome = {"success": True, "message": "Conversation memory cleared."}
    return outcome
def reset():
    """Reset everything — all documents and memory."""
    get_rag().reset()
    outcome = {"success": True, "message": "All documents and memory cleared."}
    return outcome