Spaces:
Running
Running
| # src/api/main.py | |
| import sys | |
| import os | |
| import io | |
| import time | |
| import uuid | |
| import atexit | |
| import shutil | |
| import asyncio | |
| from pathlib import Path | |
| from typing import Optional | |
| from fastapi import FastAPI, HTTPException, UploadFile, File, Request | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse | |
| from pydantic import BaseModel, field_validator | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) | |
| from src.rag.query_engine import QueryEngine | |
| from src.summarization.inference import summarize | |
| # ── Constants ────────────────────────────────────────────────────────────── | |
# Upload size ceiling, expressed in megabytes and derived bytes.
MAX_UPLOAD_MB = 10
MAX_UPLOAD_BYTES = MAX_UPLOAD_MB * 1024 ** 2

# Directory (relative to the working directory) where uploads are stored;
# created at import time so later writes never hit a missing folder.
UPLOAD_DIR = Path("data") / "uploads"
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# 3 min max for summarization on CPU.
# NOTE(review): no code in this view reads this constant — confirm it is applied.
SUMMARIZE_TIMEOUT_S = 3 * 60
| # ── App ──────────────────────────────────────────────────────────────────── | |
# Browser origins allowed to call this API (CORS allow-list).
_ALLOWED_ORIGINS = [
    "https://nyay-lens.vercel.app",  # Production Vercel URL
    "http://localhost:5173",  # Local Vite dev server
    "http://127.0.0.1:5173",
]

# The ASGI application object; metadata feeds the generated API docs.
app = FastAPI(
    title="NyayLens API",
    description="Production API for Legal Chat, Document QA, and Summarization",
    version="1.0.0",
)

# CORS: explicit origin allow-list with credentials; any method and header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # ── Startup / Shutdown ───────────────────────────────────────────────────── | |
def _remove_stale_uploads(max_age_s):
    """Delete regular files directly inside UPLOAD_DIR older than *max_age_s* seconds.

    Age is measured from each file's modification time. Failures on a single
    file are reported and skipped so one bad entry cannot stop the sweep.
    """
    now = time.time()
    for entry in UPLOAD_DIR.glob("*"):
        try:
            # stat() lives inside the try: the file may vanish between the
            # directory listing and this call (explicit delete, shutdown).
            if entry.is_file() and (now - entry.stat().st_mtime) > max_age_s:
                entry.unlink()
        except FileNotFoundError:
            continue  # already gone — nothing left to clean
        except Exception as e:
            print(f"Cleanup error: {e}")


async def cleanup_loop(max_age_s: float = 7200, interval_s: float = 3600):
    """Background task to remove leftover files older than 2 hours.

    Runs forever: sweeps UPLOAD_DIR, then sleeps *interval_s* seconds
    (default: one hour) before the next sweep.

    Args:
        max_age_s: Minimum age in seconds (by mtime) before a file is removed.
        interval_s: Seconds to sleep between sweeps.
    """
    while True:
        try:
            _remove_stale_uploads(max_age_s)
        except Exception as e:
            # A failed sweep (e.g. the directory listing itself erroring) must
            # not terminate the loop; report it and retry on the next cycle.
            print(f"Cleanup error: {e}")
        await asyncio.sleep(interval_s)  # Check every hour by default
# Strong references to fire-and-forget tasks. The event loop keeps only weak
# references, so an unreferenced task may be garbage-collected mid-execution.
_background_tasks = set()


async def startup():
    """Initialise the query engine and launch the periodic upload cleanup.

    Sets the module-level ``query_engine`` and schedules ``cleanup_loop()`` on
    the running event loop.

    NOTE(review): no lifecycle registration for this coroutine is visible in
    this view — confirm it is hooked to application startup.
    """
    global query_engine
    print("Initializing NyayLens Backend...")
    query_engine = QueryEngine()

    # Start the infinite cleanup loop and keep a reference until it finishes.
    cleanup_task = asyncio.create_task(cleanup_loop())
    _background_tasks.add(cleanup_task)
    cleanup_task.add_done_callback(_background_tasks.discard)

    print("β Backend ready. Background cleanup active.")
def shutdown():
    """Clean up all uploaded files on server shutdown.

    Removes the whole uploads directory and recreates it empty. Filesystem
    errors are reported rather than raised so a locked or already-removed
    file cannot abort the rest of the shutdown sequence.
    """
    try:
        if UPLOAD_DIR.exists():
            shutil.rmtree(UPLOAD_DIR)
        # Recreate unconditionally so the directory exists for the next start.
        UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Cleanup error: {e}")
        return
    print("β Uploads directory cleaned on shutdown.")
| # ── Schema ───────────────────────────────────────────────────────────────── | |
class UnifiedRequest(BaseModel):
    """Request body for the unified chat endpoint.

    Fields:
        message: User text; must be non-blank and at most 4000 characters.
        filepath: Optional server-side path of a previously uploaded document.
        top_k: Value forwarded as ``top_k`` to the global query.
        chat_history: Optional prior conversation turns, forwarded as-is.
    """

    message: str
    filepath: Optional[str] = None
    top_k: int = 5
    # Pydantic copies field defaults per instance, so this list is not shared.
    chat_history: Optional[list] = []

    # Without this registration the method is never invoked by Pydantic and
    # blank or oversized messages pass through unvalidated.
    @field_validator("message")
    @classmethod
    def message_not_empty(cls, v):
        """Reject blank or overlong messages and return the stripped text.

        Raises:
            ValueError: If the message is empty/whitespace or longer than
                4000 characters (measured before stripping).
        """
        if not v or not v.strip():
            raise ValueError("Message cannot be empty")
        if len(v) > 4000:
            raise ValueError("Message too long (max 4000 characters)")
        return v.strip()
| # ── Health ───────────────────────────────────────────────────────────────── | |
def health():
    """Return a static status payload describing the service.

    The payload is hard-coded: it does not probe the models or the index.
    """
    model_names = ["Legal-BERT", "Legal-PEGASUS", "Llama-3.1-8B (Groq)"]
    payload = dict(
        status="online",
        service="NyayLens API",
        version="1.0.0",
        models=model_names,
        index="FAISS 298K vectors",
    )
    return payload
| # ── Upload ───────────────────────────────────────────────────────────────── | |
def _extract_pdf_text(raw_bytes):
    """Return the stripped text of every PDF page that yields any text."""
    import pdfplumber  # deferred: only needed for PDF uploads

    with pdfplumber.open(io.BytesIO(raw_bytes)) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        return [text.strip() for text in page_texts if text]


async def upload_document(file: UploadFile = File(...)):
    """
    Accepts .pdf and .txt files up to 10 MB.
    PDFs are extracted to plain text via pdfplumber.
    Returns a server filepath for subsequent /api/chat calls.

    Returns:
        dict with ``filepath``, ``filename`` and ``size_kb``; PDF uploads also
        include ``pages`` (the number of pages that produced text).

    Raises:
        HTTPException: 400 for an unsupported extension, an empty file or a
            PDF that cannot be parsed; 413 when the size limit is exceeded;
            422 when a PDF contains no extractable text.
    """
    import re

    # 1. Validate extension
    filename = file.filename or "upload"
    ext = Path(filename).suffix.lower()
    if ext not in {".pdf", ".txt"}:
        raise HTTPException(status_code=400, detail="Only .pdf and .txt files are supported.")

    # 2. Read with size guard — request at most one byte over the limit so an
    #    oversized upload is rejected without buffering all of it in memory.
    raw_bytes = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(raw_bytes) > MAX_UPLOAD_BYTES:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum allowed size is {MAX_UPLOAD_MB} MB."
        )
    if not raw_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # 3. Unique name to avoid collisions. The client-supplied stem is reduced
    #    to word characters, dots and dashes and truncated, so separators,
    #    control/NUL bytes or overlong names cannot break the write below.
    uid = uuid.uuid4().hex[:8]
    stem = re.sub(r"[^\w.-]+", "_", Path(filename).stem).strip("._")[:60] or "upload"
    out_path = UPLOAD_DIR / f"{uid}_{stem}.txt"
    size_kb = round(len(raw_bytes) / 1024, 1)

    # 4. Extract / save
    if ext == ".pdf":
        try:
            # PDF parsing is CPU-bound; run it off the event loop so other
            # requests keep being served meanwhile.
            loop = asyncio.get_running_loop()
            text_parts = await loop.run_in_executor(None, _extract_pdf_text, raw_bytes)
        except ImportError:
            raise  # missing dependency is a server fault, not a bad upload
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"PDF extraction failed: {e}")
        if not text_parts:
            raise HTTPException(
                status_code=422,
                detail="PDF contains no readable text. It may be a scanned image β please use a searchable PDF."
            )
        out_path.write_text("\n\n".join(text_parts), encoding="utf-8")
        return {"filepath": str(out_path), "filename": filename, "pages": len(text_parts), "size_kb": size_kb}

    # Plain-text uploads are stored byte-for-byte.
    out_path.write_bytes(raw_bytes)
    return {"filepath": str(out_path), "filename": filename, "size_kb": size_kb}
| # ── Chat ─────────────────────────────────────────────────────────────────── | |
def _resolve_uploaded_file(filepath):
    """Return the resolved path if *filepath* is a file inside UPLOAD_DIR, else None."""
    try:
        candidate = Path(filepath).resolve()
        upload_root = UPLOAD_DIR.resolve()
        inside_uploads = upload_root in candidate.parents
        if inside_uploads and candidate.is_file():
            return candidate
    except (OSError, ValueError, RuntimeError):
        # Malformed paths (e.g. embedded NUL bytes) are treated as "not found".
        pass
    return None


def chat(request: UnifiedRequest):
    """
    Unified intent-aware chat endpoint.
    Routes to: Summarization | Document QA | Global RAG

    Routing:
        * message contains "summarize"/"summary" -> ``summarize`` on the file
          (a hint is returned instead when no file was supplied);
        * otherwise, a file was supplied -> ``query_engine.query_with_document``;
        * otherwise -> ``query_engine.query``.

    Returns:
        A dict for the client, or a 404 ``JSONResponse`` when ``filepath`` does
        not refer to an existing file inside the uploads directory.

    Raises:
        HTTPException: 500 when any pipeline raises.
    """
    message_lower = request.message.lower()
    print(f"\n[BACKEND] '{request.message[:80]}' | file={os.path.basename(request.filepath) if request.filepath else 'None'}")

    # Validate filepath if provided. It is client-controlled, so it is only
    # accepted when it resolves inside the uploads directory; anything else
    # (e.g. "/etc/passwd" or "../" tricks) is answered like a missing upload.
    document_path = None
    if request.filepath:
        resolved = _resolve_uploaded_file(request.filepath)
        if resolved is None:
            return JSONResponse(
                status_code=404,
                content={"answer": "The uploaded document could not be found on the server. Please re-upload the file.", "sources": []}
            )
        document_path = str(resolved)

    try:
        # ── Route 1: Summarization ──────────────────────────────────────────
        # NOTE(review): SUMMARIZE_TIMEOUT_S exists at module level but is not
        # applied here; summarize() runs synchronously with no time limit.
        if "summarize" in message_lower or "summary" in message_lower:
            if document_path is None:
                return {
                    "answer": "Please **upload a PDF or text file** first using the π button, then ask me to summarize it.",
                    "sources": []
                }
            print("[BACKEND] β Summarization pipeline")
            summary_dict = summarize(document_path)
            return {
                "answer": "__STRUCTURED_SUMMARY__",
                "summary": summary_dict,
                "sources": [{"judgment_id": os.path.basename(document_path), "score": 1.0}]
            }

        # ── Route 2: Document QA ────────────────────────────────────────────
        if document_path is not None:
            print("[BACKEND] β Document QA")
            return query_engine.query_with_document(request.message, document_path, chat_history=request.chat_history)

        # ── Route 3: Global RAG ─────────────────────────────────────────────
        print("[BACKEND] β Global RAG")
        return query_engine.query(request.message, top_k=request.top_k, chat_history=request.chat_history)
    except Exception as e:
        print(f"[BACKEND ERROR] {e}")
        raise HTTPException(status_code=500, detail=f"An internal error occurred: {str(e)}") from e
| # ── Delete a specific upload ─────────────────────────────────────────────── | |
def delete_upload(filename: str):
    """Explicit delete for a specific upload.

    Args:
        filename: Name of a file stored directly inside the uploads directory.

    Returns:
        ``{"status": "deleted"}`` when the file was removed.

    Raises:
        HTTPException: 404 when the name does not refer to an existing file
            directly inside the uploads directory.
    """
    not_found = HTTPException(status_code=404, detail="File not found.")
    try:
        upload_root = UPLOAD_DIR.resolve()
        target = (upload_root / filename).resolve()
    except (OSError, ValueError, RuntimeError):
        raise not_found

    # The name is caller-supplied: "../" segments or an absolute path would
    # otherwise let the request delete files outside the uploads directory.
    if target.parent != upload_root or not target.is_file():
        raise not_found

    try:
        target.unlink()
    except FileNotFoundError:
        # Removed concurrently (e.g. by the periodic cleanup) — report as missing.
        raise not_found
    return {"status": "deleted"}