# NyayLens-API / src /api /main.py
# Sai Pranav Reddy
# Clean lightweight deployment
# 968e24d
# src/api/main.py
import sys
import os
import io
import time
import uuid
import atexit
import shutil
import asyncio
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, HTTPException, UploadFile, File, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, field_validator
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from src.rag.query_engine import QueryEngine
from src.summarization.inference import summarize
# ── Constants ──────────────────────────────────────────────────────────────
# Upload size ceiling, expressed both in megabytes and in raw bytes.
MAX_UPLOAD_MB = 10
MAX_UPLOAD_BYTES = MAX_UPLOAD_MB * (1 << 20)

# Folder that receives uploaded documents; it is created at import time.
UPLOAD_DIR = Path("data") / "uploads"
UPLOAD_DIR.mkdir(exist_ok=True, parents=True)

# Time budget for summarization, in seconds (3 minutes on CPU).
SUMMARIZE_TIMEOUT_S = 3 * 60
# ── App ────────────────────────────────────────────────────────────────────
# Browser origins that may call the API with credentials.
_ALLOWED_ORIGINS = [
    "https://nyay-lens.vercel.app",  # Production Vercel URL
    "http://localhost:5173",  # Local Vite dev server
    "http://127.0.0.1:5173",
]

app = FastAPI(
    version="1.0.0",
    title="NyayLens API",
    description="Production API for Legal Chat, Document QA, and Summarization",
)

# CORS: explicit origin allow-list; every method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
# ── Startup / Shutdown ─────────────────────────────────────────────────────
async def cleanup_loop(
    directory: Optional[Path] = None,
    max_age_s: float = 7200,
    interval_s: float = 3600,
):
    """Background task that periodically deletes stale files from a folder.

    Runs forever. Each pass removes every regular file directly inside
    ``directory`` whose modification time is more than ``max_age_s`` seconds
    in the past, then sleeps ``interval_s`` seconds before the next pass.

    Args:
        directory: Folder to sweep. Defaults to ``UPLOAD_DIR``.
        max_age_s: Age threshold in seconds (default: 2 hours).
        interval_s: Pause between passes in seconds (default: 1 hour).
    """
    target_dir = UPLOAD_DIR if directory is None else directory
    while True:
        now = time.time()
        for f in target_dir.glob("*"):
            # stat() is inside the try on purpose: a file can vanish between
            # the directory listing and the stat call (explicit delete,
            # shutdown wipe), and an uncaught error here would end the task
            # permanently.
            try:
                if f.is_file() and (now - f.stat().st_mtime) > max_age_s:
                    f.unlink()
            except FileNotFoundError:
                # Already removed by someone else; nothing left to do.
                continue
            except OSError as e:
                print(f"Cleanup error: {e}")
        await asyncio.sleep(interval_s)
@app.on_event("startup")
async def startup():
    """Create the shared query engine and launch the upload-cleanup task.

    Publishes the ``QueryEngine`` instance as the module-level global
    ``query_engine`` and schedules ``cleanup_loop()`` on the running event
    loop.
    """
    global query_engine
    print("Initializing NyayLens Backend...")
    query_engine = QueryEngine()
    # The event loop only keeps a weak reference to tasks, so a task whose
    # handle is dropped may be garbage-collected mid-execution. Keep a strong
    # reference on the application state for the lifetime of the app.
    app.state.cleanup_task = asyncio.create_task(cleanup_loop())
    print("✓ Backend ready. Background cleanup active.")
@app.on_event("shutdown")
def shutdown():
    """Best-effort removal of all uploaded files on server shutdown.

    Deletes the whole upload directory and re-creates it empty. A filesystem
    error is logged instead of raised so it cannot abort the shutdown
    sequence.
    """
    try:
        if UPLOAD_DIR.exists():
            shutil.rmtree(UPLOAD_DIR)
        UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Uploads cleanup on shutdown failed: {e}")
        return
    print("✓ Uploads directory cleaned on shutdown.")
# ── Schema ─────────────────────────────────────────────────────────────────
class UnifiedRequest(BaseModel):
    """Request body accepted by the unified chat endpoint."""

    # The user's question or instruction; stripped and validated below.
    message: str
    # Server-side path of a previously uploaded document, if any.
    filepath: Optional[str] = None
    # Value passed as ``top_k`` to the global retrieval query.
    top_k: int = 5
    # Earlier conversation turns, forwarded unchanged to the query engine.
    chat_history: Optional[list] = []

    @field_validator("message")
    @classmethod
    def message_not_empty(cls, v):
        """Strip the message and reject empty or over-long input.

        The length limit is applied to the stripped text, so surrounding
        whitespace does not count against the 4000-character budget.
        """
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("Message cannot be empty")
        if len(cleaned) > 4000:
            raise ValueError("Message too long (max 4000 characters)")
        return cleaned

    @field_validator("top_k")
    @classmethod
    def top_k_positive(cls, v):
        """Reject zero or negative retrieval sizes at the request boundary."""
        # NOTE(review): no upper bound is enforced here; consider capping it
        # once the largest value the retrieval backend should serve is known.
        if v < 1:
            raise ValueError("top_k must be at least 1")
        return v
# ── Health ─────────────────────────────────────────────────────────────────
@app.get("/")
@app.get("/api/health")
def health():
    """Liveness probe: report service identity plus the model and index inventory."""
    model_names = ["Legal-BERT", "Legal-PEGASUS", "Llama-3.1-8B (Groq)"]
    payload = dict(
        status="online",
        service="NyayLens API",
        version="1.0.0",
        models=model_names,
        index="FAISS 298K vectors",
    )
    return payload
# ── Upload ─────────────────────────────────────────────────────────────────
@app.post("/api/upload")
async def upload_document(file: UploadFile = File(...)):
    """
    Accept a .pdf or .txt upload of at most MAX_UPLOAD_MB and store it as text.

    PDFs are converted to plain text via pdfplumber; .txt files are stored as
    received. The stored file always gets a ``.txt`` suffix and a random
    prefix so that uploads with the same name do not collide.

    Returns:
        A dict with ``filepath`` (server path to pass to /api/chat),
        ``filename`` (the client's original name) and ``size_kb``. PDF
        uploads additionally carry ``pages``, the number of pages that
        yielded text.

    Raises:
        HTTPException: 400 for an unsupported extension, an empty file or a
            PDF that cannot be parsed; 413 when the size limit is exceeded;
            422 for a PDF without any extractable text.
    """
    # Imported up front so a missing dependency fails loudly rather than
    # being reported as a malformed PDF further down.
    import pdfplumber  # noqa: F401

    # 1. Validate extension
    filename = file.filename or "upload"
    ext = Path(filename).suffix.lower()
    if ext not in {".pdf", ".txt"}:
        raise HTTPException(status_code=400, detail="Only .pdf and .txt files are supported.")

    # 2. Read with size guard. Asking for one byte more than the limit is
    #    enough to detect an oversized upload without loading all of it.
    raw_bytes = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(raw_bytes) > MAX_UPLOAD_BYTES:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum allowed size is {MAX_UPLOAD_MB} MB."
        )
    if not raw_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # 3. Unique, filesystem-safe name to avoid collisions
    uid = uuid.uuid4().hex[:8]
    out_path = UPLOAD_DIR / f"{uid}_{_safe_stem(filename)}.txt"
    size_kb = round(len(raw_bytes) / 1024, 1)

    # 4. Extract / save
    if ext == ".pdf":
        try:
            # Parsing is CPU-bound; run it in the default executor so other
            # requests are not stalled while a large PDF is processed.
            loop = asyncio.get_running_loop()
            text_parts = await loop.run_in_executor(None, _extract_pdf_text, raw_bytes)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"PDF extraction failed: {e}") from e
        if not text_parts:
            raise HTTPException(
                status_code=422,
                detail="PDF contains no readable text. It may be a scanned image — please use a searchable PDF."
            )
        out_path.write_text("\n\n".join(text_parts), encoding="utf-8")
        return {"filepath": str(out_path), "filename": filename, "pages": len(text_parts), "size_kb": size_kb}

    out_path.write_bytes(raw_bytes)
    return {"filepath": str(out_path), "filename": filename, "size_kb": size_kb}


def _extract_pdf_text(raw_bytes: bytes) -> list:
    """Return the stripped text of every PDF page that yields any, in page order."""
    import pdfplumber

    with pdfplumber.open(io.BytesIO(raw_bytes)) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        return [text.strip() for text in page_texts if text]


def _safe_stem(filename: str, max_len: int = 80) -> str:
    """Reduce a client-supplied file name to a short stem safe to store on disk."""
    stem = Path(filename).stem
    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in stem)
    return cleaned[:max_len] or "upload"
# ── Chat ───────────────────────────────────────────────────────────────────
@app.post("/api/chat")
def chat(request: UnifiedRequest):
    """
    Unified intent-aware chat endpoint.
    Routes to: Summarization | Document QA | Global RAG

    Routing rules:
      * the message mentions "summarize" or "summary": summarize the
        attached document, or ask the user to upload one if none is attached;
      * otherwise, with an attached document: question answering over it;
      * otherwise: retrieval-augmented answer over the global index.

    ``request.filepath`` comes from the client, so it is only honoured when
    it points at an existing regular file inside UPLOAD_DIR; anything else is
    answered with a 404 JSON body.

    Raises:
        HTTPException: 500 when the selected pipeline raises.
    """
    message_lower = request.message.lower()
    print(f"\n[BACKEND] '{request.message[:80]}' | file={os.path.basename(request.filepath) if request.filepath else 'None'}")

    # Validate filepath if provided. Without the containment check any
    # readable file on the server could be summarised or queried.
    if request.filepath and not _is_uploaded_file(request.filepath):
        return JSONResponse(
            status_code=404,
            content={"answer": "The uploaded document could not be found on the server. Please re-upload the file.", "sources": []}
        )

    try:
        # ── Route 1: Summarization ─────────────────────────────────────────
        # NOTE: no timeout is applied to this call; SUMMARIZE_TIMEOUT_S is
        # not used here.
        if "summarize" in message_lower or "summary" in message_lower:
            if not request.filepath:
                return {
                    "answer": "Please **upload a PDF or text file** first using the 📎 button, then ask me to summarize it.",
                    "sources": []
                }
            print("[BACKEND] → Summarization pipeline")
            summary_dict = summarize(request.filepath)
            return {
                "answer": "__STRUCTURED_SUMMARY__",
                "summary": summary_dict,
                "sources": [{"judgment_id": os.path.basename(request.filepath), "score": 1.0}]
            }

        # ── Route 2: Document QA ────────────────────────────────────────────
        if request.filepath:
            print("[BACKEND] → Document QA")
            return query_engine.query_with_document(request.message, request.filepath, chat_history=request.chat_history)

        # ── Route 3: Global RAG ─────────────────────────────────────────────
        print("[BACKEND] → Global RAG")
        return query_engine.query(request.message, top_k=request.top_k, chat_history=request.chat_history)
    except Exception as e:
        print(f"[BACKEND ERROR] {e}")
        raise HTTPException(status_code=500, detail=f"An internal error occurred: {str(e)}") from e


def _is_uploaded_file(filepath: str) -> bool:
    """Return True only if *filepath* is an existing regular file inside UPLOAD_DIR."""
    try:
        resolved = Path(filepath).resolve()
        uploads_root = UPLOAD_DIR.resolve()
        return uploads_root in resolved.parents and resolved.is_file()
    except (OSError, ValueError, RuntimeError):
        # Malformed paths (embedded NUL, symlink loops, ...) are treated as
        # "not an upload".
        return False
# ── Delete a single upload on request ──────────────────────────────────────
@app.delete("/api/upload/{filename}")
def delete_upload(filename: str):
    """Explicit delete for a specific upload.

    Args:
        filename: Bare name of a file stored directly inside UPLOAD_DIR.

    Returns:
        ``{"status": "deleted"}`` once the file has been removed.

    Raises:
        HTTPException: 404 when the name is not a plain file name or no such
            file exists.
    """
    # Accept only a single path component, so that ".." or a platform path
    # separator cannot make the target escape the upload directory.
    if filename in {".", ".."} or Path(filename).name != filename:
        raise HTTPException(status_code=404, detail="File not found.")

    target = UPLOAD_DIR / filename
    if not target.is_file():
        raise HTTPException(status_code=404, detail="File not found.")
    try:
        target.unlink()
    except FileNotFoundError:
        # Removed concurrently (e.g. by the background cleanup) after the check.
        raise HTTPException(status_code=404, detail="File not found.") from None
    return {"status": "deleted"}