Spaces:

SaiPranav09
/

NyayLens-API

Running

Sai Pranav Reddy

Clean lightweight deployment

968e24d 3 days ago

9.34 kB

	# src/api/main.py
	import sys
	import os
	import io
	import time
	import uuid
	import atexit
	import shutil
	import asyncio
	from pathlib import Path
	from typing import Optional

	from fastapi import FastAPI, HTTPException, UploadFile, File, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from pydantic import BaseModel, field_validator

	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))

	from src.rag.query_engine import QueryEngine
	from src.summarization.inference import summarize

	# ── Constants ──────────────────────────────────────────────────────────────
	MAX_UPLOAD_MB = 10  # upload size cap, in megabytes
	MAX_UPLOAD_BYTES = 1024 * 1024 * MAX_UPLOAD_MB  # the same cap expressed in bytes
	UPLOAD_DIR = Path("data", "uploads")  # relative to the process working directory
	UPLOAD_DIR.mkdir(exist_ok=True, parents=True)  # created eagerly at import time
	SUMMARIZE_TIMEOUT_S = 180  # 3 min max for summarization on CPU

	# ── App ────────────────────────────────────────────────────────────────────
	# The ASGI application object; routes below are registered on it.
	app = FastAPI(
	    version="1.0.0",
	    title="NyayLens API",
	    description="Production API for Legal Chat, Document QA, and Summarization",
	)

	# CORS: only the listed browser origins may call the API (credentials allowed);
	# every HTTP method and request header is accepted from those origins.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_credentials=True,
	    allow_origins=[
	        "https://nyay-lens.vercel.app",  # Production Vercel URL
	        "http://localhost:5173",  # Local Vite dev server
	        "http://127.0.0.1:5173",
	    ],
	)

	# ── Startup / Shutdown ─────────────────────────────────────────────────────
	async def cleanup_loop(
	    max_age_s: float = 7200,
	    interval_s: float = 3600,
	    upload_dir: Optional[Path] = None,
	):
	    """Periodically delete stale files from the uploads directory.

	    Runs forever, so it is meant to be scheduled as a background task.

	    Args:
	        max_age_s: Files whose modification time is more than this many
	            seconds in the past are removed (default: 2 hours).
	        interval_s: Seconds to sleep between sweeps (default: 1 hour).
	        upload_dir: Directory to sweep; defaults to the module-level
	            ``UPLOAD_DIR``.
	    """
	    directory = UPLOAD_DIR if upload_dir is None else upload_dir
	    while True:
	        cutoff = time.time() - max_age_s
	        try:
	            for entry in directory.glob("*"):
	                # stat() and unlink() share one guard: a file may disappear
	                # between being listed and being inspected, and a single
	                # failure must not end the loop for good.
	                try:
	                    if entry.is_file() and entry.stat().st_mtime < cutoff:
	                        entry.unlink()
	                except OSError as e:
	                    print(f"Cleanup error: {e}")
	        except OSError as e:
	            # Listing the directory itself failed; retry on the next sweep.
	            print(f"Cleanup error: {e}")
	        await asyncio.sleep(interval_s)

	@app.on_event("startup")
	async def startup():
	    """Create the shared query engine and launch the periodic upload cleanup.

	    Sets the module-level ``query_engine`` used by the chat endpoint and
	    keeps the cleanup task in the module-level ``_cleanup_task``.
	    """
	    global query_engine, _cleanup_task
	    print("Initializing NyayLens Backend...")
	    query_engine = QueryEngine()

	    # Start the infinite cleanup loop. The event loop only keeps a weak
	    # reference to tasks, so hold a strong reference here; otherwise the
	    # task could be garbage-collected while it is still running.
	    _cleanup_task = asyncio.create_task(cleanup_loop())
	    print("✓ Backend ready. Background cleanup active.")

	@app.on_event("shutdown")
	def shutdown():
	    """Empty the uploads directory when the server stops."""
	    if not UPLOAD_DIR.exists():
	        return
	    # Drop the whole tree, then recreate the directory so it is left empty.
	    shutil.rmtree(UPLOAD_DIR)
	    UPLOAD_DIR.mkdir(exist_ok=True, parents=True)
	    print("✓ Uploads directory cleaned on shutdown.")

	# ── Schema ─────────────────────────────────────────────────────────────────
	class UnifiedRequest(BaseModel):
	    """Request body accepted by the chat endpoint."""

	    # The user's question or instruction; normalised by the validator below.
	    message: str
	    # Server-side path of a previously uploaded document, if any.
	    filepath: Optional[str] = None
	    # Passed as ``top_k`` to the global RAG query.
	    top_k: int = 5
	    # Earlier conversation turns, handed to the query engine unchanged.
	    chat_history: Optional[list] = []

	    @field_validator("message")
	    @classmethod
	    def message_not_empty(cls, v):
	        """Return the stripped message, rejecting blank or over-long input.

	        The 4000-character limit is applied to the stripped text, i.e. to
	        the value that is actually stored, so surrounding whitespace does
	        not count against it.

	        Raises:
	            ValueError: If the message is empty/blank or longer than 4000
	                characters after stripping.
	        """
	        text = v.strip() if v else ""
	        if not text:
	            raise ValueError("Message cannot be empty")
	        if len(text) > 4000:
	            raise ValueError("Message too long (max 4000 characters)")
	        return text

	# ── Health ─────────────────────────────────────────────────────────────────
	@app.get("/")
	@app.get("/api/health")
	def health():
	    """Return a static status payload; served at both "/" and "/api/health"."""
	    model_names = ["Legal-BERT", "Legal-PEGASUS", "Llama-3.1-8B (Groq)"]
	    return dict(
	        status="online",
	        service="NyayLens API",
	        version="1.0.0",
	        models=model_names,
	        index="FAISS 298K vectors",
	    )

	# ── Upload ─────────────────────────────────────────────────────────────────
	@app.post("/api/upload")
	async def upload_document(file: UploadFile = File(...)):
	    """
	    Accepts .pdf and .txt files up to 10 MB.
	    PDFs are extracted to plain text via pdfplumber.
	    Returns a server filepath for subsequent /api/chat calls.

	    Returns:
	        A dict with ``filepath`` (where the text was stored), ``filename``
	        (the name supplied by the client) and ``size_kb``; PDF uploads also
	        get ``pages``, the number of pages that yielded text.

	    Raises:
	        HTTPException: 400 for an unsupported extension, an empty file or a
	            PDF that cannot be parsed; 413 when the size limit is exceeded;
	            422 when a PDF contains no extractable text.
	    """
	    import pdfplumber

	    # 1. Validate extension
	    filename = file.filename or "upload"
	    ext = Path(filename).suffix.lower()
	    if ext not in {".pdf", ".txt"}:
	        raise HTTPException(status_code=400, detail="Only .pdf and .txt files are supported.")

	    # 2. Read with size guard. Ask for at most one byte more than the limit so
	    #    an oversized upload is detected without buffering all of it in memory.
	    raw_bytes = await file.read(MAX_UPLOAD_BYTES + 1)
	    if len(raw_bytes) > MAX_UPLOAD_BYTES:
	        raise HTTPException(
	            status_code=413,
	            detail=f"File too large. Maximum allowed size is {MAX_UPLOAD_MB} MB."
	        )
	    if len(raw_bytes) == 0:
	        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

	    # 3. Unique name to avoid collisions. The stem comes from the client, so
	    #    neutralise path separators and non-printable characters and cap its
	    #    length to stay within common file-name limits.
	    uid = uuid.uuid4().hex[:8]
	    clean_stem = "".join(
	        ch if ch.isprintable() and ch not in "/\\" else "_"
	        for ch in Path(filename).stem
	    )[:60]
	    safe_name = f"{uid}_{clean_stem}"
	    out_path = UPLOAD_DIR / f"{safe_name}.txt"
	    size_kb = round(len(raw_bytes) / 1024, 1)

	    # 4. Extract / save
	    if ext != ".pdf":
	        out_path.write_bytes(raw_bytes)
	        return {"filepath": str(out_path), "filename": filename, "size_kb": size_kb}

	    def extract_pages():
	        """Return the stripped text of every page that yields any text."""
	        with pdfplumber.open(io.BytesIO(raw_bytes)) as pdf:
	            page_texts = (page.extract_text() for page in pdf.pages)
	            return [text.strip() for text in page_texts if text]

	    try:
	        # PDF parsing is blocking work; run it in the default executor so the
	        # event loop keeps serving other requests in the meantime.
	        loop = asyncio.get_running_loop()
	        text_parts = await loop.run_in_executor(None, extract_pages)
	    except Exception as e:
	        raise HTTPException(status_code=400, detail=f"PDF extraction failed: {e}") from e

	    if not text_parts:
	        raise HTTPException(
	            status_code=422,
	            detail="PDF contains no readable text. It may be a scanned image — please use a searchable PDF."
	        )

	    out_path.write_text("\n\n".join(text_parts), encoding="utf-8")
	    return {"filepath": str(out_path), "filename": filename, "pages": len(text_parts), "size_kb": size_kb}


	# ── Chat ───────────────────────────────────────────────────────────────────
	@app.post("/api/chat")
	def chat(request: UnifiedRequest):
	    """
	    Unified intent-aware chat endpoint.
	    Routes to: Summarization | Document QA | Global RAG

	    Routing rules:
	      * message mentions "summarize"/"summary" -> summarization of the
	        uploaded document (a hint is returned when no document is attached);
	      * otherwise, a document is attached -> document QA;
	      * otherwise -> global RAG query.

	    Returns:
	        The selected pipeline's response, or a 404 JSON response when the
	        referenced document is not an existing file inside the uploads
	        directory.

	    Raises:
	        HTTPException: 500 when the selected pipeline raises.
	    """
	    message_lower = request.message.lower()

	    doc_label = os.path.basename(request.filepath) if request.filepath else "None"
	    print(f"\n[BACKEND] '{request.message[:80]}' | file={doc_label}")

	    # Validate filepath if provided. The path is client-supplied, so it must
	    # resolve to an existing file *inside* the uploads directory; anything
	    # else (e.g. "/etc/passwd" or "data/uploads/../../x") is treated as missing.
	    if request.filepath:
	        try:
	            uploads_root = UPLOAD_DIR.resolve()
	            resolved = Path(request.filepath).resolve()
	            is_valid_upload = uploads_root in resolved.parents and resolved.is_file()
	        except (OSError, ValueError, RuntimeError):
	            # Malformed paths (embedded NUL, symlink loops, ...) are rejected too.
	            is_valid_upload = False
	        if not is_valid_upload:
	            return JSONResponse(
	                status_code=404,
	                content={"answer": "The uploaded document could not be found on the server. Please re-upload the file.", "sources": []}
	            )

	    try:
	        # ── Route 1: Summarization ─────────────────────────────────────────
	        # NOTE: no timeout is applied to this call; SUMMARIZE_TIMEOUT_S is
	        # not enforced here.
	        if "summarize" in message_lower or "summary" in message_lower:
	            if not request.filepath:
	                return {
	                    "answer": "Please upload a PDF or text file first using the 📎 button, then ask me to summarize it.",
	                    "sources": []
	                }
	            print("[BACKEND] → Summarization pipeline")
	            summary_dict = summarize(request.filepath)
	            return {
	                "answer": "__STRUCTURED_SUMMARY__",
	                "summary": summary_dict,
	                "sources": [{"judgment_id": os.path.basename(request.filepath), "score": 1.0}]
	            }

	        # ── Route 2: Document QA ────────────────────────────────────────────
	        if request.filepath:
	            print("[BACKEND] → Document QA")
	            return query_engine.query_with_document(request.message, request.filepath, chat_history=request.chat_history)

	        # ── Route 3: Global RAG ─────────────────────────────────────────────
	        print("[BACKEND] → Global RAG")
	        return query_engine.query(request.message, top_k=request.top_k, chat_history=request.chat_history)

	    except Exception as e:
	        print(f"[BACKEND ERROR] {e}")
	        raise HTTPException(status_code=500, detail=f"An internal error occurred: {str(e)}") from e


	# ── Delete a specific upload ───────────────────────────────────────────────
	@app.delete("/api/upload/{filename}")
	def delete_upload(filename: str):
	    """Explicit delete for a specific upload.

	    Args:
	        filename: Bare name of a file inside the uploads directory.

	    Returns:
	        ``{"status": "deleted"}`` once the file has been removed.

	    Raises:
	        HTTPException: 404 when the name is not a plain file name or no such
	            file exists in the uploads directory.
	    """
	    # Accept only a bare file name so the target can never leave UPLOAD_DIR
	    # (rejects "..", drive prefixes and names containing path separators).
	    if filename in {"", ".", ".."} or Path(filename).name != filename:
	        raise HTTPException(status_code=404, detail="File not found.")

	    target = UPLOAD_DIR / filename
	    if not target.is_file():
	        raise HTTPException(status_code=404, detail="File not found.")
	    try:
	        target.unlink()
	    except FileNotFoundError:
	        # Removed concurrently (e.g. by the periodic cleanup) after the check.
	        raise HTTPException(status_code=404, detail="File not found.") from None
	    return {"status": "deleted"}