# src/api/main.py
import sys
import os
import io
import time
import uuid
import atexit
import shutil
import asyncio
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, HTTPException, UploadFile, File, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, field_validator
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from src.rag.query_engine import QueryEngine
from src.summarization.inference import summarize
# ── Constants ──────────────────────────────────────────────────────────────
# Upload size cap, declared in megabytes and converted to bytes once.
MAX_UPLOAD_MB = 10
MAX_UPLOAD_BYTES = MAX_UPLOAD_MB * (1024 ** 2)

# Directory (relative to the working directory) where uploads are stored;
# it is created at import time so handlers can write to it immediately.
UPLOAD_DIR = Path("data") / "uploads"
UPLOAD_DIR.mkdir(exist_ok=True, parents=True)

# 3 min max for summarization on CPU
SUMMARIZE_TIMEOUT_S = 3 * 60
# ── App ────────────────────────────────────────────────────────────────────
# Browser origins that are allowed to call this API with credentials.
_ALLOWED_ORIGINS = [
    "https://nyay-lens.vercel.app",  # Production Vercel URL
    "http://localhost:5173",  # Local Vite dev server
    "http://127.0.0.1:5173",
]

# The ASGI application object; routes below are registered on it.
app = FastAPI(
    title="NyayLens API",
    description="Production API for Legal Chat, Document QA, and Summarization",
    version="1.0.0",
)

# CORS: restrict origins to the list above, but allow any method and header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Startup / Shutdown ─────────────────────────────────────────────────────
async def cleanup_loop():
    """Background task to remove leftover files older than 2 hours.

    Runs forever: each pass scans the top level of ``UPLOAD_DIR``, deletes
    regular files whose modification time is more than 7200 seconds old,
    then sleeps for an hour before the next pass.
    """
    while True:
        now = time.time()
        for f in UPLOAD_DIR.glob("*"):
            # The stat() call sits inside the try as well: a file can vanish
            # between glob() and stat() (e.g. an explicit delete request), and
            # an uncaught error here would end this loop permanently.
            try:
                if f.is_file() and (now - f.stat().st_mtime) > 7200:
                    f.unlink()
            except Exception as e:
                print(f"Cleanup error: {e}")
        await asyncio.sleep(3600)  # Check every hour
@app.on_event("startup")
async def startup():
    """Build the shared query engine and launch the upload-cleanup task.

    Sets the module-level ``query_engine`` used by the chat endpoint and
    stores the background task in the module-level ``_cleanup_task``.
    """
    global query_engine, _cleanup_task
    print("Initializing NyayLens Backend...")
    query_engine = QueryEngine()
    # Start the infinite cleanup loop. The event loop keeps only a weak
    # reference to tasks, so hold a strong reference here; otherwise the task
    # may be garbage-collected before it finishes.
    _cleanup_task = asyncio.create_task(cleanup_loop())
    print("β Backend ready. Background cleanup active.")
@app.on_event("shutdown")
def shutdown():
    """Wipe the uploads directory when the server stops, leaving it empty."""
    uploads_present = UPLOAD_DIR.exists()
    if uploads_present:
        # Remove the directory together with everything stored in it.
        shutil.rmtree(UPLOAD_DIR)
    # Recreate the (now empty) directory so the path is valid afterwards.
    UPLOAD_DIR.mkdir(exist_ok=True, parents=True)
    print("β Uploads directory cleaned on shutdown.")
# ── Schema ─────────────────────────────────────────────────────────────────
class UnifiedRequest(BaseModel):
    """Request body for the unified chat endpoint."""

    # The user's prompt; validated and whitespace-stripped below.
    message: str
    # Optional path of a document the request refers to.
    filepath: Optional[str] = None
    # Passed as ``top_k`` to the global query route.
    top_k: int = 5
    # Earlier conversation turns, forwarded unchanged to the query engine.
    chat_history: Optional[list] = []

    @field_validator("message")
    @classmethod
    def message_not_empty(cls, value):
        """Reject blank or over-long messages and return the stripped text.

        The 4000-character limit is applied to the message as received,
        i.e. before surrounding whitespace is removed.
        """
        trimmed = value.strip() if value else ""
        if not trimmed:
            raise ValueError("Message cannot be empty")
        if len(value) > 4000:
            raise ValueError("Message too long (max 4000 characters)")
        return trimmed
# ── Health ─────────────────────────────────────────────────────────────────
@app.get("/")
@app.get("/api/health")
def health():
    """Return a fixed status payload; served at both "/" and "/api/health"."""
    model_names = ["Legal-BERT", "Legal-PEGASUS", "Llama-3.1-8B (Groq)"]
    payload = dict(status="online", service="NyayLens API", version="1.0.0")
    payload["models"] = model_names
    payload["index"] = "FAISS 298K vectors"
    return payload
# ── Upload ─────────────────────────────────────────────────────────────────
def _extract_pdf_pages(raw_bytes):
    """Return the stripped text of each PDF page that yields any text.

    Synchronous and potentially slow; call it from a worker thread when
    running on the event loop.
    """
    import pdfplumber  # imported lazily, only when a PDF is actually handled

    with pdfplumber.open(io.BytesIO(raw_bytes)) as pdf:
        return [
            text.strip()
            for text in (page.extract_text() for page in pdf.pages)
            if text
        ]


@app.post("/api/upload")
async def upload_document(file: UploadFile = File(...)):
    """
    Accepts .pdf and .txt files up to 10 MB.
    PDFs are extracted to plain text via pdfplumber.
    Returns a server filepath for subsequent /api/chat calls.

    Response keys: ``filepath``, ``filename``, ``size_kb`` and, for PDFs,
    ``pages`` (the number of pages that produced text).

    Raises:
        HTTPException 400: unsupported extension, empty file, or a PDF that
            cannot be parsed.
        HTTPException 413: the file exceeds ``MAX_UPLOAD_MB``.
        HTTPException 422: the PDF contains no extractable text.
    """
    # 1. Validate extension
    filename = file.filename or "upload"
    ext = Path(filename).suffix.lower()
    if ext not in {".pdf", ".txt"}:
        raise HTTPException(status_code=400, detail="Only .pdf and .txt files are supported.")

    # 2. Read with size guard. Reading at most one byte past the limit is
    #    enough to detect an oversized file without loading all of it.
    raw_bytes = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(raw_bytes) > MAX_UPLOAD_BYTES:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum allowed size is {MAX_UPLOAD_MB} MB."
        )
    if not raw_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # 3. Unique name to avoid collisions
    uid = uuid.uuid4().hex[:8]
    safe_name = f"{uid}_{Path(filename).stem}"
    out_path = UPLOAD_DIR / f"{safe_name}.txt"
    size_kb = round(len(raw_bytes) / 1024, 1)

    # 4. Extract / save
    if ext == ".pdf":
        try:
            # PDF parsing is blocking work; run it in the default executor so
            # other requests keep being served while it runs.
            loop = asyncio.get_running_loop()
            text_parts = await loop.run_in_executor(None, _extract_pdf_pages, raw_bytes)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"PDF extraction failed: {e}") from e
        if not text_parts:
            raise HTTPException(
                status_code=422,
                detail="PDF contains no readable text. It may be a scanned image β please use a searchable PDF."
            )
        out_path.write_text("\n\n".join(text_parts), encoding="utf-8")
        return {"filepath": str(out_path), "filename": filename, "pages": len(text_parts), "size_kb": size_kb}

    # Plain-text uploads are stored exactly as received.
    out_path.write_bytes(raw_bytes)
    return {"filepath": str(out_path), "filename": filename, "size_kb": size_kb}
# ── Chat ───────────────────────────────────────────────────────────────────
def _is_stored_upload(filepath):
    """Return True if *filepath* is an existing regular file inside UPLOAD_DIR."""
    try:
        candidate = Path(filepath).resolve()
        uploads_root = UPLOAD_DIR.resolve()
        return uploads_root in candidate.parents and candidate.is_file()
    except (OSError, RuntimeError, ValueError):
        # Unresolvable or malformed paths (e.g. embedded NUL) are never valid.
        return False


@app.post("/api/chat")
def chat(request: UnifiedRequest):
    """
    Unified intent-aware chat endpoint.
    Routes to: Summarization | Document QA | Global RAG

    Routing rules, in order:
      1. The message contains "summarize" or "summary": summarise the
         referenced file, or ask the user to upload one if none was given.
      2. A filepath is present: answer the question against that document.
      3. Otherwise: query the global index.

    Returns a 404 JSON response when ``filepath`` does not name a file inside
    the uploads directory. Raises HTTPException 500 if any route fails.
    """
    message_lower = request.message.lower()
    print(f"\n[BACKEND] '{request.message[:80]}' | file={os.path.basename(request.filepath) if request.filepath else 'None'}")

    # Validate filepath if provided. The path comes from the client, so it
    # must resolve to a file under UPLOAD_DIR; a bare existence check would
    # let a caller point the pipelines at any file readable by the server.
    if request.filepath and not _is_stored_upload(request.filepath):
        return JSONResponse(
            status_code=404,
            content={"answer": "The uploaded document could not be found on the server. Please re-upload the file.", "sources": []}
        )

    try:
        # ── Route 1: Summarization ─────────────────────────────────────────
        # NOTE(review): SUMMARIZE_TIMEOUT_S is defined but not enforced in
        # this function; the call below blocks until summarize() returns.
        if "summarize" in message_lower or "summary" in message_lower:
            if not request.filepath:
                return {
                    "answer": "Please **upload a PDF or text file** first using the π button, then ask me to summarize it.",
                    "sources": []
                }
            print("[BACKEND] β Summarization pipeline")
            summary_dict = summarize(request.filepath)
            return {
                "answer": "__STRUCTURED_SUMMARY__",
                "summary": summary_dict,
                "sources": [{"judgment_id": os.path.basename(request.filepath), "score": 1.0}]
            }

        # ── Route 2: Document QA ───────────────────────────────────────────
        if request.filepath:
            print("[BACKEND] β Document QA")
            return query_engine.query_with_document(request.message, request.filepath, chat_history=request.chat_history)

        # ── Route 3: Global RAG ────────────────────────────────────────────
        print("[BACKEND] β Global RAG")
        return query_engine.query(request.message, top_k=request.top_k, chat_history=request.chat_history)
    except Exception as e:
        print(f"[BACKEND ERROR] {e}")
        raise HTTPException(status_code=500, detail=f"An internal error occurred: {str(e)}") from e
# ── Delete a specific upload ───────────────────────────────────────────────
@app.delete("/api/upload/{filename}")
def delete_upload(filename: str):
    """Explicit delete for a specific upload.

    Only a regular file located directly inside ``UPLOAD_DIR`` can be
    removed. Returns ``{"status": "deleted"}`` on success.

    Raises:
        HTTPException 404: the name does not identify such a file, or the
            file disappeared before it could be removed.
    """
    try:
        uploads_root = UPLOAD_DIR.resolve()
        target = (uploads_root / filename).resolve()
        # Comparing the resolved parent blocks names such as ".." or
        # "..\\x" (Windows) from reaching files outside the uploads folder.
        deletable = target.parent == uploads_root and target.is_file()
        if deletable:
            target.unlink()
    except (OSError, RuntimeError, ValueError):
        # Covers malformed names and the file vanishing between the check
        # and the unlink (e.g. removed by the background cleanup).
        deletable = False
    if not deletable:
        raise HTTPException(status_code=404, detail="File not found.")
    return {"status": "deleted"}
|