Mitakshara committed on
Commit
a01e1da
·
1 Parent(s): c961e00

feat(backend): initialize FastAPI app

Browse files
Files changed (1) hide show
  1. main.py +503 -0
main.py ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LexMind — FastAPI Backend (Pinecone + HuggingFace Inference API)
3
+ Run with: uvicorn main:app --reload --port 8000
4
+ """
5
+
6
+ import os
7
+ import re
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ import httpx
13
+ import fitz # PyMuPDF
14
+ import torch
15
+ from fastapi import FastAPI, UploadFile, File, HTTPException
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from fastapi.staticfiles import StaticFiles
18
+ from fastapi.responses import FileResponse
19
+ from pydantic import BaseModel
20
+ from sentence_transformers import SentenceTransformer
21
+ from pinecone import Pinecone
22
+ from dotenv import load_dotenv
23
+
24
load_dotenv()  # pick up variables from a local .env file before reading them below
# ── Configuration ─────────────────────────────────────────────────────────────
# API credentials; each falls back to an empty string when the variable is unset.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
HF_API_KEY = os.environ.get("HF_API_KEY", "")

# Names of the two Pinecone indexes queried by this service.
JUDGEMENTS_INDEX = "legal-judgements"
LEGAL_FRAMEWORK_INDEX = "legal-framework"

# Embedding model: on-disk cache directory, hub identifier, and torch device.
LOCAL_MODEL_DIR = "./models/bge-small"
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Both stages currently point at the same chat model; edit either line to split them.
HF_ROUTER_MODEL = "meta-llama/Llama-3.1-8B-Instruct"  # Stage 1: conversation + routing
HF_LEGAL_MODEL = "meta-llama/Llama-3.1-8B-Instruct"   # Stage 2: legal RAG answer

# Chat-completions endpoint that call_hf() posts to.
HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"
# Instruction text prepended to every query before it is embedded.
BGE_PREFIX = "Represent this sentence for searching relevant passages: "
TOP_K = 10
# Number of legal-framework chunks retrieved per question.
CONSTITUTION_TOP_K = 5
44
+ # ─────────────────────────────────────────────────────────────────────────────
45
+
46
+
47
+ # ── Load embedding model ──────────────────────────────────────────────────────
48
def load_embed_model() -> SentenceTransformer:
    """Return the embedding model, placed on ``DEVICE``.

    If ``LOCAL_MODEL_DIR`` already contains files the model is loaded from
    there. Otherwise ``EMBED_MODEL_NAME`` is downloaded, saved into
    ``LOCAL_MODEL_DIR`` for later runs, and the freshly downloaded instance is
    used directly instead of being loaded a second time from disk.

    Returns:
        The ``SentenceTransformer`` instance moved to ``DEVICE``.
    """
    cache_dir = Path(LOCAL_MODEL_DIR)

    if cache_dir.is_dir() and any(cache_dir.iterdir()):
        print(f"✅ Loading bge-small from '{LOCAL_MODEL_DIR}'")
        model = SentenceTransformer(str(cache_dir))
    else:
        print(f"📥 Downloading {EMBED_MODEL_NAME} (~130 MB)…")
        cache_dir.mkdir(parents=True, exist_ok=True)
        model = SentenceTransformer(EMBED_MODEL_NAME)
        model.save(str(cache_dir))
        print(f"✅ Model saved to '{LOCAL_MODEL_DIR}'")

    model = model.to(DEVICE)
    print(f" Embedding device: {DEVICE}")
    return model
62
+
63
+
64
embed_model = load_embed_model()


# ── Connect to Pinecone ───────────────────────────────────────────────────────
print("🔌 Connecting to Pinecone…")
pc = Pinecone(api_key=PINECONE_API_KEY)

# The judgements index is not guarded: any failure here propagates and aborts startup.
judgements_index = pc.Index(JUDGEMENTS_INDEX)
print(f"✅ Judgements index | vectors: {judgements_index.describe_index_stats().total_vector_count}")

# The legal-framework index is optional; the request handlers test `legal_index`
# for falsiness and degrade gracefully when it is None.
try:
    legal_index = pc.Index(LEGAL_FRAMEWORK_INDEX)
    print(f"✅ Legal framework index | vectors: {legal_index.describe_index_stats().total_vector_count}")
except Exception as exc:
    legal_index = None
    print("⚠️ Legal framework index not found — run build_pinecone_legal.py.")
    # Every kind of failure (auth, network, missing index) ends up here, so
    # print the actual cause next to the generic hint above.
    print(f" reason: {type(exc).__name__}: {exc}")
80
+
81
+
82
+ # ── FastAPI app ───────────────────────────────────────────────────────────────
83
app = FastAPI(title="LexMind API", version="3.0.0")

# CORS is left fully open: every origin, HTTP method and request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
91
+
92
+
93
+ # ── Pydantic models ───────────────────────────────────────────────────────────
94
class SearchRequest(BaseModel):
    """Request body accepted by the /api/search and /api/legal-context routes."""

    # Free-text query that gets embedded and matched against the index.
    query: str
    # How many results the caller wants back.
    top_k: int = 10
    # How many leading results to skip (used by /api/search for pagination).
    offset: int = 0
98
+
99
+
100
class ChatRequest(BaseModel):
    """Request body accepted by the legacy /api/chat route."""

    # The user's question.
    message: str
    # Optional grounding text; when non-blank it is sent to the model with the question.
    context: str = ""
    # Accepted in the payload; the /api/chat handler does not read these two fields.
    system_prompt: str = ""
    model_override: str = ""
105
+
106
+
107
class DroppedCitationModel(BaseModel):
    """A judgement the user attached to a smart-chat message."""

    # File name of the judgement; underscores are shown as spaces in prompts.
    file_name: str = ""
    # Year shown next to the case name in prompts.
    year: str = ""
    # Judgement text; prompts include at most its first 2000 characters.
    content: str = ""
    # Score carried along with the citation.
    score: float = 0.0
112
+
113
+
114
class SmartChatRequest(BaseModel):
    """Request body accepted by the two-stage /api/smart-chat route."""

    # The user's chat message.
    message: str
    # The user's case description; prompts include at most its first 800 characters.
    case_text: str = ""
    # Present only when the user dragged a judgement into the chat.
    dropped_citation: Optional[DroppedCitationModel] = None
118
+
119
+
120
+ # ── HuggingFace helper ────────────────────────────────────────────────────────
121
async def call_hf(
    model: str,
    system: str,
    user: str,
    temperature: float = 0.4,
    max_tokens: int = 1024,
    timeout: int = 120,
) -> str:
    """Send one system + user exchange to the HuggingFace chat endpoint.

    Args:
        model: Model identifier placed in the request payload.
        system: Content of the system message.
        user: Content of the user message.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion length limit forwarded to the API.
        timeout: Timeout passed to the HTTP client.

    Returns:
        The stripped text of the first completion.

    Raises:
        HTTPException: 401/403/404/429/503 mirrored from the upstream status
            with a human-readable detail, or 500 when the body is not JSON or
            has an unrecognised shape.
        httpx.HTTPStatusError: for any other upstream status rejected by
            ``raise_for_status``.
    """
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": 0.9,
        "stream": False,
    }

    async with httpx.AsyncClient(timeout=timeout) as client:
        r = await client.post(HF_CHAT_URL, headers=headers, json=payload)

    if r.status_code != 200:
        print(f"[HF ERROR] status={r.status_code} model={model} body={r.text[:400]}")

    # Upstream statuses that are passed through with a friendlier explanation.
    known_errors = {
        401: "Invalid HuggingFace API key.",
        403: f"Access denied for '{model}'. Accept the license at huggingface.co/{model}",
        404: f"Model '{model}' not found.",
        429: "HuggingFace rate limit hit. Please wait and retry.",
        503: f"Model '{model}' is loading (~20s). Please retry.",
    }
    if r.status_code in known_errors:
        raise HTTPException(status_code=r.status_code,
                            detail=known_errors[r.status_code])

    r.raise_for_status()

    try:
        data = r.json()
    except ValueError as exc:
        # A success status with a non-JSON body is reported like any other
        # unexpected payload instead of leaking a decoder error.
        raise HTTPException(status_code=500,
                            detail=f"Unexpected HF response: {r.text[:200]}") from exc

    # Chat-completions shape: {"choices": [{"message": {"content": ...}}]}.
    # The type is checked first so that a list payload reaches the branch below
    # rather than failing on `.get`.
    if isinstance(data, dict):
        choices = data.get("choices") or []
        if choices and isinstance(choices[0], dict):
            message = choices[0].get("message") or {}
            content = message.get("content") if isinstance(message, dict) else None
            if isinstance(content, str) and content:
                return content.strip()
    # List shape: [{"generated_text": ...}].
    elif isinstance(data, list) and data and isinstance(data[0], dict):
        return (data[0].get("generated_text") or "").strip()

    raise HTTPException(status_code=500,
                        detail=f"Unexpected HF response: {str(data)[:200]}")
181
+
182
+
183
+ # ── Embed helper ──────────────────────────────────────────────────────────────
184
def embed_query(text: str) -> list[float]:
    """Embed *text* as a search query and return the vector as a plain list.

    ``BGE_PREFIX`` is prepended before encoding, and the embedding is
    normalised by the encoder.
    """
    prompt = BGE_PREFIX + text
    vector = embed_model.encode(prompt, normalize_embeddings=True, device=DEVICE)
    return vector.tolist()
190
+
191
+
192
+ # ── Routes ────────────────────────────────────────────────────────────────────
193
+
194
@app.get("/api/health")
async def health():
    """Report service status.

    Probes the HuggingFace ``whoami`` endpoint with the configured key and
    reads vector counts from the Pinecone indexes. The HuggingFace probe is
    best-effort: a failure is logged and reported as ``"check HF_API_KEY"``
    rather than failing the request. Pinecone errors are not caught here.

    Returns:
        A dict with the status string, HuggingFace auth state, configured
        model names, vector counts for both indexes and the embedding device.
    """
    hf_ok = False
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            r = await client.get(
                "https://huggingface.co/api/whoami",
                headers={"Authorization": f"Bearer {HF_API_KEY}"},
            )
        hf_ok = r.status_code == 200
    except Exception as exc:
        # Still best-effort, but leave a trace so an unreachable endpoint can
        # be told apart from a rejected key.
        print(f"[HEALTH] HuggingFace probe failed: {type(exc).__name__}: {exc}")

    j_stats = judgements_index.describe_index_stats()
    l_stats = legal_index.describe_index_stats() if legal_index else None

    return {
        "status": "ok",
        "huggingface": "authenticated" if hf_ok else "check HF_API_KEY",
        "router_model": HF_ROUTER_MODEL,
        "legal_model": HF_LEGAL_MODEL,
        "judgements_vectors": j_stats.total_vector_count,
        "legal_vectors": l_stats.total_vector_count if l_stats else 0,
        "embed_device": DEVICE,
    }
219
+
220
+
221
@app.post("/api/search")
async def search(req: SearchRequest):
    """Semantic search over the judgements Pinecone index with pagination.

    The index is asked for ``offset + top_k`` matches (capped at 100) and the
    requested page is sliced out of that list.

    Returns:
        ``{"results": [...], "count": n}`` where ``results`` is the requested
        page and ``count`` is the number of matches fetched from the index.

    Raises:
        HTTPException: 400 for a blank query, a ``top_k`` below 1 or a
            negative ``offset``; 500 when the index query fails.
    """
    if not req.query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty.")
    # Reject paging values that would otherwise yield a wrong slice (negative
    # offset) or be forwarded to the index as a non-positive top_k.
    if req.top_k < 1 or req.offset < 0:
        raise HTTPException(
            status_code=400,
            detail="top_k must be at least 1 and offset cannot be negative.",
        )

    fetch_k = min(req.offset + req.top_k, 100)

    try:
        result = judgements_index.query(
            vector=embed_query(req.query),
            top_k=fetch_k,
            include_metadata=True,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")

    output = []
    for m in result.get("matches", []):
        meta = m.get("metadata") or {}  # tolerate matches without metadata
        output.append({
            "file_name": meta.get("file_name", "Unknown"),
            "year": meta.get("year", "Unknown"),
            "source": meta.get("source", ""),
            "score": round(float(m.get("score", 0)), 4),
            "content": meta.get("content", ""),
        })

    output.sort(key=lambda x: x["score"], reverse=True)
    page = output[req.offset: req.offset + req.top_k]
    return {"results": page, "count": len(output)}
251
+
252
+
253
@app.post("/api/extract-pdf")
async def extract_pdf(file: UploadFile = File(...)):
    """Extract the full text of an uploaded PDF.

    Returns:
        ``{"text": ..., "pages": n, "filename": ...}`` where ``text`` is the
        page texts joined by blank lines.

    Raises:
        HTTPException: 400 when the upload has no ``.pdf`` file name; 500 when
            the document cannot be opened or read.
    """
    # An upload may arrive without a file name; treat that as "not a PDF"
    # instead of failing on None.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are accepted.")

    contents = await file.read()
    try:
        doc = fitz.open(stream=contents, filetype="pdf")
        try:
            pages = [page.get_text() for page in doc]
        finally:
            # Release the document even when a page fails to extract.
            doc.close()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF extraction failed: {str(e)}")

    text = "\n\n".join(pages).strip()
    return {"text": text, "pages": len(pages), "filename": file.filename}
267
+
268
+
269
@app.post("/api/legal-context")
async def legal_context(req: SearchRequest):
    """Retrieve legal framework chunks from Pinecone.

    When the legal-framework index is unavailable an empty result is returned.
    A ``top_k`` of 0 falls back to ``CONSTITUTION_TOP_K``; the value sent to
    the index never exceeds 10.

    Returns:
        ``{"results": [...], "count": n}`` sorted by descending score.

    Raises:
        HTTPException: 400 for a blank query or a negative ``top_k``; 500 when
            the index query fails.
    """
    if not legal_index:
        return {"results": [], "count": 0}
    if not req.query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty.")
    # A negative value would pass through min() unchanged and reach the index.
    if req.top_k < 0:
        raise HTTPException(status_code=400, detail="top_k cannot be negative.")

    try:
        result = legal_index.query(
            vector=embed_query(req.query),
            top_k=min(req.top_k or CONSTITUTION_TOP_K, 10),
            include_metadata=True,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Legal context search failed: {str(e)}")

    output = []
    for m in result.get("matches", []):
        meta = m.get("metadata") or {}  # tolerate matches without metadata
        output.append({
            "source": meta.get("source", "Unknown"),
            "type": meta.get("type", ""),
            "section": meta.get("section", ""),
            "score": round(float(m.get("score", 0)), 4),
            "content": meta.get("content", ""),
        })
    output.sort(key=lambda x: x["score"], reverse=True)
    return {"results": output, "count": len(output)}
298
+
299
+
300
@app.post("/api/chat")
async def chat_legacy(req: ChatRequest):
    """Legacy single-shot chat endpoint (CitationCard summarize, AI compare).

    The question is sent to ``HF_LEGAL_MODEL`` with a fixed system prompt; when
    the request carries non-blank context, that context is placed ahead of the
    question.

    Raises:
        HTTPException: re-raised unchanged from ``call_hf``; any other error
            becomes a 500.
    """
    system = (
        "You are LexMind, a professional Indian legal research assistant. "
        "Answer concisely and professionally based only on the provided context."
    )

    if req.context.strip():
        user = f"CONTEXT:\n{req.context}\n\nQUESTION: {req.message}"
    else:
        user = req.message

    try:
        reply = await call_hf(HF_LEGAL_MODEL, system, user)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat failed: {str(e)}")
    return {"reply": reply}
318
+
319
+
320
def _parse_router_reply(raw: str) -> tuple[str, Optional[str], Optional[str]]:
    """Decode the Stage 1 model output into ``(intent, response, rag_query)``.

    Markdown fences are stripped and the outermost ``{...}`` span is parsed as
    JSON. If that fails, or the JSON is not an object, the raw text is treated
    as a casual chat reply.
    """
    try:
        clean = re.sub(r"```json|```", "", raw).strip()
        match = re.search(r"\{.*\}", clean, re.DOTALL)
        parsed = json.loads(match.group(0) if match else clean)
        if not isinstance(parsed, dict):
            raise ValueError("router reply is not a JSON object")
    except (ValueError, TypeError, AttributeError):
        # JSON parse failed — treat raw text as a casual reply
        return "chat", (raw.strip() or "How can I help you?"), None

    # Normalise what the model produced: case-insensitive intent, string
    # response, and a rag_query that is either a non-blank string or None.
    intent = str(parsed.get("intent") or "chat").strip().lower()
    response = parsed.get("response")
    if response is not None and not isinstance(response, str):
        response = str(response)
    rag_query = parsed.get("rag_query")
    if not isinstance(rag_query, str) or not rag_query.strip():
        rag_query = None
    return intent, response, rag_query


def _fetch_legal_context(search_q: str) -> str:
    """Build the legal-framework context block for Stage 2, or ``""`` if none.

    Retrieval is best-effort: a failure is logged and whatever was gathered
    before it is still returned.
    """
    if not (legal_index and search_q):
        return ""

    chunks: list[str] = []
    try:
        law_result = legal_index.query(
            vector=embed_query(search_q),
            top_k=CONSTITUTION_TOP_K,
            include_metadata=True,
        )
        matches = sorted(
            law_result.get("matches", []),
            key=lambda x: x.get("score", 0),
            reverse=True,
        )
        for m in matches:
            meta = m.get("metadata") or {}
            src = meta.get("source", "Law")
            sec = meta.get("section", "")
            chunks.append(
                f"[LAW: {src}{' S.' + str(sec) if sec else ''}]\n"
                f"{(meta.get('content') or '')[:600]}\n\n---\n\n"
            )
    except Exception as exc:
        # continue without (further) legal context, but leave a trace
        print(f"[SMART-CHAT] legal retrieval failed: {type(exc).__name__}: {exc}")

    if not chunks:
        return ""
    return (
        "RELEVANT LEGAL FRAMEWORK (Constitution / IPC / CrPC / BSA):\n\n"
        + "".join(chunks)
    )


@app.post("/api/smart-chat")
async def smart_chat(req: SmartChatRequest):
    """
    Two-stage conversational chat.

    Stage 1 — router (``HF_ROUTER_MODEL``):
      - Sees the user's case description and, if one was dragged in, the
        shared judgement.
      - Answers casual messages and questions about the shared judgement
        itself.
      - For other legal questions it produces a short ``rag_query`` instead
        of an answer.
      - Has no knowledge of retrieved judgements.

    Stage 2 — legal answer (``HF_LEGAL_MODEL``):
      - Runs only when Stage 1 did not return a "chat" or "citation" intent.
      - Receives the case description, the shared judgement (if any) and
        legal-framework chunks retrieved from Pinecone.
      - Returns a grounded answer with [LAW: source] citations.

    Returns:
        ``{"reply": ..., "intent": ...}``.

    Raises:
        HTTPException: re-raised unchanged from ``call_hf``; any other failure
            in either stage becomes a 500.
    """

    # ── Build case context for the router ────────────────────────────────────
    case_ctx = ""
    if req.case_text.strip():
        case_ctx = f"\nCURRENT USER CASE:\n{req.case_text[:800]}\n"

    dropped_ctx = ""
    if req.dropped_citation and req.dropped_citation.content.strip():
        name = (req.dropped_citation.file_name or '').replace('_', ' ').strip()
        dropped_ctx = (
            f"\nUSER HAS SHARED THIS JUDGEMENT FOR DISCUSSION:\n"
            f"Case: {name} ({req.dropped_citation.year or '?'})\n"
            f"{req.dropped_citation.content[:2000]}\n"
        )

    # ── Stage 1: Router + conversationalist ──────────────────────────────────
    router_system = f"""You are LexMind, a friendly and professional Indian legal research assistant.
{case_ctx}{dropped_ctx}
YOUR BEHAVIOUR:
- For casual messages (greetings, thanks, small talk): reply naturally and warmly in 1-2 sentences.
- For questions about the shared judgement above (if any): you can answer directly from it.
- For legal questions requiring Constitution/IPC/CrPC/BSA knowledge: identify what needs to be looked up.
- Never make up legal information you are not sure about.

Respond ONLY with valid JSON, no extra text, no markdown fences:

For casual chat:
{{"intent": "chat", "response": "your warm friendly reply here", "rag_query": null}}

For a legal question you can answer from the shared judgement:
{{"intent": "citation", "response": "your answer from the judgement", "rag_query": null}}

For a legal question needing Constitution/IPC/CrPC/BSA lookup:
{{"intent": "legal", "response": null, "rag_query": "precise 3-8 word search query"}}"""

    router_user = f'User message: "{req.message}"'

    try:
        raw = await call_hf(
            HF_ROUTER_MODEL,
            router_system,
            router_user,
            temperature=0.2,
            max_tokens=300,
            timeout=60,
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Stage 1 failed: {str(e)}")

    intent, response, rag_query = _parse_router_reply(raw)

    # ── Stage 1 exits: casual or citation answer ─────────────────────────────
    if intent in ("chat", "citation"):
        return {
            "reply": response or "How can I help you today?",
            "intent": intent,
        }

    # ── Stage 2: Legal RAG answer ────────────────────────────────────────────
    # Fall back to the raw user message when the router gave no search query.
    search_q = rag_query or req.message
    legal_ctx = _fetch_legal_context(search_q)

    # Context = case description + dropped citation (if any) + legal framework.
    # Retrieved judgements are deliberately not included.
    stage2_context = ""
    if req.case_text.strip():
        stage2_context += f"USER'S CASE:\n{req.case_text[:800]}\n\n"
    if dropped_ctx:
        stage2_context += dropped_ctx + "\n"
    if legal_ctx:
        stage2_context += legal_ctx

    legal_system = """You are LexMind, a professional Indian legal research assistant.

KNOWLEDGE BASE YOU CAN USE:
- The user's case description (if provided)
- A shared judgement (if user dragged one in)
- Indian Constitution, IPC, CrPC, BSA 2023 — cited as [LAW: source S.section]

KNOWLEDGE GAPS — be honest if asked about these:
- Code of Civil Procedure (CPC) — not in your knowledge base
- Indian Contract Act — not in your knowledge base
- Transfer of Property Act — not in your knowledge base

RULES:
1. Answer ONLY from the provided context. Never fabricate.
2. Cite laws as [LAW: IPC S.302] or [LAW: Indian Constitution Art.21].
3. If context is insufficient: "I don't have enough information on this. Please search for relevant citations."
4. Be concise, clear, and professional.
5. Answer directly — no preamble like "Based on the context provided…"."""

    legal_user = (
        f"QUESTION: {req.message}\n\nCONTEXT:\n{stage2_context}"
        if stage2_context.strip()
        else req.message
    )

    try:
        reply = await call_hf(
            HF_LEGAL_MODEL,
            legal_system,
            legal_user,
            temperature=0.2,
            max_tokens=1024,
            timeout=120,
        )
        return {"reply": reply, "intent": "legal"}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Stage 2 failed: {str(e)}")
489
+
490
+
491
+ # ── Serve React frontend ──────────────────────────────────────────────────────
492
+ # Built frontend output is generated under ../frontend/dist (relative to backend/)
493
# The path is relative, so it resolves against the directory the server is started from.
dist_path = Path("../frontend/dist")
# NOTE(review): the source this was recovered from had lost its indentation;
# the two routes below are assumed to live inside this `if` — confirm.
if dist_path.exists():
    assets_dir = dist_path / "assets"
    app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")

    @app.get("/")
    async def serve_frontend():
        """Serve the single-page app's entry document."""
        index_html = dist_path / "index.html"
        return FileResponse(str(index_html))

    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str):
        """Answer every other GET path with the same entry document."""
        index_html = dist_path / "index.html"
        return FileResponse(str(index_html))