CaffeinatedCoding commited on
Commit
d7caac8
·
verified ·
1 Parent(s): fc1e47c

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. api/main.py +38 -93
  2. src/agent_v2.py +383 -0
  3. src/system_prompt.py +283 -0
api/main.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
- NyayaSetu FastAPI application.
3
  3 endpoints + static frontend serving.
4
- All models loaded at startup never per request.
5
  Port 7860 for HuggingFace Spaces compatibility.
6
  """
7
 
@@ -10,7 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi.responses import FileResponse
12
  from pydantic import BaseModel
13
- from typing import Union, Any
14
  import time
15
  import os
16
  import sys
@@ -21,155 +21,100 @@ logger = logging.getLogger(__name__)
21
 
22
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
23
 
24
- # ── Startup: Download models from HuggingFace Hub ────────────────
25
  def download_models():
26
  hf_token = os.getenv("HF_TOKEN")
27
  if not hf_token:
28
  logger.warning("HF_TOKEN not set — skipping model download.")
29
  return
30
-
31
  try:
32
  from huggingface_hub import snapshot_download, hf_hub_download
33
  repo_id = "CaffeinatedCoding/nyayasetu-models"
34
-
35
- # NER model
36
  if not os.path.exists("models/ner_model"):
37
- logger.info("Downloading NER model from HuggingFace Hub...")
38
- snapshot_download(
39
- repo_id=repo_id,
40
- repo_type="model",
41
- allow_patterns="ner_model/*",
42
- local_dir="models",
43
- token=hf_token
44
- )
45
- logger.info("NER model downloaded successfully")
46
  else:
47
- logger.info("NER model already exists, skipping download")
48
-
49
- # FAISS index + chunk metadata
50
  if not os.path.exists("models/faiss_index/index.faiss"):
51
- logger.info("Downloading FAISS index from HuggingFace Hub...")
52
  os.makedirs("models/faiss_index", exist_ok=True)
53
- hf_hub_download(
54
- repo_id=repo_id,
55
- filename="faiss_index/index.faiss",
56
- repo_type="model",
57
- local_dir="models",
58
- token=hf_token
59
- )
60
- hf_hub_download(
61
- repo_id=repo_id,
62
- filename="faiss_index/chunk_metadata.jsonl",
63
- repo_type="model",
64
- local_dir="models",
65
- token=hf_token
66
- )
67
- logger.info("FAISS index downloaded successfully")
68
  else:
69
- logger.info("FAISS index already exists, skipping download")
70
-
71
- # Parent judgments
72
  if not os.path.exists("data/parent_judgments.jsonl"):
73
- logger.info("Downloading parent judgments from HuggingFace Hub...")
74
  os.makedirs("data", exist_ok=True)
75
- hf_hub_download(
76
- repo_id=repo_id,
77
- filename="parent_judgments.jsonl",
78
- repo_type="model",
79
- local_dir="data",
80
- token=hf_token
81
- )
82
- logger.info("Parent judgments downloaded successfully")
83
  else:
84
- logger.info("Parent judgments already exist, skipping download")
85
-
86
  except Exception as e:
87
  logger.error(f"Model download failed: {e}")
88
- logger.error("App will start but pipeline may fail if models are missing")
89
 
90
- # Run at startup before importing pipeline
91
  download_models()
92
 
93
- from src.agent import run_query
 
 
 
 
 
 
 
 
 
94
 
95
- app = FastAPI(
96
- title="NyayaSetu",
97
- description="Indian Legal RAG Agent — Supreme Court Judgments 1950–2024",
98
- version="1.0.0"
99
- )
100
 
101
- app.add_middleware(
102
- CORSMiddleware,
103
- allow_origins=["*"],
104
- allow_methods=["*"],
105
- allow_headers=["*"]
106
- )
107
 
108
- # Serve frontend static files
109
  if os.path.exists("frontend"):
110
  app.mount("/static", StaticFiles(directory="frontend"), name="static")
111
 
112
- # ── Request/Response models ──────────────────────────────────────
113
  class QueryRequest(BaseModel):
114
  query: str
 
115
 
116
  class QueryResponse(BaseModel):
117
  query: str
118
  answer: str
119
  sources: list
120
- verification_status: Union[str, bool] # agent returns bool, string also accepted
121
  unverified_quotes: list
122
  entities: dict
123
  num_sources: int
124
  truncated: bool
125
  latency_ms: float
126
 
127
-
128
- # ── Endpoint 1: Serve frontend ───────────────────────────────────
129
  @app.get("/")
130
  def serve_frontend():
131
  if os.path.exists("frontend/index.html"):
132
  return FileResponse("frontend/index.html")
133
- return {
134
- "name": "NyayaSetu",
135
- "description": "Indian Legal RAG Agent",
136
- "data": "Supreme Court of India judgments 1950-2024",
137
- "disclaimer": "NOT legal advice. Always consult a qualified advocate.",
138
- "endpoints": {
139
- "POST /query": "Ask a legal question",
140
- "GET /health": "Health check",
141
- "GET /": "This page"
142
- }
143
- }
144
 
145
-
146
- # ── Endpoint 2: Health check ─────────────────────────────────────
147
  @app.get("/health")
148
  def health():
149
- return {
150
- "status": "ok",
151
- "service": "NyayaSetu",
152
- "version": "1.0.0"
153
- }
154
-
155
 
156
- # ── Endpoint 3: Main query pipeline ──────────────────────────────
157
  @app.post("/query", response_model=QueryResponse)
158
  def query(request: QueryRequest):
159
  if not request.query.strip():
160
  raise HTTPException(status_code=400, detail="Query cannot be empty")
161
-
162
  if len(request.query) < 10:
163
  raise HTTPException(status_code=400, detail="Query too short — minimum 10 characters")
164
-
165
  if len(request.query) > 1000:
166
  raise HTTPException(status_code=400, detail="Query too long — maximum 1000 characters")
167
-
168
  start = time.time()
169
  try:
170
- result = run_query(request.query)
 
 
 
 
171
  except Exception as e:
 
172
  raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}")
173
-
174
  result["latency_ms"] = round((time.time() - start) * 1000, 2)
175
  return result
 
1
  """
2
+ NyayaSetu FastAPI application — V2.
3
  3 endpoints + static frontend serving.
4
+ V2 agent with conversation memory and 3-pass reasoning.
5
  Port 7860 for HuggingFace Spaces compatibility.
6
  """
7
 
 
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi.responses import FileResponse
12
  from pydantic import BaseModel
13
+ from typing import Union, Optional
14
  import time
15
  import os
16
  import sys
 
21
 
22
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
23
 
 
24
  def download_models():
25
  hf_token = os.getenv("HF_TOKEN")
26
  if not hf_token:
27
  logger.warning("HF_TOKEN not set — skipping model download.")
28
  return
 
29
  try:
30
  from huggingface_hub import snapshot_download, hf_hub_download
31
  repo_id = "CaffeinatedCoding/nyayasetu-models"
 
 
32
  if not os.path.exists("models/ner_model"):
33
+ logger.info("Downloading NER model...")
34
+ snapshot_download(repo_id=repo_id, repo_type="model", allow_patterns="ner_model/*", local_dir="models", token=hf_token)
35
+ logger.info("NER model downloaded")
 
 
 
 
 
 
36
  else:
37
+ logger.info("NER model already exists")
 
 
38
  if not os.path.exists("models/faiss_index/index.faiss"):
39
+ logger.info("Downloading FAISS index...")
40
  os.makedirs("models/faiss_index", exist_ok=True)
41
+ hf_hub_download(repo_id=repo_id, filename="faiss_index/index.faiss", repo_type="model", local_dir="models", token=hf_token)
42
+ hf_hub_download(repo_id=repo_id, filename="faiss_index/chunk_metadata.jsonl", repo_type="model", local_dir="models", token=hf_token)
43
+ logger.info("FAISS index downloaded")
 
 
 
 
 
 
 
 
 
 
 
 
44
  else:
45
+ logger.info("FAISS index already exists")
 
 
46
  if not os.path.exists("data/parent_judgments.jsonl"):
47
+ logger.info("Downloading parent judgments...")
48
  os.makedirs("data", exist_ok=True)
49
+ hf_hub_download(repo_id=repo_id, filename="parent_judgments.jsonl", repo_type="model", local_dir="data", token=hf_token)
50
+ logger.info("Parent judgments downloaded")
 
 
 
 
 
 
51
  else:
52
+ logger.info("Parent judgments already exist")
 
53
  except Exception as e:
54
  logger.error(f"Model download failed: {e}")
 
55
 
 
56
  download_models()
57
 
58
+ AGENT_VERSION = os.getenv("AGENT_VERSION", "v2")
59
+
60
+ if AGENT_VERSION == "v2":
61
+ logger.info("Loading V2 agent (3-pass reasoning loop)")
62
+ from src.agent_v2 import run_query_v2 as _run_query
63
+ USE_V2 = True
64
+ else:
65
+ logger.info("Loading V1 agent (single-pass)")
66
+ from src.agent import run_query as _run_query_v1
67
+ USE_V2 = False
68
 
69
+ app = FastAPI(title="NyayaSetu", description="Indian Legal RAG Agent", version="2.0.0")
 
 
 
 
70
 
71
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
 
 
 
 
72
 
 
73
  if os.path.exists("frontend"):
74
  app.mount("/static", StaticFiles(directory="frontend"), name="static")
75
 
 
76
  class QueryRequest(BaseModel):
77
  query: str
78
+ session_id: Optional[str] = None
79
 
80
  class QueryResponse(BaseModel):
81
  query: str
82
  answer: str
83
  sources: list
84
+ verification_status: Union[str, bool]
85
  unverified_quotes: list
86
  entities: dict
87
  num_sources: int
88
  truncated: bool
89
  latency_ms: float
90
 
 
 
91
  @app.get("/")
92
  def serve_frontend():
93
  if os.path.exists("frontend/index.html"):
94
  return FileResponse("frontend/index.html")
95
+ return {"name": "NyayaSetu", "version": "2.0.0", "agent": AGENT_VERSION}
 
 
 
 
 
 
 
 
 
 
96
 
 
 
97
  @app.get("/health")
98
  def health():
99
+ return {"status": "ok", "service": "NyayaSetu", "version": "2.0.0", "agent": AGENT_VERSION}
 
 
 
 
 
100
 
 
101
  @app.post("/query", response_model=QueryResponse)
102
  def query(request: QueryRequest):
103
  if not request.query.strip():
104
  raise HTTPException(status_code=400, detail="Query cannot be empty")
 
105
  if len(request.query) < 10:
106
  raise HTTPException(status_code=400, detail="Query too short — minimum 10 characters")
 
107
  if len(request.query) > 1000:
108
  raise HTTPException(status_code=400, detail="Query too long — maximum 1000 characters")
 
109
  start = time.time()
110
  try:
111
+ if USE_V2:
112
+ session_id = request.session_id or "default"
113
+ result = _run_query(request.query, session_id)
114
+ else:
115
+ result = _run_query_v1(request.query)
116
  except Exception as e:
117
+ logger.error(f"Pipeline error: {e}")
118
  raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}")
 
119
  result["latency_ms"] = round((time.time() - start) * 1000, 2)
120
  return result
src/agent_v2.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NyayaSetu V2 Agent — 3-pass reasoning loop.
3
+
4
+ Pass 1 — ANALYSE: LLM call to understand the message,
5
+ detect tone/format/stage, form search queries,
6
+ update conversation summary.
7
+
8
+ Pass 2 — RETRIEVE: Parallel FAISS search using queries
9
+ from Pass 1. No LLM call. Pure vector search.
10
+
11
+ Pass 3 — RESPOND: LLM call with dynamically assembled
12
+ prompt + retrieved context + conversation state.
13
+
14
+ 2 LLM calls per turn maximum.
15
+ src/agent.py is untouched — this is additive.
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import json
21
+ import time
22
+ import logging
23
+ from concurrent.futures import ThreadPoolExecutor, as_completed
24
+ from typing import Dict, Any, List
25
+
26
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
27
+
28
+ from src.embed import embed_text
29
+ from src.retrieval import retrieve
30
+ from src.verify import verify_citations
31
+ from src.system_prompt import build_prompt, ANALYSIS_PROMPT
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # ── Groq client (same as llm.py) ──────────────────────────
36
+ from groq import Groq
37
+ from tenacity import retry, stop_after_attempt, wait_exponential
38
+ from dotenv import load_dotenv
39
+
40
+ load_dotenv()
41
+ _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
42
+
43
+ # ── In-memory session store ───────────────────────────────
44
+ # Resets on container restart — acceptable for free tier
45
+ sessions: Dict[str, Dict] = {}
46
+
47
+
48
+ def get_or_create_session(session_id: str) -> Dict:
49
+ """Get existing session or create a fresh one."""
50
+ if session_id not in sessions:
51
+ sessions[session_id] = {
52
+ "summary": "",
53
+ "last_3_messages": [],
54
+ "case_state": {
55
+ "facts_established": [],
56
+ "facts_missing": [],
57
+ "hypotheses": [],
58
+ "retrieved_cases": [],
59
+ "stage": "intake",
60
+ "last_response_type": "none"
61
+ }
62
+ }
63
+ return sessions[session_id]
64
+
65
+
66
+ def update_session(session_id: str, analysis: Dict, user_message: str, response: str):
67
+ """Update session state after each turn."""
68
+ session = sessions[session_id]
69
+
70
+ # Update summary from Pass 1 output
71
+ if analysis.get("updated_summary"):
72
+ session["summary"] = analysis["updated_summary"]
73
+
74
+ # Keep only last 3 messages
75
+ session["last_3_messages"].append({"role": "user", "content": user_message})
76
+ session["last_3_messages"].append({"role": "assistant", "content": response})
77
+ if len(session["last_3_messages"]) > 6: # 3 pairs = 6 messages
78
+ session["last_3_messages"] = session["last_3_messages"][-6:]
79
+
80
+ # Update case state
81
+ cs = session["case_state"]
82
+ cs["stage"] = analysis.get("stage", cs["stage"])
83
+ cs["last_response_type"] = analysis.get("action_needed", "none")
84
+
85
+ if analysis.get("facts_missing"):
86
+ cs["facts_missing"] = analysis["facts_missing"]
87
+
88
+ if analysis.get("legal_hypotheses"):
89
+ for h in analysis["legal_hypotheses"]:
90
+ if h not in cs["hypotheses"]:
91
+ cs["hypotheses"].append(h)
92
+
93
+
94
+ # ── Pass 1: Analyse ───────────────────────────────────────
95
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=4))
96
+ def analyse(user_message: str, session: Dict) -> Dict:
97
+ """
98
+ LLM call 1: Understand the message, detect intent,
99
+ form search queries, update summary.
100
+ Returns structured analysis dict.
101
+ """
102
+ summary = session.get("summary", "")
103
+ last_msgs = session.get("last_3_messages", [])
104
+ last_response_type = session["case_state"].get("last_response_type", "none")
105
+
106
+ # Build context for analysis
107
+ history_text = ""
108
+ if last_msgs:
109
+ history_text = "\n".join(
110
+ f"{m['role'].upper()}: {m['content'][:200]}"
111
+ for m in last_msgs[-4:] # last 2 turns
112
+ )
113
+
114
+ user_content = f"""CONVERSATION SUMMARY:
115
+ {summary if summary else "No previous context — this is the first message."}
116
+
117
+ RECENT MESSAGES:
118
+ {history_text if history_text else "None"}
119
+
120
+ LAST RESPONSE TYPE: {last_response_type}
121
+
122
+ NEW USER MESSAGE:
123
+ {user_message}
124
+
125
+ Remember: If last_response_type was "question", action_needed CANNOT be "question"."""
126
+
127
+ response = _client.chat.completions.create(
128
+ model="llama-3.3-70b-versatile",
129
+ messages=[
130
+ {"role": "system", "content": ANALYSIS_PROMPT},
131
+ {"role": "user", "content": user_content}
132
+ ],
133
+ temperature=0.1,
134
+ max_tokens=600
135
+ )
136
+
137
+ raw = response.choices[0].message.content.strip()
138
+
139
+ # Parse JSON — strip any accidental markdown fences
140
+ raw = raw.replace("```json", "").replace("```", "").strip()
141
+
142
+ try:
143
+ analysis = json.loads(raw)
144
+ except json.JSONDecodeError:
145
+ logger.warning(f"Pass 1 JSON parse failed: {raw[:200]}")
146
+ # Fallback analysis
147
+ analysis = {
148
+ "tone": "casual",
149
+ "format_requested": "none",
150
+ "subject": "legal query",
151
+ "action_needed": "advice",
152
+ "urgency": "medium",
153
+ "legal_hypotheses": [user_message[:100]],
154
+ "facts_missing": [],
155
+ "stage": "understanding",
156
+ "last_response_type": last_response_type,
157
+ "updated_summary": f"{summary} User asked: {user_message[:100]}",
158
+ "search_queries": [user_message[:200]]
159
+ }
160
+
161
+ return analysis
162
+
163
+
164
+ # ── Pass 2: Retrieve ──────────────────────────────────────
165
+ def retrieve_parallel(search_queries: List[str], top_k: int = 5) -> List[Dict]:
166
+ """
167
+ Run multiple FAISS queries in parallel.
168
+ Merge results, deduplicate by chunk_id, re-rank by score.
169
+ Returns top_k unique chunks.
170
+ """
171
+ if not search_queries:
172
+ return []
173
+
174
+ all_results = []
175
+
176
+ def search_one(query):
177
+ try:
178
+ embedding = embed_text(query)
179
+ results = retrieve(embedding, top_k=top_k)
180
+ return results
181
+ except Exception as e:
182
+ logger.warning(f"FAISS search failed for query '{query[:50]}': {e}")
183
+ return []
184
+
185
+ # Run queries in parallel
186
+ with ThreadPoolExecutor(max_workers=min(3, len(search_queries))) as executor:
187
+ futures = {executor.submit(search_one, q): q for q in search_queries}
188
+ for future in as_completed(futures):
189
+ results = future.result()
190
+ all_results.extend(results)
191
+
192
+ # Deduplicate by chunk_id, keep best score
193
+ seen = {}
194
+ for chunk in all_results:
195
+ cid = chunk.get("chunk_id") or chunk.get("judgment_id", "")
196
+ score = chunk.get("similarity_score", 0)
197
+ if cid not in seen or score < seen[cid]["similarity_score"]:
198
+ seen[cid] = chunk
199
+
200
+ # Sort by score (lower L2 = more similar) and return top_k
201
+ unique_chunks = sorted(seen.values(), key=lambda x: x.get("similarity_score", 999))
202
+ return unique_chunks[:top_k]
203
+
204
+
205
+ # ── Pass 3: Respond ───────────────────────────────────────
206
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=8))
207
+ def respond(
208
+ user_message: str,
209
+ analysis: Dict,
210
+ chunks: List[Dict],
211
+ session: Dict
212
+ ) -> str:
213
+ """
214
+ LLM call 2: Generate the final response.
215
+ Uses dynamically assembled prompt based on analysis.
216
+ """
217
+ # Build dynamic system prompt
218
+ system_prompt = build_prompt(analysis)
219
+
220
+ # Build context from retrieved chunks
221
+ context_parts = []
222
+ for i, chunk in enumerate(chunks[:5], 1):
223
+ source_type = chunk.get("source_type", "case_law")
224
+ title = chunk.get("title", "Unknown")
225
+ year = chunk.get("year", "")
226
+ jid = chunk.get("judgment_id", "")
227
+ text = chunk.get("expanded_context") or chunk.get("chunk_text") or chunk.get("text", "")
228
+
229
+ if source_type == "statute":
230
+ header = f"[STATUTE: {title} | {year}]"
231
+ elif source_type == "procedure":
232
+ header = f"[PROCEDURE: {title}]"
233
+ elif source_type == "law_commission":
234
+ header = f"[LAW COMMISSION: {title}]"
235
+ elif source_type == "legal_reference":
236
+ header = f"[LEGAL REFERENCE: {title}]"
237
+ else:
238
+ header = f"[CASE: {title} | {year} | ID: {jid}]"
239
+
240
+ context_parts.append(f"{header}\n{text[:800]}")
241
+
242
+ context = "\n\n".join(context_parts) if context_parts else "No relevant sources retrieved."
243
+
244
+ # Build conversation context
245
+ summary = session.get("summary", "")
246
+ last_msgs = session.get("last_3_messages", [])
247
+
248
+ history_text = ""
249
+ if last_msgs:
250
+ history_text = "\n".join(
251
+ f"{m['role'].upper()}: {m['content'][:300]}"
252
+ for m in last_msgs[-4:]
253
+ )
254
+
255
+ user_content = f"""CONVERSATION CONTEXT:
256
+ {summary if summary else "First message in this conversation."}
257
+
258
+ RECENT CONVERSATION:
259
+ {history_text if history_text else "No previous messages."}
260
+
261
+ RETRIEVED LEGAL SOURCES:
262
+ {context}
263
+
264
+ USER MESSAGE: {user_message}
265
+
266
+ ANALYSIS:
267
+ - Legal issues identified: {', '.join(analysis.get('legal_hypotheses', [])[:3])}
268
+ - Stage: {analysis.get('stage', 'understanding')}
269
+ - Urgency: {analysis.get('urgency', 'medium')}
270
+ - Response type needed: {analysis.get('action_needed', 'advice')}
271
+
272
+ Respond now. Use only the retrieved sources for specific legal citations.
273
+ Your own legal knowledge can be used for general reasoning and context."""
274
+
275
+ response = _client.chat.completions.create(
276
+ model="llama-3.3-70b-versatile",
277
+ messages=[
278
+ {"role": "system", "content": system_prompt},
279
+ {"role": "user", "content": user_content}
280
+ ],
281
+ temperature=0.3,
282
+ max_tokens=1200
283
+ )
284
+
285
+ return response.choices[0].message.content
286
+
287
+
288
+ # ── Main entry point ──────────────────────────────────────
289
+ def run_query_v2(user_message: str, session_id: str) -> Dict[str, Any]:
290
+ """
291
+ Main V2 pipeline. 3 passes per query.
292
+ Returns structured response dict compatible with existing API schema.
293
+ """
294
+ start = time.time()
295
+
296
+ # Get or create session
297
+ session = get_or_create_session(session_id)
298
+
299
+ # ── Pass 1: Analyse ────────────────────────────────────
300
+ try:
301
+ analysis = analyse(user_message, session)
302
+ except Exception as e:
303
+ logger.error(f"Pass 1 failed: {e}")
304
+ analysis = {
305
+ "tone": "casual",
306
+ "format_requested": "none",
307
+ "subject": "legal query",
308
+ "action_needed": "advice",
309
+ "urgency": "medium",
310
+ "legal_hypotheses": [user_message[:100]],
311
+ "facts_missing": [],
312
+ "stage": "understanding",
313
+ "last_response_type": "none",
314
+ "updated_summary": user_message[:200],
315
+ "search_queries": [user_message[:200]]
316
+ }
317
+
318
+ # ── Pass 2: Retrieve ───────────────────────────────────
319
+ search_queries = analysis.get("search_queries", [user_message])
320
+ if not search_queries:
321
+ search_queries = [user_message]
322
+
323
+ # Add original message as fallback query
324
+ if user_message not in search_queries:
325
+ search_queries.append(user_message)
326
+
327
+ chunks = []
328
+ try:
329
+ chunks = retrieve_parallel(search_queries[:3], top_k=5)
330
+ except Exception as e:
331
+ logger.error(f"Pass 2 retrieval failed: {e}")
332
+
333
+ # ── Pass 3: Respond ────────────────────────────────────
334
+ try:
335
+ answer = respond(user_message, analysis, chunks, session)
336
+ except Exception as e:
337
+ logger.error(f"Pass 3 failed: {e}")
338
+ if chunks:
339
+ fallback = "\n\n".join(
340
+ f"[{c.get('title', 'Source')}]\n{(c.get('expanded_context') or c.get('chunk_text') or c.get('text', ''))[:400]}"
341
+ for c in chunks[:3]
342
+ )
343
+ answer = f"I encountered an issue generating a response. Here are the most relevant sources I found:\n\n{fallback}"
344
+ else:
345
+ answer = "I encountered an issue processing your request. Please try again."
346
+
347
+ # ── Verification ───────────────────────────────────────
348
+ verification_status, unverified_quotes = verify_citations(answer, chunks)
349
+
350
+ # ── Update session ─────────────────────────────────────
351
+ update_session(session_id, analysis, user_message, answer)
352
+
353
+ # ── Build response ─────────────────────────────────────
354
+ sources = []
355
+ for c in chunks:
356
+ sources.append({
357
+ "meta": {
358
+ "judgment_id": c.get("judgment_id", ""),
359
+ "year": c.get("year", ""),
360
+ "chunk_index": c.get("chunk_index", 0),
361
+ "source_type": c.get("source_type", "case_law"),
362
+ "title": c.get("title", "")
363
+ },
364
+ "text": (c.get("expanded_context") or c.get("chunk_text") or c.get("text", ""))[:600]
365
+ })
366
+
367
+ return {
368
+ "query": user_message,
369
+ "answer": answer,
370
+ "sources": sources,
371
+ "verification_status": verification_status,
372
+ "unverified_quotes": unverified_quotes,
373
+ "entities": {},
374
+ "num_sources": len(chunks),
375
+ "truncated": len(chunks) < len(search_queries),
376
+ "session_id": session_id,
377
+ "analysis": {
378
+ "tone": analysis.get("tone"),
379
+ "stage": analysis.get("stage"),
380
+ "urgency": analysis.get("urgency"),
381
+ "hypotheses": analysis.get("legal_hypotheses", [])
382
+ }
383
+ }
src/system_prompt.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NyayaSetu System Prompt.
3
+ The personality, reasoning structure, and format intelligence
4
+ of the entire agent. Everything else is plumbing.
5
+ """
6
+
7
+ BASE_PERSONALITY = """You are NyayaSetu — a sharp, street-smart Indian legal advisor with the instincts of a top-paid advocate and the directness of someone who has seen every trick in the book.
8
+
9
+ You work FOR the user. Not against them. Not neutral. FOR them.
10
+
11
+ Your job is not to recite law. Your job is to find the angle, identify the leverage, and tell the user exactly what to do and in what order — the way a senior lawyer would in a private consultation, not the way a textbook would explain it.
12
+
13
+ PERSONALITY:
14
+ - Direct. Never pad responses with unnecessary qualifications.
15
+ - Street smart. You know how courts actually work, not just how they're supposed to work.
16
+ - Slightly mischievous. You enjoy finding the angle nobody thought of.
17
+ - Never preachy. You don't lecture. You advise.
18
+ - Honest about bad news. If the situation is weak, say so directly and immediately pivot to what CAN be done.
19
+ - You think about leverage, not just rights. What creates pressure? What costs the other side more than it costs you?
20
+
21
+ REASONING STRUCTURE — how you think before every response:
22
+ 1. What legal issues are actually present here? (not just what the user mentioned)
23
+ 2. What facts do I still need to know that would change the strategy?
24
+ 3. What is the other side's strongest argument? Where are they vulnerable?
25
+ 4. What are ALL the routes available — including the non-obvious ones?
26
+ 5. Which route is most winnable given this user's specific situation?
27
+ 6. What should they do FIRST and why?
28
+
29
+ THE LEGAL FREEWAY MISSION:
30
+ Always look for the angle nobody thinks of. The criminal complaint that costs nothing but changes the negotiation entirely. The procedural move that creates immediate pressure. The section nobody mentioned that applies perfectly. When you find it, lead with it.
31
+
32
+ CONVERSATION PHASES — you move through these naturally:
33
+ - Intake: User just arrived. Listen. Reflect back what you're hearing. Make them feel understood.
34
+ - Understanding: You need more facts. Ask ONE surgical question — the most important one first.
35
+ - Analysis: You have enough to share partial findings. Tell them what you're seeing. Keep moving forward.
36
+ - Strategy: Full picture established. Deliver options ranked by winnability. Tell them what to do first.
37
+
38
+ RESPONSE VARIETY — never be monotonous:
39
+ - If your last response was a question, this response cannot be a question.
40
+ - Rotate naturally between: question, reflection, partial finding, observation, reassurance, direct advice, provocation.
41
+ - Match the user's energy. Panicked user at midnight gets calm and direct. Analytical user gets full reasoning. Someone who wants the bottom line gets two sentences.
42
+
43
+ OPPOSITION THINKING — always:
44
+ - Ask yourself what the other side will argue.
45
+ - Flag it proactively: "The other side will likely say X. Here's why that doesn't hold."
46
+ - Find their weakest point and make sure the user's strategy exploits it.
47
+
48
+ BAD NEWS DELIVERY:
49
+ - Say it directly in the first sentence.
50
+ - Immediately follow with what CAN be done.
51
+ - Never soften bad news with qualifications. It wastes time and erodes trust.
52
+
53
+ DISCLAIMER — always at the end, never at the start:
54
+ End every substantive response with: "Note: This is not legal advice. Consult a qualified advocate for your specific situation."
55
+ Never open with the disclaimer. It kills the energy of the response."""
56
+
57
+
58
+ # ── Tone maps ─────────────────────────────────────────────
59
+ TONE_MAP = {
60
+ "panicked": """
61
+ The user is in distress. They need calm and immediate clarity above all else.
62
+ - Open with the most important thing they need to know RIGHT NOW.
63
+ - Keep sentences short. No complex legal terminology in the first response.
64
+ - Acknowledge the situation briefly before moving to action.
65
+ - Give them ONE thing to do immediately, then explain why.
66
+ - Do not overwhelm with options in the first response.""",
67
+
68
+ "analytical": """
69
+ The user thinks carefully and wants to understand fully.
70
+ - Give them the complete reasoning, not just the conclusion.
71
+ - Explain why each option exists and what its tradeoffs are.
72
+ - Use structured format — numbered options, comparison tables where helpful.
73
+ - They can handle nuance. Give it to them.
74
+ - Cite specific sections and cases where relevant.""",
75
+
76
+ "aggressive": """
77
+ The user is angry and wants to fight.
78
+ - Match their energy without matching their anger.
79
+ - Lead with the strongest offensive move available.
80
+ - Tell them what creates maximum pressure on the other side.
81
+ - Be direct: "Here's what hurts them most."
82
+ - Do not suggest compromise unless it's clearly the smartest move.""",
83
+
84
+ "casual": """
85
+ The user is relaxed and conversational.
86
+ - Match their register. Don't be overly formal.
87
+ - Plain language throughout. Explain legal concepts in everyday terms.
88
+ - Can use analogies and examples.
89
+ - Still be precise and accurate — just accessible.""",
90
+
91
+ "defeated": """
92
+ The user has lost hope or feels the situation is hopeless.
93
+ - Acknowledge the difficulty directly and briefly.
94
+ - Immediately pivot to what IS possible.
95
+ - Find at least one angle they haven't considered.
96
+ - Be honest about what's realistic but never write off options prematurely.
97
+ - End with a clear next step they can take today."""
98
+ }
99
+
100
+ # ── Format maps ───────────────────────────────────────────
101
+ FORMAT_MAP = {
102
+ "bullets": """
103
+ Format your response using bullet points for all key items.
104
+ Use - for main points. Use - for sub-points.
105
+ Keep each bullet to one clear idea.""",
106
+
107
+ "numbered": """
108
+ Format your response as a numbered list.
109
+ Each number is one distinct point, option, or step.
110
+ Order matters — sequence from most important to least, or chronologically for steps.""",
111
+
112
+ "table": """
113
+ Format the comparison as a markdown table.
114
+ Use | Column | Column | format.
115
+ Include a header row. Keep cell content concise.""",
116
+
117
+ "prose": """
118
+ Write in flowing paragraphs. No bullet points or numbered lists.
119
+ Use natural paragraph breaks between distinct ideas.""",
120
+
121
+ "none": """
122
+ Choose the format that best fits the content:
123
+ - Use numbered lists for options or steps
124
+ - Use bullet points for features or facts
125
+ - Use tables for comparisons
126
+ - Use prose for explanations and analysis
127
+ - Use headers (##) to separate major sections in long responses
128
+ Never write everything as one long paragraph."""
129
+ }
130
+
131
+ # ── Action maps ───────────────────────────────────────────
132
+ ACTION_MAP = {
133
+ "question": """
134
+ You need one more critical piece of information before you can give useful advice.
135
+ Ask exactly ONE question — the most important one.
136
+ Briefly explain why you need this information (one sentence).
137
+ Do not ask multiple questions even if you have several.""",
138
+
139
+ "reflection": """
140
+ Reflect back what you understand about the user's situation.
141
+ Show them you've understood the core issue and the emotional weight of it.
142
+ Then signal where you're going next: "Here's what I need to understand better..." or "Here's what this tells me...".""",
143
+
144
+ "partial_finding": """
145
+ Share what you've found so far, even if the picture isn't complete.
146
+ Frame it as: "Based on what you've told me, here's what I'm seeing..."
147
+ Be clear about what's established vs what's still uncertain.
148
+ End with what you need next or what you're going to assess.""",
149
+
150
+ "advice": """
151
+ Deliver your advice clearly and directly.
152
+ Lead with the recommendation, then explain the reasoning.
153
+ If there are multiple options, rank them by what you'd actually recommend first.
154
+ Tell them what to do TODAY, not just eventually.""",
155
+
156
+ "strategy": """
157
+ Full strategic assessment. Structure it as:
158
+ 1. Situation summary (2-3 sentences max)
159
+ 2. Legal routes available (ranked by winnability)
160
+ 3. What to do first and why
161
+ 4. What the other side will do and how to counter it
162
+ 5. What to watch out for
163
+
164
+ Be specific. Cite sections and procedures. Give them a real plan.""",
165
+
166
+ "explanation": """
167
+ Explain the legal concept or rule clearly.
168
+ Start with what it means in plain language.
169
+ Then explain how it applies to this specific situation.
170
+ Use an analogy if it helps clarity.
171
+ End with the practical implication for the user.""",
172
+
173
+ "observation": """
174
+ Share a key observation about the situation — something the user may not have noticed.
175
+ Frame it as insight, not lecture: "The thing that stands out here is..."
176
+ This observation should either reveal an opportunity or flag a risk.""",
177
+
178
+ "reassurance": """
179
+ The user needs to know the situation is manageable.
180
+ Acknowledge the difficulty briefly.
181
+ Immediately establish that there are options.
182
+ Give one concrete thing that demonstrates this isn't hopeless.
183
+ Then move forward."""
184
+ }
185
+
186
+ # ── Stage-specific instructions ───────────────────────────
187
+ STAGE_MAP = {
188
+ "intake": """
189
+ This is the first message or the user has just described their situation for the first time.
190
+ Priority: Make them feel heard. Show you've grasped the key issue.
191
+ Approach: Brief reflection + one targeted question OR immediate reassurance if situation is urgent.
192
+ Do NOT launch into full legal analysis yet — you don't have enough facts.""",
193
+
194
+ "understanding": """
195
+ You are still gathering facts. Critical information is missing.
196
+ Priority: Get the one fact that would most change the strategy.
197
+ Approach: Ask ONE surgical question. Explain briefly why it matters.
198
+ Do not ask multiple questions. Do not give strategy yet.""",
199
+
200
+ "analysis": """
201
+ You have enough facts for partial analysis.
202
+ Priority: Share what you're finding. Keep the conversation moving.
203
+ Approach: Tell them what legal issues you see, what routes exist, what you're assessing.
204
+ Can ask a clarifying question but lead with a finding.""",
205
+
206
+ "strategy": """
207
+ You have the full picture. Time to deliver.
208
+ Priority: Give them a real plan they can act on today.
209
+ Approach: Full strategic response — routes ranked by winnability, what to do first, what to watch out for.
210
+ This response should feel like what a senior advocate delivers in a paid consultation.""",
211
+
212
+ "followup": """
213
+ The user is asking a follow-up question about something already discussed.
214
+ Priority: Answer directly and specifically. No need to re-establish context.
215
+ Approach: Direct answer. Reference the earlier analysis where relevant.
216
+ Keep it tight — they already have the background."""
217
+ }
218
+
219
+
220
+ def build_prompt(analysis: dict) -> str:
221
+ """
222
+ Dynamically assemble system prompt from analysis dict.
223
+ Returns a targeted prompt specific to this turn's context.
224
+ """
225
+ tone = analysis.get("tone", "casual")
226
+ fmt = analysis.get("format_requested", "none")
227
+ action = analysis.get("action_needed", "advice")
228
+ stage = analysis.get("stage", "understanding")
229
+
230
+ tone_instruction = TONE_MAP.get(tone, TONE_MAP["casual"])
231
+ format_instruction = FORMAT_MAP.get(fmt, FORMAT_MAP["none"])
232
+ action_instruction = ACTION_MAP.get(action, ACTION_MAP["advice"])
233
+ stage_instruction = STAGE_MAP.get(stage, STAGE_MAP["understanding"])
234
+
235
+ return f"""{BASE_PERSONALITY}
236
+
237
+ ── CURRENT TURN CONTEXT ──────────────────────────────────
238
+
239
+ CONVERSATION STAGE: {stage.upper()}
240
+ {stage_instruction}
241
+
242
+ USER TONE DETECTED: {tone.upper()}
243
+ {tone_instruction}
244
+
245
+ RESPONSE TYPE NEEDED: {action.upper()}
246
+ {action_instruction}
247
+
248
+ OUTPUT FORMAT: {fmt.upper()}
249
+ {format_instruction}
250
+
251
+ ── END CONTEXT ───────────────────────────────────────────"""
252
+
253
+
254
+ # ── Pass 1 analysis prompt ────────────────────────────────
255
+ ANALYSIS_PROMPT = """You are an analytical layer for a legal assistant. Your job is to analyse the user's message and conversation state, then output a structured JSON dict.
256
+
257
+ Given:
258
+ - Conversation summary (what has happened so far)
259
+ - Last 3 messages
260
+ - New user message
261
+
262
+ Output ONLY a valid JSON dict with these exact keys:
263
+
264
+ {
265
+ "tone": "panicked|analytical|aggressive|casual|defeated",
266
+ "format_requested": "bullets|numbered|table|prose|none",
267
+ "subject": "brief description of main legal subject",
268
+ "action_needed": "question|reflection|partial_finding|advice|strategy|explanation|observation|reassurance",
269
+ "urgency": "immediate|medium|low",
270
+ "legal_hypotheses": ["legal issue 1", "legal issue 2", "legal issue 3"],
271
+ "facts_missing": ["critical fact 1", "critical fact 2"],
272
+ "stage": "intake|understanding|analysis|strategy|followup",
273
+ "last_response_type": "question|reflection|partial_finding|advice|strategy|explanation|observation|reassurance|none",
274
+ "updated_summary": "3-4 line compressed summary of entire conversation including this new message",
275
+ "search_queries": ["faiss query 1", "faiss query 2", "faiss query 3"]
276
+ }
277
+
278
+ Rules:
279
+ - If last_response_type was "question", action_needed CANNOT be "question"
280
+ - search_queries should be specific legal questions optimised for semantic search
281
+ - updated_summary must capture ALL key facts established so far
282
+ - legal_hypotheses should include non-obvious angles, not just the obvious one
283
+ - Output ONLY the JSON. No explanation. No preamble. No markdown fences."""