CaffeinatedCoding committed on
Commit
b2d0640
·
verified ·
1 Parent(s): 5a6e59a

Upload folder using huggingface_hub

Browse files
Files changed (8) hide show
  1. Dockerfile +0 -0
  2. README.md +2 -0
  3. api/main.py +2 -0
  4. buglog.md +14 -0
  5. src/agent_v2.py +205 -165
  6. src/ner.py +49 -51
  7. src/system_prompt.py +143 -180
  8. src/verify.py +130 -29
Dockerfile CHANGED
Binary files a/Dockerfile and b/Dockerfile differ
 
README.md CHANGED
@@ -20,6 +20,8 @@ Ask questions about Indian Supreme Court judgments (1950–2024).
20
 
21
  > Retrieval-Augmented Generation over 26,688 Supreme Court of India judgments (1950–2024).
22
  > Ask a legal question. Get a cited answer grounded in real case law.
 
 
23
 
24
  [![Live Demo](https://img.shields.io/badge/🤗%20HuggingFace-Live%20Demo-blue)](https://huggingface.co/spaces/CaffeinatedCoding/nyayasetu)
25
  [![GitHub Actions](https://github.com/devangmishra1424/nyayasetu/actions/workflows/ci.yml/badge.svg)](https://github.com/devangmishra1424/nyayasetu/actions)
 
20
 
21
  > Retrieval-Augmented Generation over 26,688 Supreme Court of India judgments (1950–2024).
22
  > Ask a legal question. Get a cited answer grounded in real case law.
23
+ > 1,025,764 chunks indexed (SC judgments, HC judgments, bare acts, constitution, legal references)
24
+ > V2 agent with 3-pass reasoning loop and conversation memory
25
 
26
  [![Live Demo](https://img.shields.io/badge/🤗%20HuggingFace-Live%20Demo-blue)](https://huggingface.co/spaces/CaffeinatedCoding/nyayasetu)
27
  [![GitHub Actions](https://github.com/devangmishra1424/nyayasetu/actions/workflows/ci.yml/badge.svg)](https://github.com/devangmishra1424/nyayasetu/actions)
api/main.py CHANGED
@@ -22,6 +22,8 @@ logger = logging.getLogger(__name__)
22
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
23
 
24
  def download_models():
 
 
25
  hf_token = os.getenv("HF_TOKEN")
26
  if not hf_token:
27
  logger.warning("HF_TOKEN not set — skipping model download.")
 
22
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
23
 
24
  def download_models():
25
+ from src.ner import load_ner_model
26
+ load_ner_model()
27
  hf_token = os.getenv("HF_TOKEN")
28
  if not hf_token:
29
  logger.warning("HF_TOKEN not set — skipping model download.")
buglog.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Bug 1: Kaggle Secrets wrong pattern
2
+ **Bug:** HF token upload failed — was using token value as key name
3
+ **Found:** Upload cell returned 401
4
+ **Fixed:** Changed to `hf_token = secrets.get_secret("HF_TOKEN")`
5
+
6
+ ## Bug 2: Docker container caching old files
7
+ **Bug:** Frontend changes not reflecting after push
8
+ **Found:** UI still showing old version after multiple pushes
9
+ **Fixed:** Must push change to Dockerfile to force container rebuild, not just restart
10
+
11
+ ## Bug 3: OneDrive destroying git repository
12
+ **Bug:** All local project files disappeared
13
+ **Found:** OneDrive moved files to cloud-only to free local space
14
+ **Fixed:** Moved project to C:\Projects outside OneDrive. Never store git repos inside OneDrive.
src/agent_v2.py CHANGED
@@ -1,38 +1,33 @@
1
  """
2
- NyayaSetu V2 Agent — 3-pass reasoning loop.
3
 
4
- Pass 1 — ANALYSE: LLM call to understand the message,
5
- detect tone/format/stage, form search queries,
6
- update conversation summary.
7
 
8
- Pass 2 — RETRIEVE: Parallel FAISS search using queries
9
- from Pass 1. No LLM call. Pure vector search.
10
 
11
- Pass 3 — RESPOND: LLM call with dynamically assembled
12
- prompt + retrieved context + conversation state.
13
 
14
- 2 LLM calls per turn maximum.
15
- src/agent.py is untouched — this is additive.
16
  """
17
 
18
- import os
19
- import sys
20
- import json
21
- import time
22
- import logging
23
  from concurrent.futures import ThreadPoolExecutor, as_completed
24
  from typing import Dict, Any, List
25
 
 
26
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
27
 
28
  from src.embed import embed_text
29
  from src.retrieval import retrieve
30
  from src.verify import verify_citations
31
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
 
32
 
33
  logger = logging.getLogger(__name__)
34
 
35
- # ── Groq client (same as llm.py) ──────────────────────────
36
  from groq import Groq
37
  from tenacity import retry, stop_after_attempt, wait_exponential
38
  from dotenv import load_dotenv
@@ -40,89 +35,133 @@ from dotenv import load_dotenv
40
  load_dotenv()
41
  _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
42
 
43
- # ── In-memory session store ───────────────────────────────
44
- # Resets on container restart — acceptable for free tier
45
  sessions: Dict[str, Dict] = {}
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def get_or_create_session(session_id: str) -> Dict:
49
- """Get existing session or create a fresh one."""
50
  if session_id not in sessions:
51
  sessions[session_id] = {
52
  "summary": "",
53
  "last_3_messages": [],
54
- "case_state": {
55
- "facts_established": [],
56
- "facts_missing": [],
57
- "hypotheses": [],
58
- "retrieved_cases": [],
59
- "stage": "intake",
60
- "last_response_type": "none"
61
- }
62
  }
63
  return sessions[session_id]
64
 
65
 
66
  def update_session(session_id: str, analysis: Dict, user_message: str, response: str):
67
- """Update session state after each turn."""
68
  session = sessions[session_id]
 
69
 
70
- # Update summary from Pass 1 output
71
  if analysis.get("updated_summary"):
72
  session["summary"] = analysis["updated_summary"]
73
 
74
- # Keep only last 3 messages
75
- session["last_3_messages"].append({"role": "user", "content": user_message})
76
- session["last_3_messages"].append({"role": "assistant", "content": response})
77
- if len(session["last_3_messages"]) > 6: # 3 pairs = 6 messages
78
- session["last_3_messages"] = session["last_3_messages"][-6:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- # Update case state
81
- cs = session["case_state"]
82
  cs["stage"] = analysis.get("stage", cs["stage"])
83
  cs["last_response_type"] = analysis.get("action_needed", "none")
 
 
84
 
85
- if analysis.get("facts_missing"):
86
- cs["facts_missing"] = analysis["facts_missing"]
87
 
88
- if analysis.get("legal_hypotheses"):
89
- for h in analysis["legal_hypotheses"]:
90
- if h not in cs["hypotheses"]:
91
- cs["hypotheses"].append(h)
92
 
93
 
94
  # ── Pass 1: Analyse ───────────────────────────────────────
95
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=4))
96
  def analyse(user_message: str, session: Dict) -> Dict:
97
- """
98
- LLM call 1: Understand the message, detect intent,
99
- form search queries, update summary.
100
- Returns structured analysis dict.
101
- """
102
  summary = session.get("summary", "")
103
  last_msgs = session.get("last_3_messages", [])
104
- last_response_type = session["case_state"].get("last_response_type", "none")
105
-
106
- # Build context for analysis
107
- history_text = ""
108
- if last_msgs:
109
- history_text = "\n".join(
110
- f"{m['role'].upper()}: {m['content'][:200]}"
111
- for m in last_msgs[-4:] # last 2 turns
112
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  user_content = f"""CONVERSATION SUMMARY:
115
- {summary if summary else "No previous context — this is the first message."}
116
 
117
  RECENT MESSAGES:
118
  {history_text if history_text else "None"}
119
 
120
  LAST RESPONSE TYPE: {last_response_type}
 
 
121
 
122
  NEW USER MESSAGE:
123
  {user_message}
124
 
125
- Remember: If last_response_type was "question", action_needed CANNOT be "question"."""
 
 
 
 
126
 
127
  response = _client.chat.completions.create(
128
  model="llama-3.3-70b-versatile",
@@ -131,31 +170,27 @@ Remember: If last_response_type was "question", action_needed CANNOT be "questio
131
  {"role": "user", "content": user_content}
132
  ],
133
  temperature=0.1,
134
- max_tokens=600
135
  )
136
 
137
  raw = response.choices[0].message.content.strip()
138
-
139
- # Parse JSON — strip any accidental markdown fences
140
  raw = raw.replace("```json", "").replace("```", "").strip()
141
 
142
  try:
143
  analysis = json.loads(raw)
144
  except json.JSONDecodeError:
145
  logger.warning(f"Pass 1 JSON parse failed: {raw[:200]}")
146
- # Fallback analysis
147
  analysis = {
148
- "tone": "casual",
149
- "format_requested": "none",
150
- "subject": "legal query",
151
- "action_needed": "advice",
152
  "urgency": "medium",
153
- "legal_hypotheses": [user_message[:100]],
154
- "facts_missing": [],
155
- "stage": "understanding",
156
- "last_response_type": last_response_type,
157
- "updated_summary": f"{summary} User asked: {user_message[:100]}",
158
- "search_queries": [user_message[:200]]
 
159
  }
160
 
161
  return analysis
@@ -163,11 +198,6 @@ Remember: If last_response_type was "question", action_needed CANNOT be "questio
163
 
164
  # ── Pass 2: Retrieve ──────────────────────────────────────
165
  def retrieve_parallel(search_queries: List[str], top_k: int = 5) -> List[Dict]:
166
- """
167
- Run multiple FAISS queries in parallel.
168
- Merge results, deduplicate by chunk_id, re-rank by score.
169
- Returns top_k unique chunks.
170
- """
171
  if not search_queries:
172
  return []
173
 
@@ -176,101 +206,117 @@ def retrieve_parallel(search_queries: List[str], top_k: int = 5) -> List[Dict]:
176
  def search_one(query):
177
  try:
178
  embedding = embed_text(query)
179
- results = retrieve(embedding, top_k=top_k)
180
- return results
181
  except Exception as e:
182
- logger.warning(f"FAISS search failed for query '{query[:50]}': {e}")
183
  return []
184
 
185
- # Run queries in parallel
186
  with ThreadPoolExecutor(max_workers=min(3, len(search_queries))) as executor:
187
  futures = {executor.submit(search_one, q): q for q in search_queries}
188
  for future in as_completed(futures):
189
- results = future.result()
190
- all_results.extend(results)
191
 
192
- # Deduplicate by chunk_id, keep best score
193
  seen = {}
194
  for chunk in all_results:
195
  cid = chunk.get("chunk_id") or chunk.get("judgment_id", "")
196
- score = chunk.get("similarity_score", 0)
197
  if cid not in seen or score < seen[cid]["similarity_score"]:
198
  seen[cid] = chunk
199
 
200
- # Sort by score (lower L2 = more similar) and return top_k
201
- unique_chunks = sorted(seen.values(), key=lambda x: x.get("similarity_score", 999))
202
- return unique_chunks[:top_k]
203
 
204
 
205
  # ── Pass 3: Respond ───────────────────────────────────────
206
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=8))
207
- def respond(
208
- user_message: str,
209
- analysis: Dict,
210
- chunks: List[Dict],
211
- session: Dict
212
- ) -> str:
213
- """
214
- LLM call 2: Generate the final response.
215
- Uses dynamically assembled prompt based on analysis.
216
- """
217
- # Build dynamic system prompt
218
  system_prompt = build_prompt(analysis)
 
219
 
220
- # Build context from retrieved chunks
221
  context_parts = []
222
- for i, chunk in enumerate(chunks[:5], 1):
223
  source_type = chunk.get("source_type", "case_law")
224
  title = chunk.get("title", "Unknown")
225
  year = chunk.get("year", "")
226
  jid = chunk.get("judgment_id", "")
227
  text = chunk.get("expanded_context") or chunk.get("chunk_text") or chunk.get("text", "")
228
 
229
- if source_type == "statute":
230
- header = f"[STATUTE: {title} | {year}]"
231
- elif source_type == "procedure":
232
- header = f"[PROCEDURE: {title}]"
233
- elif source_type == "law_commission":
234
- header = f"[LAW COMMISSION: {title}]"
235
- elif source_type == "legal_reference":
236
- header = f"[LEGAL REFERENCE: {title}]"
237
- else:
238
- header = f"[CASE: {title} | {year} | ID: {jid}]"
239
-
240
  context_parts.append(f"{header}\n{text[:800]}")
241
 
242
  context = "\n\n".join(context_parts) if context_parts else "No relevant sources retrieved."
243
 
244
- # Build conversation context
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  summary = session.get("summary", "")
246
  last_msgs = session.get("last_3_messages", [])
 
 
 
 
247
 
248
- history_text = ""
249
- if last_msgs:
250
- history_text = "\n".join(
251
- f"{m['role'].upper()}: {m['content'][:300]}"
252
- for m in last_msgs[-4:]
253
- )
254
-
255
- user_content = f"""CONVERSATION CONTEXT:
256
- {summary if summary else "First message in this conversation."}
257
 
258
  RECENT CONVERSATION:
259
- {history_text if history_text else "No previous messages."}
 
260
 
261
  RETRIEVED LEGAL SOURCES:
262
  {context}
263
 
264
  USER MESSAGE: {user_message}
265
 
266
- ANALYSIS:
267
- - Legal issues identified: {', '.join(analysis.get('legal_hypotheses', [])[:3])}
268
  - Stage: {analysis.get('stage', 'understanding')}
269
  - Urgency: {analysis.get('urgency', 'medium')}
270
- - Response type needed: {analysis.get('action_needed', 'advice')}
 
 
271
 
272
- Respond now. Use only the retrieved sources for specific legal citations.
273
- Your own legal knowledge can be used for general reasoning and context."""
 
 
 
 
274
 
275
  response = _client.chat.completions.create(
276
  model="llama-3.3-70b-versatile",
@@ -279,7 +325,7 @@ Your own legal knowledge can be used for general reasoning and context."""
279
  {"role": "user", "content": user_content}
280
  ],
281
  temperature=0.3,
282
- max_tokens=1200
283
  )
284
 
285
  return response.choices[0].message.content
@@ -287,79 +333,73 @@ Your own legal knowledge can be used for general reasoning and context."""
287
 
288
  # ── Main entry point ──────────────────────────────────────
289
  def run_query_v2(user_message: str, session_id: str) -> Dict[str, Any]:
290
- """
291
- Main V2 pipeline. 3 passes per query.
292
- Returns structured response dict compatible with existing API schema.
293
- """
294
  start = time.time()
295
-
296
- # Get or create session
297
  session = get_or_create_session(session_id)
298
 
299
- # ── Pass 1: Analyse ────────────────────────────────────
300
  try:
301
  analysis = analyse(user_message, session)
302
  except Exception as e:
303
  logger.error(f"Pass 1 failed: {e}")
304
  analysis = {
305
- "tone": "casual",
306
- "format_requested": "none",
307
- "subject": "legal query",
308
- "action_needed": "advice",
309
  "urgency": "medium",
310
- "legal_hypotheses": [user_message[:100]],
311
- "facts_missing": [],
312
- "stage": "understanding",
313
- "last_response_type": "none",
314
  "updated_summary": user_message[:200],
315
- "search_queries": [user_message[:200]]
 
 
316
  }
317
 
318
- # ── Pass 2: Retrieve ───────────────────────────────────
319
- search_queries = analysis.get("search_queries", [user_message])
320
- if not search_queries:
321
- search_queries = [user_message]
322
 
323
- # Add original message as fallback query
324
- if user_message not in search_queries:
325
- search_queries.append(user_message)
 
 
 
326
 
327
  chunks = []
328
  try:
329
  chunks = retrieve_parallel(search_queries[:3], top_k=5)
330
  except Exception as e:
331
- logger.error(f"Pass 2 retrieval failed: {e}")
332
 
333
- # ── Pass 3: Respond ────────────────────────────────────
334
  try:
335
  answer = respond(user_message, analysis, chunks, session)
336
  except Exception as e:
337
  logger.error(f"Pass 3 failed: {e}")
338
  if chunks:
339
  fallback = "\n\n".join(
340
- f"[{c.get('title', 'Source')}]\n{(c.get('expanded_context') or c.get('chunk_text') or c.get('text', ''))[:400]}"
341
  for c in chunks[:3]
342
  )
343
- answer = f"I encountered an issue generating a response. Here are the most relevant sources I found:\n\n{fallback}"
344
  else:
345
  answer = "I encountered an issue processing your request. Please try again."
346
 
347
- # ── Verification ───────────────────────────────────────
348
  verification_status, unverified_quotes = verify_citations(answer, chunks)
349
-
350
- # ── Update session ─────────────────────────────────────
351
  update_session(session_id, analysis, user_message, answer)
352
 
353
- # ── Build response ─────────────────────────────────────
354
  sources = []
355
  for c in chunks:
 
 
356
  sources.append({
357
  "meta": {
358
- "judgment_id": c.get("judgment_id", ""),
 
359
  "year": c.get("year", ""),
360
  "chunk_index": c.get("chunk_index", 0),
361
  "source_type": c.get("source_type", "case_law"),
362
- "title": c.get("title", "")
363
  },
364
  "text": (c.get("expanded_context") or c.get("chunk_text") or c.get("text", ""))[:600]
365
  })
@@ -370,14 +410,14 @@ def run_query_v2(user_message: str, session_id: str) -> Dict[str, Any]:
370
  "sources": sources,
371
  "verification_status": verification_status,
372
  "unverified_quotes": unverified_quotes,
373
- "entities": {},
374
  "num_sources": len(chunks),
375
- "truncated": len(chunks) < len(search_queries),
376
  "session_id": session_id,
377
  "analysis": {
378
  "tone": analysis.get("tone"),
379
  "stage": analysis.get("stage"),
380
  "urgency": analysis.get("urgency"),
381
- "hypotheses": analysis.get("legal_hypotheses", [])
382
  }
383
  }
 
1
  """
2
+ NyayaSetu V2 Agent — Full Intelligence Layer.
3
 
4
+ Pass 1 — ANALYSE: Understands message, detects tone/stage,
5
+ builds structured fact web, updates hypotheses,
6
+ forms targeted search queries, compresses summary.
7
 
8
+ Pass 2 — RETRIEVE: Parallel FAISS search. No LLM call.
 
9
 
10
+ Pass 3 — RESPOND: Dynamically assembled prompt + retrieved
11
+ context + full case state. Format-intelligent output.
12
 
13
+ 2 LLM calls per turn. src/agent.py untouched.
 
14
  """
15
 
16
+ import os, sys, json, time, logging
 
 
 
 
17
  from concurrent.futures import ThreadPoolExecutor, as_completed
18
  from typing import Dict, Any, List
19
 
20
+ # sys.path must be set before any local imports
21
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
22
 
23
  from src.embed import embed_text
24
  from src.retrieval import retrieve
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
+ from src.ner import extract_entities, augment_query
28
 
29
  logger = logging.getLogger(__name__)
30
 
 
31
  from groq import Groq
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
 
35
  load_dotenv()
36
  _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
37
 
38
+ # ── Session store ─────────────────────────────────────────
 
39
  sessions: Dict[str, Dict] = {}
40
 
41
 
42
+ def empty_case_state() -> Dict:
43
+ return {
44
+ "parties": [],
45
+ "events": [],
46
+ "documents": [],
47
+ "amounts": [],
48
+ "locations": [],
49
+ "timeline": [],
50
+ "disputes": [],
51
+ "hypotheses": [],
52
+ "stage": "intake",
53
+ "last_response_type": "none",
54
+ "turn_count": 0,
55
+ "facts_missing": [],
56
+ "context_interpreted": False,
57
+ }
58
+
59
+
60
  def get_or_create_session(session_id: str) -> Dict:
 
61
  if session_id not in sessions:
62
  sessions[session_id] = {
63
  "summary": "",
64
  "last_3_messages": [],
65
+ "case_state": empty_case_state()
 
 
 
 
 
 
 
66
  }
67
  return sessions[session_id]
68
 
69
 
70
  def update_session(session_id: str, analysis: Dict, user_message: str, response: str):
 
71
  session = sessions[session_id]
72
+ cs = session["case_state"]
73
 
 
74
  if analysis.get("updated_summary"):
75
  session["summary"] = analysis["updated_summary"]
76
 
77
+ facts = analysis.get("facts_extracted", {})
78
+ if facts:
79
+ for key in ["parties", "events", "documents", "amounts", "locations", "disputes"]:
80
+ new_items = facts.get(key, [])
81
+ existing = cs.get(key, [])
82
+ for item in new_items:
83
+ if item and item not in existing:
84
+ existing.append(item)
85
+ cs[key] = existing
86
+
87
+ for ev in facts.get("timeline_events", []):
88
+ if ev and ev not in cs["timeline"]:
89
+ cs["timeline"].append(ev)
90
+
91
+ for nh in analysis.get("hypotheses", []):
92
+ existing_claims = [h["claim"] for h in cs["hypotheses"]]
93
+ if nh.get("claim") and nh["claim"] not in existing_claims:
94
+ cs["hypotheses"].append(nh)
95
+ else:
96
+ for h in cs["hypotheses"]:
97
+ if h["claim"] == nh.get("claim"):
98
+ h["confidence"] = nh.get("confidence", h["confidence"])
99
+ for e in nh.get("evidence", []):
100
+ if e not in h.get("evidence", []):
101
+ h.setdefault("evidence", []).append(e)
102
 
 
 
103
  cs["stage"] = analysis.get("stage", cs["stage"])
104
  cs["last_response_type"] = analysis.get("action_needed", "none")
105
+ cs["facts_missing"] = analysis.get("facts_missing", [])
106
+ cs["turn_count"] = cs.get("turn_count", 0) + 1
107
 
108
+ if cs["turn_count"] >= 3:
109
+ cs["context_interpreted"] = True
110
 
111
+ session["last_3_messages"].append({"role": "user", "content": user_message})
112
+ session["last_3_messages"].append({"role": "assistant", "content": response[:400]})
113
+ if len(session["last_3_messages"]) > 6:
114
+ session["last_3_messages"] = session["last_3_messages"][-6:]
115
 
116
 
117
  # ── Pass 1: Analyse ───────────────────────────────────────
118
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=4))
119
  def analyse(user_message: str, session: Dict) -> Dict:
 
 
 
 
 
120
  summary = session.get("summary", "")
121
  last_msgs = session.get("last_3_messages", [])
122
+ cs = session["case_state"]
123
+ last_response_type = cs.get("last_response_type", "none")
124
+ turn_count = cs.get("turn_count", 0)
125
+
126
+ history_text = "\n".join(
127
+ f"{m['role'].upper()}: {m['content'][:250]}"
128
+ for m in last_msgs[-4:]
129
+ ) if last_msgs else ""
130
+
131
+ fact_web = ""
132
+ if any(cs.get(k) for k in ["parties", "events", "documents", "amounts", "disputes"]):
133
+ hyp_lines = "\n".join(
134
+ f" - {h['claim']} [{h.get('confidence','?')}]"
135
+ for h in cs.get("hypotheses", [])[:3]
136
+ ) or " none yet"
137
+ fact_web = f"""
138
+ CURRENT FACT WEB:
139
+ - Parties: {', '.join(cs.get('parties', [])) or 'none'}
140
+ - Events: {', '.join(cs.get('events', [])) or 'none'}
141
+ - Documents/Evidence: {', '.join(cs.get('documents', [])) or 'none'}
142
+ - Amounts: {', '.join(cs.get('amounts', [])) or 'none'}
143
+ - Disputes: {', '.join(cs.get('disputes', [])) or 'none'}
144
+ - Active hypotheses:
145
+ {hyp_lines}"""
146
 
147
  user_content = f"""CONVERSATION SUMMARY:
148
+ {summary if summary else "First message — no prior context."}
149
 
150
  RECENT MESSAGES:
151
  {history_text if history_text else "None"}
152
 
153
  LAST RESPONSE TYPE: {last_response_type}
154
+ TURN COUNT: {turn_count}
155
+ {fact_web}
156
 
157
  NEW USER MESSAGE:
158
  {user_message}
159
 
160
+ Rules:
161
+ - If last_response_type was "question", action_needed CANNOT be "question"
162
+ - Extract ALL facts from user message even if implied
163
+ - Update hypothesis confidence based on new evidence
164
+ - search_queries must be specific legal questions for vector search"""
165
 
166
  response = _client.chat.completions.create(
167
  model="llama-3.3-70b-versatile",
 
170
  {"role": "user", "content": user_content}
171
  ],
172
  temperature=0.1,
173
+ max_tokens=900
174
  )
175
 
176
  raw = response.choices[0].message.content.strip()
 
 
177
  raw = raw.replace("```json", "").replace("```", "").strip()
178
 
179
  try:
180
  analysis = json.loads(raw)
181
  except json.JSONDecodeError:
182
  logger.warning(f"Pass 1 JSON parse failed: {raw[:200]}")
 
183
  analysis = {
184
+ "tone": "casual", "format_requested": "none",
185
+ "subject": "legal query", "action_needed": "advice",
 
 
186
  "urgency": "medium",
187
+ "hypotheses": [{"claim": user_message[:80], "confidence": "low", "evidence": []}],
188
+ "facts_extracted": {}, "facts_missing": [],
189
+ "stage": "understanding", "last_response_type": last_response_type,
190
+ "updated_summary": f"{summary} | {user_message[:100]}",
191
+ "search_queries": [user_message[:200]],
192
+ "should_interpret_context": False,
193
+ "format_decision": "none"
194
  }
195
 
196
  return analysis
 
198
 
199
  # ── Pass 2: Retrieve ──────────────────────────────────────
200
  def retrieve_parallel(search_queries: List[str], top_k: int = 5) -> List[Dict]:
 
 
 
 
 
201
  if not search_queries:
202
  return []
203
 
 
206
  def search_one(query):
207
  try:
208
  embedding = embed_text(query)
209
+ return retrieve(embedding, top_k=top_k)
 
210
  except Exception as e:
211
+ logger.warning(f"FAISS search failed: {e}")
212
  return []
213
 
 
214
  with ThreadPoolExecutor(max_workers=min(3, len(search_queries))) as executor:
215
  futures = {executor.submit(search_one, q): q for q in search_queries}
216
  for future in as_completed(futures):
217
+ all_results.extend(future.result())
 
218
 
 
219
  seen = {}
220
  for chunk in all_results:
221
  cid = chunk.get("chunk_id") or chunk.get("judgment_id", "")
222
+ score = chunk.get("similarity_score", 999)
223
  if cid not in seen or score < seen[cid]["similarity_score"]:
224
  seen[cid] = chunk
225
 
226
+ return sorted(seen.values(), key=lambda x: x.get("similarity_score", 999))[:top_k]
 
 
227
 
228
 
229
  # ── Pass 3: Respond ───────────────────────────────────────
230
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=8))
231
+ def respond(user_message: str, analysis: Dict, chunks: List[Dict], session: Dict) -> str:
 
 
 
 
 
 
 
 
 
 
232
  system_prompt = build_prompt(analysis)
233
+ cs = session["case_state"]
234
 
 
235
  context_parts = []
236
+ for chunk in chunks[:5]:
237
  source_type = chunk.get("source_type", "case_law")
238
  title = chunk.get("title", "Unknown")
239
  year = chunk.get("year", "")
240
  jid = chunk.get("judgment_id", "")
241
  text = chunk.get("expanded_context") or chunk.get("chunk_text") or chunk.get("text", "")
242
 
243
+ type_labels = {
244
+ "statute": f"[STATUTE: {title} | {year}]",
245
+ "procedure": f"[PROCEDURE: {title}]",
246
+ "law_commission": f"[LAW COMMISSION: {title}]",
247
+ "legal_reference": f"[LEGAL REFERENCE: {title}]",
248
+ "statute_qa": f"[LEGAL QA: {title}]",
249
+ }
250
+ header = type_labels.get(source_type, f"[CASE: {title} | {year} | {jid}]")
 
 
 
251
  context_parts.append(f"{header}\n{text[:800]}")
252
 
253
  context = "\n\n".join(context_parts) if context_parts else "No relevant sources retrieved."
254
 
255
+ case_summary = ""
256
+ if cs.get("parties") or cs.get("hypotheses"):
257
+ hyp_text = "\n".join(
258
+ f" - {h['claim']} [{h.get('confidence','?')} confidence] "
259
+ f"| evidence: {', '.join(h.get('evidence', [])) or 'none yet'}"
260
+ for h in cs.get("hypotheses", [])[:4]
261
+ ) or " none established"
262
+
263
+ case_summary = f"""
264
+ CASE STATE (built across {cs.get('turn_count', 0)} turns):
265
+ Parties: {', '.join(cs.get('parties', [])) or 'unspecified'}
266
+ Events: {', '.join(cs.get('events', [])) or 'unspecified'}
267
+ Evidence: {', '.join(cs.get('documents', [])) or 'none mentioned'}
268
+ Amounts: {', '.join(cs.get('amounts', [])) or 'none'}
269
+ Active hypotheses:
270
+ {hyp_text}
271
+ Missing facts: {', '.join(cs.get('facts_missing', [])) or 'none critical'}
272
+ Stage: {cs.get('stage', 'intake')}"""
273
+
274
+ interpret_instruction = ""
275
+ should_interpret = analysis.get("should_interpret_context", False)
276
+ if should_interpret and not cs.get("context_interpreted"):
277
+ interpret_instruction = """
278
+ CONTEXT REFLECTION: Before your main response, briefly (2-3 lines) reflect your understanding back to the user. Start with "Based on what you've told me..." This builds trust and confirms you've been tracking the situation."""
279
+
280
+ radar_instruction = """
281
+ PROACTIVE RADAR — add after your main answer when user has described a real situation:
282
+ Add a brief "⚡ You Should Also Know" section (3-4 lines max).
283
+ Surface 1-2 related legal issues or remedies the user hasn't asked about but which are directly relevant.
284
+ Example: User asked about wrongful termination → proactively mention injunction under Specific Relief Act as faster remedy.
285
+ Skip this section for purely academic questions with no personal situation described."""
286
+
287
  summary = session.get("summary", "")
288
  last_msgs = session.get("last_3_messages", [])
289
+ history_text = "\n".join(
290
+ f"{m['role'].upper()}: {m['content'][:300]}"
291
+ for m in last_msgs[-4:]
292
+ ) if last_msgs else ""
293
 
294
+ user_content = f"""CONVERSATION SUMMARY:
295
+ {summary if summary else "First message."}
 
 
 
 
 
 
 
296
 
297
  RECENT CONVERSATION:
298
+ {history_text if history_text else "None"}
299
+ {case_summary}
300
 
301
  RETRIEVED LEGAL SOURCES:
302
  {context}
303
 
304
  USER MESSAGE: {user_message}
305
 
306
+ THIS TURN:
307
+ - Legal hypotheses: {', '.join(h['claim'] for h in analysis.get('hypotheses', [])[:3]) or 'analysing'}
308
  - Stage: {analysis.get('stage', 'understanding')}
309
  - Urgency: {analysis.get('urgency', 'medium')}
310
+ - Response type: {analysis.get('action_needed', 'advice')}
311
+ - Format: {analysis.get('format_decision', 'appropriate for content')}
312
+ {interpret_instruction}
313
 
314
+ Instructions:
315
+ - Cite specific sources when making legal claims
316
+ - Use your legal knowledge for reasoning and context
317
+ - Format: {analysis.get('format_decision', 'use the most appropriate format for the content type')}
318
+ - Opposition war-gaming: if giving strategy, include what the other side will argue
319
+ {radar_instruction}"""
320
 
321
  response = _client.chat.completions.create(
322
  model="llama-3.3-70b-versatile",
 
325
  {"role": "user", "content": user_content}
326
  ],
327
  temperature=0.3,
328
+ max_tokens=1500
329
  )
330
 
331
  return response.choices[0].message.content
 
333
 
334
  # ── Main entry point ──────────────────────────────────────
335
  def run_query_v2(user_message: str, session_id: str) -> Dict[str, Any]:
 
 
 
 
336
  start = time.time()
 
 
337
  session = get_or_create_session(session_id)
338
 
339
+ # Pass 1
340
  try:
341
  analysis = analyse(user_message, session)
342
  except Exception as e:
343
  logger.error(f"Pass 1 failed: {e}")
344
  analysis = {
345
+ "tone": "casual", "format_requested": "none",
346
+ "subject": "legal query", "action_needed": "advice",
 
 
347
  "urgency": "medium",
348
+ "hypotheses": [{"claim": user_message[:80], "confidence": "low", "evidence": []}],
349
+ "facts_extracted": {}, "facts_missing": [],
350
+ "stage": "understanding", "last_response_type": "none",
 
351
  "updated_summary": user_message[:200],
352
+ "search_queries": [user_message[:200]],
353
+ "should_interpret_context": False,
354
+ "format_decision": "none"
355
  }
356
 
357
+ # Extract entities and augment queries for better retrieval
358
+ entities = extract_entities(user_message)
359
+ augmented_message = augment_query(user_message, entities)
 
360
 
361
+ # Pass 2
362
+ search_queries = analysis.get("search_queries", [augmented_message])
363
+ if not search_queries:
364
+ search_queries = [augmented_message]
365
+ if augmented_message not in search_queries:
366
+ search_queries.append(augmented_message)
367
 
368
  chunks = []
369
  try:
370
  chunks = retrieve_parallel(search_queries[:3], top_k=5)
371
  except Exception as e:
372
+ logger.error(f"Pass 2 failed: {e}")
373
 
374
+ # Pass 3
375
  try:
376
  answer = respond(user_message, analysis, chunks, session)
377
  except Exception as e:
378
  logger.error(f"Pass 3 failed: {e}")
379
  if chunks:
380
  fallback = "\n\n".join(
381
+ f"[{c.get('title', 'Source')}]\n{c.get('text', '')[:400]}"
382
  for c in chunks[:3]
383
  )
384
+ answer = f"LLM service temporarily unavailable. Most relevant excerpts:\n\n{fallback}"
385
  else:
386
  answer = "I encountered an issue processing your request. Please try again."
387
 
 
388
  verification_status, unverified_quotes = verify_citations(answer, chunks)
 
 
389
  update_session(session_id, analysis, user_message, answer)
390
 
 
391
  sources = []
392
  for c in chunks:
393
+ title = c.get("title", "")
394
+ jid = c.get("judgment_id", "")
395
  sources.append({
396
  "meta": {
397
+ "judgment_id": jid,
398
+ "title": title if title and title != jid else jid,
399
  "year": c.get("year", ""),
400
  "chunk_index": c.get("chunk_index", 0),
401
  "source_type": c.get("source_type", "case_law"),
402
+ "court": c.get("court", "Supreme Court of India")
403
  },
404
  "text": (c.get("expanded_context") or c.get("chunk_text") or c.get("text", ""))[:600]
405
  })
 
410
  "sources": sources,
411
  "verification_status": verification_status,
412
  "unverified_quotes": unverified_quotes,
413
+ "entities": entities,
414
  "num_sources": len(chunks),
415
+ "truncated": False,
416
  "session_id": session_id,
417
  "analysis": {
418
  "tone": analysis.get("tone"),
419
  "stage": analysis.get("stage"),
420
  "urgency": analysis.get("urgency"),
421
+ "hypotheses": [h["claim"] for h in analysis.get("hypotheses", [])]
422
  }
423
  }
src/ner.py CHANGED
@@ -2,28 +2,26 @@
2
  NER inference module.
3
  Loads fine-tuned DistilBERT and extracts legal entities from query text.
4
 
5
- Loaded once at FastAPI startup never per request.
6
- Called before FAISS retrieval to augment the query with extracted entities.
7
 
8
  Example:
9
  Input: "What did Justice Chandrachud say about Section 302 IPC?"
10
- Output: {"JUDGE": ["Justice Chandrachud"],
11
- "PROVISION": ["Section 302"],
12
  "STATUTE": ["IPC"]}
13
 
14
  The augmented query becomes:
15
- "What did Justice Chandrachud say about Section 302 IPC?
16
  JUDGE: Justice Chandrachud PROVISION: Section 302 STATUTE: IPC"
17
-
18
- WHY augment the query?
19
- MiniLM embeds the full query string. Adding extracted entities
20
- explicitly shifts the embedding closer to chunks that mention
21
- those specific legal terms — improving retrieval precision.
22
  """
23
 
24
  import os
 
25
  from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
26
 
 
 
27
  NER_MODEL_PATH = os.getenv("NER_MODEL_PATH", "models/ner_model")
28
 
29
  TARGET_ENTITIES = {
@@ -32,40 +30,56 @@ TARGET_ENTITIES = {
32
  "PETITIONER", "RESPONDENT", "GPE", "ORG"
33
  }
34
 
35
- # Load once at import time
36
- if not os.path.exists(NER_MODEL_PATH):
37
- raise FileNotFoundError(
38
- f"NER model not found at {NER_MODEL_PATH}. "
39
- "Train it on Kaggle first. "
40
- "System will run without NER until model is available."
41
- )
 
 
 
42
 
43
- print(f"Loading NER model from {NER_MODEL_PATH}...")
44
- _tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_PATH)
45
- _model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_PATH)
 
 
 
46
 
47
- _ner_pipeline = pipeline(
48
- "ner",
49
- model=_model,
50
- tokenizer=_tokenizer,
51
- aggregation_strategy="simple"
52
- )
53
- print("NER model ready.")
 
 
 
 
 
 
 
54
 
55
 
56
  def extract_entities(text: str) -> dict:
57
  """
58
  Run NER on input text.
59
  Returns dict of {entity_type: [entity_text, ...]}
60
- Filters to only legally relevant entity types.
61
  """
 
 
 
62
  if not text.strip():
63
  return {}
64
 
65
  try:
66
- results = _ner_pipeline(text)
67
  except Exception as e:
68
- print(f"NER inference failed: {e}")
69
  return {}
70
 
71
  entities = {}
@@ -75,12 +89,12 @@ def extract_entities(text: str) -> dict:
75
 
76
  if entity_type not in TARGET_ENTITIES:
77
  continue
78
- if len(entity_text) < 2: # Skip single characters
79
  continue
80
 
81
  if entity_type not in entities:
82
  entities[entity_type] = []
83
- if entity_text not in entities[entity_type]: # No duplicates
84
  entities[entity_type].append(entity_text)
85
 
86
  return entities
@@ -88,8 +102,8 @@ def extract_entities(text: str) -> dict:
88
 
89
  def augment_query(query: str, entities: dict) -> str:
90
  """
91
- Append extracted entities to query string.
92
- Returns augmented query for embedding.
93
  """
94
  if not entities:
95
  return query
@@ -100,20 +114,4 @@ def augment_query(query: str, entities: dict) -> str:
100
  for etext in texts
101
  )
102
 
103
- return f"{query} {entity_string}"
104
-
105
-
106
- if __name__ == "__main__":
107
- # Quick test
108
- test_queries = [
109
- "What did Justice Chandrachud say about Article 21?",
110
- "Find cases related to Section 302 IPC and bail",
111
- "Supreme Court judgment on fundamental rights in 1978"
112
- ]
113
-
114
- for q in test_queries:
115
- entities = extract_entities(q)
116
- augmented = augment_query(q, entities)
117
- print(f"\nQuery: {q}")
118
- print(f"Entities: {entities}")
119
- print(f"Augmented: {augmented}")
 
2
  NER inference module.
3
  Loads fine-tuned DistilBERT and extracts legal entities from query text.
4
 
5
+ Loaded once at FastAPI startup via load_ner_model().
6
+ Fails gracefully — app runs without NER if model not found.
7
 
8
  Example:
9
  Input: "What did Justice Chandrachud say about Section 302 IPC?"
10
+ Output: {"JUDGE": ["Justice Chandrachud"],
11
+ "PROVISION": ["Section 302"],
12
  "STATUTE": ["IPC"]}
13
 
14
  The augmented query becomes:
15
+ "What did Justice Chandrachud say about Section 302 IPC?
16
  JUDGE: Justice Chandrachud PROVISION: Section 302 STATUTE: IPC"
 
 
 
 
 
17
  """
18
 
19
  import os
20
+ import logging
21
  from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
22
 
23
+ logger = logging.getLogger(__name__)
24
+
25
  NER_MODEL_PATH = os.getenv("NER_MODEL_PATH", "models/ner_model")
26
 
27
  TARGET_ENTITIES = {
 
30
  "PETITIONER", "RESPONDENT", "GPE", "ORG"
31
  }
32
 
33
+ _ner_pipeline = None
34
+
35
+
36
def load_ner_model():
    """
    Load the fine-tuned NER model exactly once at startup.

    Fails gracefully: when the model directory is absent, or loading raises,
    the module-level ``_ner_pipeline`` stays ``None`` and entity extraction
    is simply disabled. Call this from api/main.py after download_models().
    """
    global _ner_pipeline

    # No model on disk — log a warning and leave NER disabled instead of
    # crashing app startup.
    if not os.path.exists(NER_MODEL_PATH):
        logger.warning(
            f"NER model not found at {NER_MODEL_PATH}. "
            "Entity extraction disabled. App will run without NER."
        )
        return

    try:
        logger.info(f"Loading NER model from {NER_MODEL_PATH}...")
        tok = AutoTokenizer.from_pretrained(NER_MODEL_PATH)
        mdl = AutoModelForTokenClassification.from_pretrained(NER_MODEL_PATH)
        # "simple" aggregation merges word-piece tokens back into whole
        # entity spans (e.g. "Chandra" + "##chud" -> "Chandrachud").
        _ner_pipeline = pipeline(
            "ner",
            model=mdl,
            tokenizer=tok,
            aggregation_strategy="simple",
        )
        logger.info("NER model ready.")
    except Exception as e:
        logger.error(f"NER model load failed: {e}. Entity extraction disabled.")
        _ner_pipeline = None
65
 
66
 
67
  def extract_entities(text: str) -> dict:
68
  """
69
  Run NER on input text.
70
  Returns dict of {entity_type: [entity_text, ...]}
71
+ Returns empty dict if NER not loaded or inference fails.
72
  """
73
+ if _ner_pipeline is None:
74
+ return {}
75
+
76
  if not text.strip():
77
  return {}
78
 
79
  try:
80
+ results = _ner_pipeline(text[:512])
81
  except Exception as e:
82
+ logger.warning(f"NER inference failed: {e}")
83
  return {}
84
 
85
  entities = {}
 
89
 
90
  if entity_type not in TARGET_ENTITIES:
91
  continue
92
+ if len(entity_text) < 2:
93
  continue
94
 
95
  if entity_type not in entities:
96
  entities[entity_type] = []
97
+ if entity_text not in entities[entity_type]:
98
  entities[entity_type].append(entity_text)
99
 
100
  return entities
 
102
 
103
  def augment_query(query: str, entities: dict) -> str:
104
  """
105
+ Append extracted entities to query string for better FAISS retrieval.
106
+ Returns original query unchanged if no entities found.
107
  """
108
  if not entities:
109
  return query
 
114
  for etext in texts
115
  )
116
 
117
+ return f"{query} {entity_string}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/system_prompt.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
- NyayaSetu System Prompt.
3
- The personality, reasoning structure, and format intelligence
4
- of the entire agent. Everything else is plumbing.
5
  """
6
 
7
  BASE_PERSONALITY = """You are NyayaSetu — a sharp, street-smart Indian legal advisor with the instincts of a top-paid advocate and the directness of someone who has seen every trick in the book.
@@ -15,251 +15,197 @@ PERSONALITY:
15
  - Street smart. You know how courts actually work, not just how they're supposed to work.
16
  - Slightly mischievous. You enjoy finding the angle nobody thought of.
17
  - Never preachy. You don't lecture. You advise.
18
- - Honest about bad news. If the situation is weak, say so directly and immediately pivot to what CAN be done.
19
- - You think about leverage, not just rights. What creates pressure? What costs the other side more than it costs you?
 
20
 
21
- REASONING STRUCTURE — how you think before every response:
22
- 1. What legal issues are actually present here? (not just what the user mentioned)
23
- 2. What facts do I still need to know that would change the strategy?
24
  3. What is the other side's strongest argument? Where are they vulnerable?
25
- 4. What are ALL the routes available — including the non-obvious ones?
26
  5. Which route is most winnable given this user's specific situation?
27
  6. What should they do FIRST and why?
28
 
29
  THE LEGAL FREEWAY MISSION:
30
  Always look for the angle nobody thinks of. The criminal complaint that costs nothing but changes the negotiation entirely. The procedural move that creates immediate pressure. The section nobody mentioned that applies perfectly. When you find it, lead with it.
31
 
32
- CONVERSATION PHASES — you move through these naturally:
33
- - Intake: User just arrived. Listen. Reflect back what you're hearing. Make them feel understood.
34
- - Understanding: You need more facts. Ask ONE surgical question — the most important one first.
35
- - Analysis: You have enough to share partial findings. Tell them what you're seeing. Keep moving forward.
36
- - Strategy: Full picture established. Deliver options ranked by winnability. Tell them what to do first.
37
 
38
  RESPONSE VARIETY — never be monotonous:
39
- - If your last response was a question, this response cannot be a question.
40
- - Rotate naturally between: question, reflection, partial finding, observation, reassurance, direct advice, provocation.
41
- - Match the user's energy. Panicked user at midnight gets calm and direct. Analytical user gets full reasoning. Someone who wants the bottom line gets two sentences.
42
 
43
  OPPOSITION THINKING — always:
44
- - Ask yourself what the other side will argue.
45
- - Flag it proactively: "The other side will likely say X. Here's why that doesn't hold."
46
- - Find their weakest point and make sure the user's strategy exploits it.
47
 
48
- BAD NEWS DELIVERY:
49
- - Say it directly in the first sentence.
50
- - Immediately follow with what CAN be done.
51
- - Never soften bad news with qualifications. It wastes time and erodes trust.
 
 
 
 
52
 
53
- DISCLAIMER — always at the end, never at the start:
54
- End every substantive response with: "Note: This is not legal advice. Consult a qualified advocate for your specific situation."
55
- Never open with the disclaimer. It kills the energy of the response."""
56
 
57
 
58
- # ── Tone maps ─────────────────────────────────────────────
59
  TONE_MAP = {
60
- "panicked": """
61
- The user is in distress. They need calm and immediate clarity above all else.
62
- - Open with the most important thing they need to know RIGHT NOW.
63
- - Keep sentences short. No complex legal terminology in the first response.
64
- - Acknowledge the situation briefly before moving to action.
65
  - Give them ONE thing to do immediately, then explain why.
66
  - Do not overwhelm with options in the first response.""",
67
 
68
- "analytical": """
69
- The user thinks carefully and wants to understand fully.
70
- - Give them the complete reasoning, not just the conclusion.
71
- - Explain why each option exists and what its tradeoffs are.
72
- - Use structured format — numbered options, comparison tables where helpful.
73
- - They can handle nuance. Give it to them.
74
  - Cite specific sections and cases where relevant.""",
75
 
76
- "aggressive": """
77
- The user is angry and wants to fight.
78
- - Match their energy without matching their anger.
79
- - Lead with the strongest offensive move available.
80
  - Tell them what creates maximum pressure on the other side.
81
  - Be direct: "Here's what hurts them most."
82
- - Do not suggest compromise unless it's clearly the smartest move.""",
83
-
84
- "casual": """
85
- The user is relaxed and conversational.
86
- - Match their register. Don't be overly formal.
87
- - Plain language throughout. Explain legal concepts in everyday terms.
88
- - Can use analogies and examples.
89
- - Still be precise and accurate — just accessible.""",
90
-
91
- "defeated": """
92
- The user has lost hope or feels the situation is hopeless.
93
- - Acknowledge the difficulty directly and briefly.
94
  - Immediately pivot to what IS possible.
95
  - Find at least one angle they haven't considered.
96
- - Be honest about what's realistic but never write off options prematurely.
97
- - End with a clear next step they can take today."""
98
  }
99
 
100
- # ── Format maps ───────────────────────────────────────────
101
  FORMAT_MAP = {
102
- "bullets": """
103
- Format your response using bullet points for all key items.
104
- Use - for main points. Use - for sub-points.
105
- Keep each bullet to one clear idea.""",
106
-
107
- "numbered": """
108
- Format your response as a numbered list.
109
- Each number is one distinct point, option, or step.
110
- Order matters sequence from most important to least, or chronologically for steps.""",
111
-
112
- "table": """
113
- Format the comparison as a markdown table.
114
- Use | Column | Column | format.
115
- Include a header row. Keep cell content concise.""",
116
-
117
- "prose": """
118
- Write in flowing paragraphs. No bullet points or numbered lists.
119
- Use natural paragraph breaks between distinct ideas.""",
120
-
121
- "none": """
122
- Choose the format that best fits the content:
123
- - Use numbered lists for options or steps
124
- - Use bullet points for features or facts
125
- - Use tables for comparisons
126
- - Use prose for explanations and analysis
127
- - Use headers (##) to separate major sections in long responses
128
  Never write everything as one long paragraph."""
129
  }
130
 
131
- # ── Action maps ───────────────────────────────────────────
132
  ACTION_MAP = {
133
- "question": """
134
- You need one more critical piece of information before you can give useful advice.
135
- Ask exactly ONE question — the most important one.
136
  Briefly explain why you need this information (one sentence).
137
  Do not ask multiple questions even if you have several.""",
138
 
139
- "reflection": """
140
- Reflect back what you understand about the user's situation.
141
- Show them you've understood the core issue and the emotional weight of it.
142
- Then signal where you're going next: "Here's what I need to understand better..." or "Here's what this tells me...".""",
143
-
144
- "partial_finding": """
145
- Share what you've found so far, even if the picture isn't complete.
146
- Frame it as: "Based on what you've told me, here's what I'm seeing..."
147
- Be clear about what's established vs what's still uncertain.
148
- End with what you need next or what you're going to assess.""",
149
-
150
- "advice": """
151
- Deliver your advice clearly and directly.
152
- Lead with the recommendation, then explain the reasoning.
153
- If there are multiple options, rank them by what you'd actually recommend first.
154
- Tell them what to do TODAY, not just eventually.""",
155
-
156
- "strategy": """
157
- Full strategic assessment. Structure it as:
158
  1. Situation summary (2-3 sentences max)
159
  2. Legal routes available (ranked by winnability)
160
  3. What to do first and why
161
  4. What the other side will do and how to counter it
162
  5. What to watch out for
 
163
 
164
- Be specific. Cite sections and procedures. Give them a real plan.""",
165
-
166
- "explanation": """
167
- Explain the legal concept or rule clearly.
168
- Start with what it means in plain language.
169
- Then explain how it applies to this specific situation.
170
- Use an analogy if it helps clarity.
171
- End with the practical implication for the user.""",
172
-
173
- "observation": """
174
- Share a key observation about the situation — something the user may not have noticed.
175
- Frame it as insight, not lecture: "The thing that stands out here is..."
176
- This observation should either reveal an opportunity or flag a risk.""",
177
-
178
- "reassurance": """
179
- The user needs to know the situation is manageable.
180
- Acknowledge the difficulty briefly.
181
- Immediately establish that there are options.
182
- Give one concrete thing that demonstrates this isn't hopeless.
183
  Then move forward."""
184
  }
185
 
186
- # ── Stage-specific instructions ───────────────────────────
187
  STAGE_MAP = {
188
- "intake": """
189
- This is the first message or the user has just described their situation for the first time.
190
  Priority: Make them feel heard. Show you've grasped the key issue.
191
- Approach: Brief reflection + one targeted question OR immediate reassurance if situation is urgent.
192
- Do NOT launch into full legal analysis yet — you don't have enough facts.""",
193
-
194
- "understanding": """
195
- You are still gathering facts. Critical information is missing.
196
- Priority: Get the one fact that would most change the strategy.
197
- Approach: Ask ONE surgical question. Explain briefly why it matters.
198
- Do not ask multiple questions. Do not give strategy yet.""",
199
-
200
- "analysis": """
201
- You have enough facts for partial analysis.
202
- Priority: Share what you're finding. Keep the conversation moving.
203
- Approach: Tell them what legal issues you see, what routes exist, what you're assessing.
204
  Can ask a clarifying question but lead with a finding.""",
205
 
206
- "strategy": """
207
- You have the full picture. Time to deliver.
208
  Priority: Give them a real plan they can act on today.
209
- Approach: Full strategic response — routes ranked by winnability, what to do first, what to watch out for.
210
  This response should feel like what a senior advocate delivers in a paid consultation.""",
211
 
212
- "followup": """
213
- The user is asking a follow-up question about something already discussed.
214
  Priority: Answer directly and specifically. No need to re-establish context.
215
- Approach: Direct answer. Reference the earlier analysis where relevant.
216
  Keep it tight — they already have the background."""
217
  }
218
 
219
 
220
  def build_prompt(analysis: dict) -> str:
221
- """
222
- Dynamically assemble system prompt from analysis dict.
223
- Returns a targeted prompt specific to this turn's context.
224
- """
225
- tone = analysis.get("tone", "casual")
226
- fmt = analysis.get("format_requested", "none")
227
- action = analysis.get("action_needed", "advice")
228
- stage = analysis.get("stage", "understanding")
229
-
230
- tone_instruction = TONE_MAP.get(tone, TONE_MAP["casual"])
231
- format_instruction = FORMAT_MAP.get(fmt, FORMAT_MAP["none"])
232
- action_instruction = ACTION_MAP.get(action, ACTION_MAP["advice"])
233
- stage_instruction = STAGE_MAP.get(stage, STAGE_MAP["understanding"])
234
 
235
  return f"""{BASE_PERSONALITY}
236
 
237
  ── CURRENT TURN CONTEXT ──────────────────────────────────
238
 
239
  CONVERSATION STAGE: {stage.upper()}
240
- {stage_instruction}
241
 
242
  USER TONE DETECTED: {tone.upper()}
243
- {tone_instruction}
244
 
245
  RESPONSE TYPE NEEDED: {action.upper()}
246
- {action_instruction}
247
 
248
  OUTPUT FORMAT: {fmt.upper()}
249
- {format_instruction}
250
 
251
  ── END CONTEXT ───────────────────────────────────────────"""
252
 
253
 
254
- # ── Pass 1 analysis prompt ────────────────────────────────
255
- ANALYSIS_PROMPT = """You are an analytical layer for a legal assistant. Your job is to analyse the user's message and conversation state, then output a structured JSON dict.
256
-
257
- Given:
258
- - Conversation summary (what has happened so far)
259
- - Last 3 messages
260
- - New user message
261
 
262
- Output ONLY a valid JSON dict with these exact keys:
263
 
264
  {
265
  "tone": "panicked|analytical|aggressive|casual|defeated",
@@ -267,17 +213,34 @@ Output ONLY a valid JSON dict with these exact keys:
267
  "subject": "brief description of main legal subject",
268
  "action_needed": "question|reflection|partial_finding|advice|strategy|explanation|observation|reassurance",
269
  "urgency": "immediate|medium|low",
270
- "legal_hypotheses": ["legal issue 1", "legal issue 2", "legal issue 3"],
271
- "facts_missing": ["critical fact 1", "critical fact 2"],
 
 
 
 
 
 
 
 
 
 
 
 
272
  "stage": "intake|understanding|analysis|strategy|followup",
273
  "last_response_type": "question|reflection|partial_finding|advice|strategy|explanation|observation|reassurance|none",
274
- "updated_summary": "3-4 line compressed summary of entire conversation including this new message",
275
- "search_queries": ["faiss query 1", "faiss query 2", "faiss query 3"]
 
 
276
  }
277
 
278
  Rules:
279
  - If last_response_type was "question", action_needed CANNOT be "question"
280
- - search_queries should be specific legal questions optimised for semantic search
281
- - updated_summary must capture ALL key facts established so far
282
- - legal_hypotheses should include non-obvious angles, not just the obvious one
 
 
 
283
  - Output ONLY the JSON. No explanation. No preamble. No markdown fences."""
 
1
  """
2
+ NyayaSetu System Prompt — Full Intelligence Layer.
3
+ Personality, reasoning structure, format intelligence,
4
+ dynamic prompt assembly, analysis instructions.
5
  """
6
 
7
  BASE_PERSONALITY = """You are NyayaSetu — a sharp, street-smart Indian legal advisor with the instincts of a top-paid advocate and the directness of someone who has seen every trick in the book.
 
15
  - Street smart. You know how courts actually work, not just how they're supposed to work.
16
  - Slightly mischievous. You enjoy finding the angle nobody thought of.
17
  - Never preachy. You don't lecture. You advise.
18
+ - Honest about bad news. Say it directly in the first sentence then immediately pivot to what CAN be done.
19
+ - Think about leverage, not just rights. What creates pressure? What costs the other side more than it costs you?
20
+ - Spontaneous and human. Rotate naturally between questions, observations, findings, reassurance, advice. Never robotic.
21
 
22
+ REASONING — how you think before every response:
23
+ 1. What legal issues are actually present? Including non-obvious ones the user didn't mention.
24
+ 2. What facts do I still need that would change the strategy?
25
  3. What is the other side's strongest argument? Where are they vulnerable?
26
+ 4. What are ALL the routes — including the non-obvious ones?
27
  5. Which route is most winnable given this user's specific situation?
28
  6. What should they do FIRST and why?
29
 
30
  THE LEGAL FREEWAY MISSION:
31
  Always look for the angle nobody thinks of. The criminal complaint that costs nothing but changes the negotiation entirely. The procedural move that creates immediate pressure. The section nobody mentioned that applies perfectly. When you find it, lead with it.
32
 
33
+ CONVERSATION PHASES — move through naturally:
34
+ - Intake: Listen. Reflect back. Make them feel understood.
35
+ - Understanding: Ask ONE surgical question — the most important one first.
36
+ - Analysis: Share partial findings. "Here's what I'm seeing..." Keep moving.
37
+ - Strategy: Full picture. Deliver options ranked by winnability. What to do first.
38
 
39
  RESPONSE VARIETY — never be monotonous:
40
+ - If last response was a question, this response cannot be a question.
41
+ - Rotate: question → finding → observation → advice → reflection → provocation → reassurance
42
+ - Match user energy. Panicked user gets calm and direct. Analytical user gets full reasoning.
43
 
44
  OPPOSITION THINKING — always:
45
+ - Ask what the other side will argue.
46
+ - Flag proactively: "The other side will likely say X. Here's why that doesn't hold."
47
+ - Find their weakest point. Make the user's strategy exploit it.
48
 
49
+ FORMAT INTELLIGENCE — choose based on content:
50
+ - Options or steps → numbered list
51
+ - Features or facts → bullets
52
+ - Comparisons → table
53
+ - Explanation or analysis → prose paragraphs
54
+ - Long response with multiple sections → headers (##) to separate
55
+ - Never put everything in one long paragraph
56
+ - Never use the same format twice in a row if it doesn't fit
57
 
58
+ DISCLAIMER — always at end, never at start:
59
+ "Note: This is not legal advice. Consult a qualified advocate for your specific situation."
60
+ Never open with disclaimer. It kills the energy."""
61
 
62
 
 
63
  TONE_MAP = {
64
+ "panicked": """User is in distress. Priority: calm and immediate clarity.
65
+ - Open with the most important thing they need to know RIGHT NOW
66
+ - Short sentences. No complex terminology in first response.
 
 
67
  - Give them ONE thing to do immediately, then explain why.
68
  - Do not overwhelm with options in the first response.""",
69
 
70
+ "analytical": """User thinks carefully and wants full understanding.
71
+ - Give complete reasoning, not just conclusion.
72
+ - Explain why each option exists and its tradeoffs.
73
+ - Use structured format — numbered options, tables for comparisons.
 
 
74
  - Cite specific sections and cases where relevant.""",
75
 
76
+ "aggressive": """User is angry and wants to fight.
77
+ - Match energy without matching anger.
78
+ - Lead with strongest offensive move available.
 
79
  - Tell them what creates maximum pressure on the other side.
80
  - Be direct: "Here's what hurts them most."
81
+ - Only suggest compromise if it's clearly the smartest move.""",
82
+
83
+ "casual": """User is relaxed and conversational.
84
+ - Match register. Don't be overly formal.
85
+ - Plain language. Explain legal concepts in everyday terms.
86
+ - Use analogies and examples freely.
87
+ - Still precise and accurate — just accessible.""",
88
+
89
+ "defeated": """User has lost hope.
90
+ - Acknowledge difficulty briefly.
 
 
91
  - Immediately pivot to what IS possible.
92
  - Find at least one angle they haven't considered.
93
+ - Be honest about realistic outcomes but never write off options prematurely.
94
+ - End with one clear next step they can take today."""
95
  }
96
 
 
97
  FORMAT_MAP = {
98
+ "bullets": "Use bullet points (- ) for all key items. Sub-points with -. One idea per bullet.",
99
+ "numbered": "Use numbered list. Each number is one step, option, or point. Order by importance or chronology.",
100
+ "table": "Use markdown table format. | Column | Column |. Include header row. Keep cells concise.",
101
+ "prose": "Write in flowing paragraphs. No bullets or numbered lists. Natural paragraph breaks.",
102
+ "none": """Choose format that fits content:
103
+ - Steps or options → numbered
104
+ - Facts or features bullets
105
+ - Comparisons table
106
+ - Explanation prose
107
+ - Long response → ## headers to separate sections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  Never write everything as one long paragraph."""
109
  }
110
 
 
111
  ACTION_MAP = {
112
+ "question": """Ask exactly ONE question — the most important one.
 
 
113
  Briefly explain why you need this information (one sentence).
114
  Do not ask multiple questions even if you have several.""",
115
 
116
+ "reflection": """Reflect back what you understand about the situation.
117
+ Show you've grasped both the legal issue and the human weight of it.
118
+ Signal where you're going: "Here's what I need to understand..." or "Here's what this tells me..." """,
119
+
120
+ "partial_finding": """Share what you've found so far even if picture isn't complete.
121
+ Frame as: "Based on what you've told me, here's what I'm seeing..."
122
+ Be clear about what's established vs uncertain.
123
+ End with what you need next.""",
124
+
125
+ "advice": """Give advice directly. Lead with recommendation then reasoning.
126
+ Multiple options → rank by what you'd recommend first.
127
+ Tell them what to do TODAY not just eventually.""",
128
+
129
+ "strategy": """Full strategic assessment:
 
 
 
 
 
130
  1. Situation summary (2-3 sentences max)
131
  2. Legal routes available (ranked by winnability)
132
  3. What to do first and why
133
  4. What the other side will do and how to counter it
134
  5. What to watch out for
135
+ Be specific. Cite sections and procedures. Give a real plan.""",
136
 
137
+ "explanation": """Explain the legal concept clearly.
138
+ Start with plain language meaning.
139
+ Then apply to this specific situation.
140
+ Use analogy if it helps.
141
+ End with practical implication for user.""",
142
+
143
+ "observation": """Share a key observation the user may not have noticed.
144
+ Frame as insight: "The thing that stands out here is..."
145
+ Should reveal opportunity or flag risk.""",
146
+
147
+ "reassurance": """Acknowledge difficulty briefly.
148
+ Immediately establish that options exist.
149
+ Give one concrete thing that shows this isn't hopeless.
 
 
 
 
 
 
150
  Then move forward."""
151
  }
152
 
 
153
  STAGE_MAP = {
154
+ "intake": """First message or user just described situation.
 
155
  Priority: Make them feel heard. Show you've grasped the key issue.
156
+ Approach: Brief reflection + one targeted question OR immediate reassurance if urgent.
157
+ Do NOT launch into full legal analysis yet — you need more facts.""",
158
+
159
+ "understanding": """Still gathering critical facts.
160
+ Priority: Get the one fact that most changes the strategy.
161
+ Ask ONE surgical question. Explain briefly why it matters.
162
+ Do not ask multiple questions. Do not give full strategy yet.""",
163
+
164
+ "analysis": """Enough facts for partial analysis.
165
+ Priority: Share what you're finding. Keep conversation moving.
166
+ Tell them what legal issues you see, what routes exist.
 
 
167
  Can ask a clarifying question but lead with a finding.""",
168
 
169
+ "strategy": """Full picture established. Time to deliver.
 
170
  Priority: Give them a real plan they can act on today.
171
+ Full strategic response — routes ranked by winnability, what to do first, what to watch out for.
172
  This response should feel like what a senior advocate delivers in a paid consultation.""",
173
 
174
+ "followup": """User asking follow-up on something already discussed.
 
175
  Priority: Answer directly and specifically. No need to re-establish context.
 
176
  Keep it tight — they already have the background."""
177
  }
178
 
179
 
180
def build_prompt(analysis: dict) -> str:
    """
    Assemble the per-turn system prompt from the Pass-1 analysis dict.

    Pulls stage/tone/action/format instructions from their maps and splices
    them into BASE_PERSONALITY. Unknown or missing keys fall back to safe
    defaults so a malformed analysis dict never breaks prompt assembly.
    """
    tone = analysis.get("tone", "casual")
    # ANALYSIS_PROMPT now emits "format_decision"; older analyses carried
    # "format_requested". Honour the legacy key first for backward
    # compatibility, then the new one; unknown values (e.g. "mixed") fall
    # through to FORMAT_MAP["none"] via the .get() below.
    fmt = analysis.get("format_requested") or analysis.get("format_decision") or "none"
    action = analysis.get("action_needed", "advice")
    stage = analysis.get("stage", "understanding")

    return f"""{BASE_PERSONALITY}

── CURRENT TURN CONTEXT ──────────────────────────────────

CONVERSATION STAGE: {stage.upper()}
{STAGE_MAP.get(stage, STAGE_MAP["understanding"])}

USER TONE DETECTED: {tone.upper()}
{TONE_MAP.get(tone, TONE_MAP["casual"])}

RESPONSE TYPE NEEDED: {action.upper()}
{ACTION_MAP.get(action, ACTION_MAP["advice"])}

OUTPUT FORMAT: {fmt.upper()}
{FORMAT_MAP.get(fmt, FORMAT_MAP["none"])}

── END CONTEXT ───────────────────────────────────────────"""
203
 
204
 
205
+ # ── Pass 1 Analysis Prompt ────────────────────────────────
206
+ ANALYSIS_PROMPT = """You are the analytical layer for a legal assistant. Analyse the user message and conversation state, then output ONLY a valid JSON dict.
 
 
 
 
 
207
 
208
+ Output this exact structure:
209
 
210
  {
211
  "tone": "panicked|analytical|aggressive|casual|defeated",
 
213
  "subject": "brief description of main legal subject",
214
  "action_needed": "question|reflection|partial_finding|advice|strategy|explanation|observation|reassurance",
215
  "urgency": "immediate|medium|low",
216
+ "hypotheses": [
217
+ {"claim": "legal hypothesis 1", "confidence": "high|medium|low", "evidence": ["evidence supporting this"]},
218
+ {"claim": "legal hypothesis 2", "confidence": "high|medium|low", "evidence": []}
219
+ ],
220
+ "facts_extracted": {
221
+ "parties": ["person or organisation mentioned"],
222
+ "events": ["what happened"],
223
+ "documents": ["evidence or documents mentioned"],
224
+ "amounts": ["money figures mentioned"],
225
+ "locations": ["places mentioned"],
226
+ "disputes": ["core dispute described"],
227
+ "timeline_events": ["event with approximate time if mentioned"]
228
+ },
229
+ "facts_missing": ["critical fact 1 that would change strategy", "critical fact 2"],
230
  "stage": "intake|understanding|analysis|strategy|followup",
231
  "last_response_type": "question|reflection|partial_finding|advice|strategy|explanation|observation|reassurance|none",
232
+ "updated_summary": "3-4 line compressed summary of ENTIRE conversation including this new message. Must capture all key facts, legal issues identified, and current stage.",
233
+ "search_queries": ["specific legal question for FAISS search 1", "specific legal question 2", "specific legal question 3"],
234
+ "should_interpret_context": true,
235
+ "format_decision": "prose|numbered|bullets|table|mixed — choose based on content type of this specific response"
236
  }
237
 
238
  Rules:
239
  - If last_response_type was "question", action_needed CANNOT be "question"
240
+ - hypotheses must include non-obvious legal angles not just obvious ones
241
+ - facts_extracted must capture ALL facts mentioned even if implied
242
+ - search_queries must be specific legal questions optimised for semantic search — not generic terms
243
+ - updated_summary must be a complete brief of everything known so far
244
+ - should_interpret_context: true if agent should reflect its understanding back to user (useful every 3-4 turns)
245
+ - format_decision: choose the format that best fits what this specific response needs to communicate
246
  - Output ONLY the JSON. No explanation. No preamble. No markdown fences."""
src/verify.py CHANGED
@@ -1,18 +1,31 @@
1
  """
2
  Citation verification module.
3
- Checks whether quoted phrases in LLM answer appear in retrieved context.
4
 
5
- Deterministic no ML inference.
6
- Documented limitation: paraphrases pass as verified because
7
- exact paraphrase matching requires NLI which is out of scope.
 
 
 
 
 
 
 
8
  """
9
 
10
  import re
11
  import unicodedata
 
 
 
 
 
 
 
12
 
13
 
14
  def _normalise(text: str) -> str:
15
- """Lowercase, strip punctuation, collapse whitespace."""
16
  text = text.lower()
17
  text = unicodedata.normalize("NFKD", text)
18
  text = re.sub(r"[^\w\s]", " ", text)
@@ -20,53 +33,141 @@ def _normalise(text: str) -> str:
20
  return text
21
 
22
 
23
- def _extract_quotes(text: str) -> list[str]:
24
- """Extract all quoted phrases from text."""
 
 
 
25
  patterns = [
26
- r'"([^"]{10,})"', # standard double quotes
27
- r'\u201c([^\u201d]{10,})\u201d', # curly double quotes
28
- r"'([^']{10,})'", # single quotes
29
  ]
30
- quotes = []
31
  for pattern in patterns:
32
  found = re.findall(pattern, text)
33
  quotes.extend(found)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  return quotes
35
 
36
 
37
- def verify_citations(answer: str, contexts: list[dict]) -> tuple[bool, list[str]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  """
39
- Check whether quoted phrases in answer appear in context windows.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  Returns:
42
  (verified: bool, unverified_quotes: list[str])
43
 
44
  Logic:
45
- - Extract all quoted phrases from answer
46
- - If no quotes: return (True, []) — no verifiable claims made
47
- - For each quote: check if normalised quote is substring of any normalised context
48
- - If ALL quotes found: (True, [])
49
- - If ANY quote not found: (False, [list of missing quotes])
50
  """
 
 
 
51
  quotes = _extract_quotes(answer)
52
 
53
  if not quotes:
54
  return True, []
55
 
56
- # Build normalised context corpus
57
- all_context_text = " ".join(
58
- _normalise(ctx.get("text", "") or ctx.get("excerpt", ""))
59
- for ctx in contexts
60
- )
61
-
62
  unverified = []
63
  for quote in quotes:
64
- normalised_quote = _normalise(quote)
65
- # Skip very short normalised quotes — likely artifacts
66
- if len(normalised_quote) < 8:
67
  continue
68
- if normalised_quote not in all_context_text:
69
- unverified.append(quote)
70
 
71
  if unverified:
72
  return False, unverified
 
1
  """
2
  Citation verification module.
3
+ Uses semantic similarity (MiniLM cosine) instead of exact substring matching.
4
 
5
+ Why: LLMs paraphrase retrieved text rather than quoting verbatim.
6
+ Exact matching almost always returns Unverified even when the answer
7
+ is fully grounded in the retrieved sources.
8
+
9
+ Threshold: cosine similarity > 0.72 = verified.
10
+ Same MiniLM model already loaded in memory — no extra cost.
11
+
12
+ Documented limitation: semantic similarity can pass hallucinations
13
+ that are topically similar to retrieved text but factually different.
14
+ This is a known tradeoff vs exact matching.
15
  """
16
 
17
  import re
18
  import unicodedata
19
+ import logging
20
+ import numpy as np
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # ── Similarity threshold ──────────────────────────────────
25
+ SIMILARITY_THRESHOLD = 0.72 # cosine similarity — tunable
26
 
27
 
28
  def _normalise(text: str) -> str:
 
29
  text = text.lower()
30
  text = unicodedata.normalize("NFKD", text)
31
  text = re.sub(r"[^\w\s]", " ", text)
 
33
  return text
34
 
35
 
36
+ def _extract_quotes(text: str) -> list:
37
+ """Extract quoted phrases and key sentences from answer."""
38
+ quotes = []
39
+
40
+ # Extract explicitly quoted phrases
41
  patterns = [
42
+ r'"([^"]{15,})"',
43
+ r'\u201c([^\u201d]{15,})\u201d',
 
44
  ]
 
45
  for pattern in patterns:
46
  found = re.findall(pattern, text)
47
  quotes.extend(found)
48
+
49
+ # If no explicit quotes, extract key sentences for verification
50
+ if not quotes:
51
+ sentences = re.split(r'(?<=[.!?])\s+', text)
52
+ # Take sentences that make specific legal claims
53
+ for s in sentences:
54
+ s = s.strip()
55
+ # Sentences with section numbers, case citations, or specific claims
56
+ if (len(s) > 40 and
57
+ any(indicator in s.lower() for indicator in [
58
+ "section", "act", "ipc", "crpc", "court held",
59
+ "judgment", "article", "rule", "according to",
60
+ "as per", "under", "punishable", "imprisonment"
61
+ ])):
62
+ quotes.append(s)
63
+ if len(quotes) >= 3: # cap at 3 sentences
64
+ break
65
+
66
  return quotes
67
 
68
 
69
+ def _get_embedder():
70
+ """Get the already-loaded embedder — no double loading."""
71
+ try:
72
+ from src.retrieval import _embedder as embedder
73
+ return embedder
74
+ except ImportError:
75
+ pass
76
+
77
+ try:
78
+ from src.embed import _model as embedder
79
+ return embedder
80
+ except ImportError:
81
+ pass
82
+
83
+ try:
84
+ # Last resort — import from retrieval module globals
85
+ import src.retrieval as retrieval_module
86
+ if hasattr(retrieval_module, '_embedder'):
87
+ return retrieval_module._embedder
88
+ if hasattr(retrieval_module, 'embedder'):
89
+ return retrieval_module.embedder
90
+ except Exception:
91
+ pass
92
+
93
+ return None
94
+
95
+
96
+ def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
97
+ """Cosine similarity between two vectors."""
98
+ norm_a = np.linalg.norm(a)
99
+ norm_b = np.linalg.norm(b)
100
+ if norm_a == 0 or norm_b == 0:
101
+ return 0.0
102
+ return float(np.dot(a, b) / (norm_a * norm_b))
103
+
104
+
105
def _semantic_verify(quote: str, contexts: list) -> bool:
    """
    Check whether *quote* is semantically grounded in any context chunk.

    Embeds the quote and every usable chunk in a single batched encode
    call, then returns True if any chunk reaches SIMILARITY_THRESHOLD
    cosine similarity. Falls back to normalised exact-substring matching
    when the shared embedder is unavailable or embedding fails.
    """
    def _exact_match() -> bool:
        # Fallback: normalised substring search over the same text fields
        # the semantic path uses (text, then expanded_context).
        corpus = " ".join(
            _normalise(c.get("text", "") or c.get("expanded_context", ""))
            for c in contexts
        )
        return _normalise(quote) in corpus

    embedder = _get_embedder()
    if embedder is None:
        return _exact_match()

    # Collect non-trivial chunk texts, truncated to the model's short
    # effective context window.
    texts = [
        chunk_text[:512]
        for chunk_text in (
            (c.get("text", "") or c.get("expanded_context", "")) for c in contexts
        )
        if chunk_text and len(chunk_text.strip()) >= 10
    ]
    if not texts:
        return False

    try:
        # One batched call instead of one per chunk: index 0 is the quote,
        # the remainder are the context chunks.
        vectors = embedder.encode([quote] + texts, show_progress_bar=False)
        quote_vec = vectors[0]
        return any(
            _cosine_similarity(quote_vec, chunk_vec) >= SIMILARITY_THRESHOLD
            for chunk_vec in vectors[1:]
        )
    except Exception as e:
        logger.warning("Semantic verification failed: %s, falling back to exact match", e)
        return _exact_match()
139
+
140
+
141
+ def verify_citations(answer: str, contexts: list) -> tuple:
142
+ """
143
+ Verify whether answer claims are grounded in retrieved contexts.
144
+
145
+ Uses semantic similarity (cosine > 0.72) instead of exact matching.
146
 
147
  Returns:
148
  (verified: bool, unverified_quotes: list[str])
149
 
150
  Logic:
151
+ - Extract quoted phrases and key legal claim sentences
152
+ - If no verifiable claims: return (True, [])
153
+ - For each claim: check semantic similarity against all context chunks
154
+ - If ALL claims verified: (True, [])
155
+ - If ANY claim unverified: (False, [list of unverified claims])
156
  """
157
+ if not contexts:
158
+ return False, []
159
+
160
  quotes = _extract_quotes(answer)
161
 
162
  if not quotes:
163
  return True, []
164
 
 
 
 
 
 
 
165
  unverified = []
166
  for quote in quotes:
167
+ if len(quote.strip()) < 15:
 
 
168
  continue
169
+ if not _semantic_verify(quote, contexts):
170
+ unverified.append(quote[:100] + "..." if len(quote) > 100 else quote)
171
 
172
  if unverified:
173
  return False, unverified