CaffeinatedCoding committed on
Commit
5d60eec
·
verified ·
1 Parent(s): e860d63

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. requirements.txt +13 -11
  2. src/agent_v2.py +22 -9
  3. src/llm.py +34 -110
requirements.txt CHANGED
@@ -1,15 +1,17 @@
 
 
 
 
1
  fastapi
2
  uvicorn
3
- pydantic
4
- huggingface_hub
5
- sentence-transformers
6
- numpy
7
- groq
8
- google-generativeai
9
- tenacity
10
  python-dotenv
11
- transformers
12
- faiss-cpu
13
- torch
14
- kagglehub
15
  pytest
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentence-transformers
4
+ faiss-cpu
5
  fastapi
6
  uvicorn
 
 
 
 
 
 
 
7
  python-dotenv
8
+ groq
9
+ dvc
10
+ mlflow
11
+ optuna
12
  pytest
13
+ kagglehub
14
+ pymupdf
15
+ tenacity
16
+ seqeval
17
+ httpx
src/agent_v2.py CHANGED
@@ -25,14 +25,15 @@ from src.retrieval import retrieve
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
  from src.ner import extract_entities, augment_query
28
- from src.llm import call_llm_raw
29
 
30
  logger = logging.getLogger(__name__)
31
 
 
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
 
36
 
37
  # ── Session store ─────────────────────────────────────────
38
  sessions: Dict[str, Dict] = {}
@@ -164,10 +165,16 @@ Rules:
164
  - Update hypothesis confidence based on new evidence
165
  - search_queries must be specific legal questions for vector search"""
166
 
167
- raw = call_llm_raw([
168
- {"role": "system", "content": ANALYSIS_PROMPT},
169
- {"role": "user", "content": user_content}
170
- ]).strip()
 
 
 
 
 
 
171
  raw = raw.replace("```json", "").replace("```", "").strip()
172
 
173
  try:
@@ -318,10 +325,16 @@ Instructions:
318
  - Opposition war-gaming: if giving strategy, include what the other side will argue
319
  {radar_instruction}"""
320
 
321
- return call_llm_raw([
322
- {"role": "system", "content": system_prompt},
323
- {"role": "user", "content": user_content}
324
- ])
 
 
 
 
 
 
325
 
326
 
327
  # ── Main entry point ──────────────────────────────────────
 
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
  from src.ner import extract_entities, augment_query
 
28
 
29
  logger = logging.getLogger(__name__)
30
 
31
+ from groq import Groq
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
36
+ _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
37
 
38
  # ── Session store ─────────────────────────────────────────
39
  sessions: Dict[str, Dict] = {}
 
165
  - Update hypothesis confidence based on new evidence
166
  - search_queries must be specific legal questions for vector search"""
167
 
168
+ response = _client.chat.completions.create(
169
+ model="llama-3.3-70b-versatile",
170
+ messages=[
171
+ {"role": "system", "content": ANALYSIS_PROMPT},
172
+ {"role": "user", "content": user_content}
173
+ ],
174
+ temperature=0.1,
175
+ max_tokens=900
176
+ )
177
+ raw = response.choices[0].message.content.strip()
178
  raw = raw.replace("```json", "").replace("```", "").strip()
179
 
180
  try:
 
325
  - Opposition war-gaming: if giving strategy, include what the other side will argue
326
  {radar_instruction}"""
327
 
328
+ response = _client.chat.completions.create(
329
+ model="llama-3.3-70b-versatile",
330
+ messages=[
331
+ {"role": "system", "content": system_prompt},
332
+ {"role": "user", "content": user_content}
333
+ ],
334
+ temperature=0.3,
335
+ max_tokens=1500
336
+ )
337
+ return response.choices[0].message.content
338
 
339
 
340
  # ── Main entry point ──────────────────────────────────────
src/llm.py CHANGED
@@ -1,92 +1,27 @@
1
  """
2
- LLM module. Gemini Flash as primary, Groq as fallback.
3
- Gemini works reliably from HF Spaces. Groq is backup.
 
 
 
4
  """
5
 
6
  import os
7
- import logging
8
  from tenacity import retry, stop_after_attempt, wait_exponential
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
- logger = logging.getLogger(__name__)
13
-
14
- # ── Gemini setup ──────────────────────────────────────────
15
- import google.generativeai as genai
16
-
17
- _gemini_client = None
18
- _gemini_model = None
19
-
20
- def _init_gemini():
21
- global _gemini_client, _gemini_model
22
- api_key = os.getenv("GEMINI_API_KEY")
23
- if not api_key:
24
- logger.warning("GEMINI_API_KEY not set")
25
- return False
26
- try:
27
- genai.configure(api_key=api_key)
28
- _gemini_model = genai.GenerativeModel("gemini-1.5-flash")
29
- logger.info("Gemini Flash ready")
30
- return True
31
- except Exception as e:
32
- logger.error(f"Gemini init failed: {e}")
33
- return False
34
-
35
- # ── Groq setup ────────────────────────────────────────────
36
- _groq_client = None
37
-
38
- def _init_groq():
39
- global _groq_client
40
- api_key = os.getenv("GROQ_API_KEY")
41
- if not api_key:
42
- return False
43
- try:
44
- from groq import Groq
45
- _groq_client = Groq(api_key=api_key)
46
- logger.info("Groq ready as fallback")
47
- return True
48
- except Exception as e:
49
- logger.error(f"Groq init failed: {e}")
50
- return False
51
-
52
- _gemini_ready = _init_gemini()
53
- _groq_ready = _init_groq()
54
-
55
- SYSTEM_PROMPT = """You are NyayaSetu — a sharp, street-smart Indian legal advisor.
56
- You work FOR the user. Your job is to find the angle, identify the leverage,
57
- and tell the user exactly what to do — the way a senior lawyer would in a
58
- private consultation, not the way a textbook would explain it.
59
-
60
- Be direct. Be human. Vary your response style naturally.
61
- Sometimes short and punchy. Sometimes detailed and structured.
62
- Match the energy of what the user needs right now.
63
-
64
- When citing sources, reference the Judgment ID naturally in your response.
65
- Always end with: "Note: This is not legal advice. Consult a qualified advocate."
66
- """
67
 
68
-
69
- def _call_gemini(messages: list) -> str:
70
- """Call Gemini Flash."""
71
- # Convert messages to Gemini format
72
- system = next((m["content"] for m in messages if m["role"] == "system"), "")
73
- user_parts = [m["content"] for m in messages if m["role"] == "user"]
74
-
75
- full_prompt = f"{system}\n\n{chr(10).join(user_parts)}"
76
-
77
- response = _gemini_model.generate_content(
78
- full_prompt,
79
- generation_config=genai.types.GenerationConfig(
80
- temperature=0.3,
81
- max_output_tokens=1500,
82
- )
83
- )
84
- return response.text
85
 
86
 
87
- def _call_groq(messages: list) -> str:
88
- """Call Groq Llama as fallback."""
89
- response = _groq_client.chat.completions.create(
 
 
 
90
  model="llama-3.3-70b-versatile",
91
  messages=messages,
92
  temperature=0.3,
@@ -95,42 +30,31 @@ def _call_groq(messages: list) -> str:
95
  return response.choices[0].message.content
96
 
97
 
98
- @retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=4))
 
 
 
99
  def call_llm(query: str, context: str) -> str:
100
  """
101
- Call LLM with Gemini primary, Groq fallback.
102
- Used by V1 agent (src/agent.py).
103
  """
104
- messages = [
105
- {"role": "system", "content": SYSTEM_PROMPT},
106
- {"role": "user", "content": f"QUESTION: {query}\n\nSOURCES:\n{context}\n\nAnswer based on sources. Cite judgment IDs."}
107
- ]
108
- return _call_llm_with_fallback(messages)
109
 
 
 
110
 
111
- def call_llm_raw(messages: list) -> str:
112
- """
113
- Call LLM with pre-built messages list.
114
- Used by V2 agent (src/agent_v2.py) for Pass 1 and Pass 3.
115
- """
116
- return _call_llm_with_fallback(messages)
117
 
 
 
 
 
 
 
 
 
 
118
 
119
- def _call_llm_with_fallback(messages: list) -> str:
120
- """Try Gemini first, fall back to Groq."""
121
-
122
- # Try Gemini first
123
- if _gemini_ready and _gemini_model:
124
- try:
125
- return _call_gemini(messages)
126
- except Exception as e:
127
- logger.warning(f"Gemini failed: {e}, trying Groq")
128
-
129
- # Fall back to Groq
130
- if _groq_ready and _groq_client:
131
- try:
132
- return _call_groq(messages)
133
- except Exception as e:
134
- logger.error(f"Groq also failed: {e}")
135
-
136
- raise Exception("All LLM providers failed")
 
1
  """
2
+ LLM module. Single Groq API call with tenacity retry.
3
+
4
+ WHY Groq? Free tier, fastest inference (~500 tokens/sec).
5
+ WHY temperature=0.1? Lower = more deterministic, less hallucination.
6
+ WHY one call per query? Multi-step chains add latency and failure points.
7
  """
8
 
9
  import os
10
+ from groq import Groq
11
  from tenacity import retry, stop_after_attempt, wait_exponential
12
  from dotenv import load_dotenv
13
 
14
  load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
+ def call_llm_raw(messages: list) -> str:
20
+ """
21
+ Call Groq with pre-built messages list.
22
+ Used by V2 agent for Pass 1 and Pass 3.
23
+ """
24
+ response = _client.chat.completions.create(
25
  model="llama-3.3-70b-versatile",
26
  messages=messages,
27
  temperature=0.3,
 
30
  return response.choices[0].message.content
31
 
32
 
33
+ @retry(
34
+ stop=stop_after_attempt(3),
35
+ wait=wait_exponential(multiplier=1, min=2, max=8)
36
+ )
37
  def call_llm(query: str, context: str) -> str:
38
  """
39
+ Call Groq Llama-3. Used by V1 agent.
40
+ Retries 3 times with exponential backoff.
41
  """
42
+ user_message = f"""QUESTION: {query}
 
 
 
 
43
 
44
+ SUPREME COURT JUDGMENT EXCERPTS:
45
+ {context}
46
 
47
+ Answer based only on the excerpts above. Cite judgment IDs.
48
+ Use proper markdown formatting."""
 
 
 
 
49
 
50
+ response = _client.chat.completions.create(
51
+ model="llama-3.3-70b-versatile",
52
+ messages=[
53
+ {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
54
+ {"role": "user", "content": user_message}
55
+ ],
56
+ temperature=0.1,
57
+ max_tokens=1500
58
+ )
59
 
60
+ return response.choices[0].message.content