CaffeinatedCoding committed on
Commit
5d60eec
·
verified ·
1 Parent(s): e860d63

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. requirements.txt +13 -11
  2. src/agent_v2.py +22 -9
  3. src/llm.py +34 -110
requirements.txt CHANGED
@@ -1,15 +1,17 @@
 
 
 
 
1
  fastapi
2
  uvicorn
3
- pydantic
4
- huggingface_hub
5
- sentence-transformers
6
- numpy
7
- groq
8
- google-generativeai
9
- tenacity
10
  python-dotenv
11
- transformers
12
- faiss-cpu
13
- torch
14
- kagglehub
15
  pytest
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentence-transformers
4
+ faiss-cpu
5
  fastapi
6
  uvicorn
 
 
 
 
 
 
 
7
  python-dotenv
8
+ groq
9
+ dvc
10
+ mlflow
11
+ optuna
12
  pytest
13
+ kagglehub
14
+ pymupdf
15
+ tenacity
16
+ seqeval
17
+ httpx
src/agent_v2.py CHANGED
@@ -25,14 +25,15 @@ from src.retrieval import retrieve
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
  from src.ner import extract_entities, augment_query
28
- from src.llm import call_llm_raw
29
 
30
  logger = logging.getLogger(__name__)
31
 
 
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
 
36
 
37
  # ── Session store ─────────────────────────────────────────
38
  sessions: Dict[str, Dict] = {}
@@ -164,10 +165,16 @@ Rules:
164
  - Update hypothesis confidence based on new evidence
165
  - search_queries must be specific legal questions for vector search"""
166
 
167
- raw = call_llm_raw([
168
- {"role": "system", "content": ANALYSIS_PROMPT},
169
- {"role": "user", "content": user_content}
170
- ]).strip()
 
 
 
 
 
 
171
  raw = raw.replace("```json", "").replace("```", "").strip()
172
 
173
  try:
@@ -318,10 +325,16 @@ Instructions:
318
  - Opposition war-gaming: if giving strategy, include what the other side will argue
319
  {radar_instruction}"""
320
 
321
- return call_llm_raw([
322
- {"role": "system", "content": system_prompt},
323
- {"role": "user", "content": user_content}
324
- ])
 
 
 
 
 
 
325
 
326
 
327
  # ── Main entry point ──────────────────────────────────────
 
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
  from src.ner import extract_entities, augment_query
 
28
 
29
  logger = logging.getLogger(__name__)
30
 
31
+ from groq import Groq
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
36
+ _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
37
 
38
  # ── Session store ─────────────────────────────────────────
39
  sessions: Dict[str, Dict] = {}
 
165
  - Update hypothesis confidence based on new evidence
166
  - search_queries must be specific legal questions for vector search"""
167
 
168
+ response = _client.chat.completions.create(
169
+ model="llama-3.3-70b-versatile",
170
+ messages=[
171
+ {"role": "system", "content": ANALYSIS_PROMPT},
172
+ {"role": "user", "content": user_content}
173
+ ],
174
+ temperature=0.1,
175
+ max_tokens=900
176
+ )
177
+ raw = response.choices[0].message.content.strip()
178
  raw = raw.replace("```json", "").replace("```", "").strip()
179
 
180
  try:
 
325
  - Opposition war-gaming: if giving strategy, include what the other side will argue
326
  {radar_instruction}"""
327
 
328
+ response = _client.chat.completions.create(
329
+ model="llama-3.3-70b-versatile",
330
+ messages=[
331
+ {"role": "system", "content": system_prompt},
332
+ {"role": "user", "content": user_content}
333
+ ],
334
+ temperature=0.3,
335
+ max_tokens=1500
336
+ )
337
+ return response.choices[0].message.content
338
 
339
 
340
  # ── Main entry point ──────────────────────────────────────
src/llm.py CHANGED
@@ -1,92 +1,27 @@
1
  """
2
- LLM module. Gemini Flash as primary, Groq as fallback.
3
- Gemini works reliably from HF Spaces. Groq is backup.
 
 
 
4
  """
5
 
6
  import os
7
- import logging
8
  from tenacity import retry, stop_after_attempt, wait_exponential
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
- logger = logging.getLogger(__name__)
13
-
14
- # ── Gemini setup ──────────────────────────────────────────
15
- import google.generativeai as genai
16
-
17
- _gemini_client = None
18
- _gemini_model = None
19
-
20
- def _init_gemini():
21
- global _gemini_client, _gemini_model
22
- api_key = os.getenv("GEMINI_API_KEY")
23
- if not api_key:
24
- logger.warning("GEMINI_API_KEY not set")
25
- return False
26
- try:
27
- genai.configure(api_key=api_key)
28
- _gemini_model = genai.GenerativeModel("gemini-1.5-flash")
29
- logger.info("Gemini Flash ready")
30
- return True
31
- except Exception as e:
32
- logger.error(f"Gemini init failed: {e}")
33
- return False
34
-
35
- # ── Groq setup ────────────────────────────────────────────
36
- _groq_client = None
37
-
38
- def _init_groq():
39
- global _groq_client
40
- api_key = os.getenv("GROQ_API_KEY")
41
- if not api_key:
42
- return False
43
- try:
44
- from groq import Groq
45
- _groq_client = Groq(api_key=api_key)
46
- logger.info("Groq ready as fallback")
47
- return True
48
- except Exception as e:
49
- logger.error(f"Groq init failed: {e}")
50
- return False
51
-
52
- _gemini_ready = _init_gemini()
53
- _groq_ready = _init_groq()
54
-
55
- SYSTEM_PROMPT = """You are NyayaSetu — a sharp, street-smart Indian legal advisor.
56
- You work FOR the user. Your job is to find the angle, identify the leverage,
57
- and tell the user exactly what to do — the way a senior lawyer would in a
58
- private consultation, not the way a textbook would explain it.
59
-
60
- Be direct. Be human. Vary your response style naturally.
61
- Sometimes short and punchy. Sometimes detailed and structured.
62
- Match the energy of what the user needs right now.
63
-
64
- When citing sources, reference the Judgment ID naturally in your response.
65
- Always end with: "Note: This is not legal advice. Consult a qualified advocate."
66
- """
67
 
68
-
69
- def _call_gemini(messages: list) -> str:
70
- """Call Gemini Flash."""
71
- # Convert messages to Gemini format
72
- system = next((m["content"] for m in messages if m["role"] == "system"), "")
73
- user_parts = [m["content"] for m in messages if m["role"] == "user"]
74
-
75
- full_prompt = f"{system}\n\n{chr(10).join(user_parts)}"
76
-
77
- response = _gemini_model.generate_content(
78
- full_prompt,
79
- generation_config=genai.types.GenerationConfig(
80
- temperature=0.3,
81
- max_output_tokens=1500,
82
- )
83
- )
84
- return response.text
85
 
86
 
87
- def _call_groq(messages: list) -> str:
88
- """Call Groq Llama as fallback."""
89
- response = _groq_client.chat.completions.create(
 
 
 
90
  model="llama-3.3-70b-versatile",
91
  messages=messages,
92
  temperature=0.3,
@@ -95,42 +30,31 @@ def _call_groq(messages: list) -> str:
95
  return response.choices[0].message.content
96
 
97
 
98
- @retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=4))
 
 
 
99
  def call_llm(query: str, context: str) -> str:
100
  """
101
- Call LLM with Gemini primary, Groq fallback.
102
- Used by V1 agent (src/agent.py).
103
  """
104
- messages = [
105
- {"role": "system", "content": SYSTEM_PROMPT},
106
- {"role": "user", "content": f"QUESTION: {query}\n\nSOURCES:\n{context}\n\nAnswer based on sources. Cite judgment IDs."}
107
- ]
108
- return _call_llm_with_fallback(messages)
109
 
 
 
110
 
111
- def call_llm_raw(messages: list) -> str:
112
- """
113
- Call LLM with pre-built messages list.
114
- Used by V2 agent (src/agent_v2.py) for Pass 1 and Pass 3.
115
- """
116
- return _call_llm_with_fallback(messages)
117
 
 
 
 
 
 
 
 
 
 
118
 
119
- def _call_llm_with_fallback(messages: list) -> str:
120
- """Try Gemini first, fall back to Groq."""
121
-
122
- # Try Gemini first
123
- if _gemini_ready and _gemini_model:
124
- try:
125
- return _call_gemini(messages)
126
- except Exception as e:
127
- logger.warning(f"Gemini failed: {e}, trying Groq")
128
-
129
- # Fall back to Groq
130
- if _groq_ready and _groq_client:
131
- try:
132
- return _call_groq(messages)
133
- except Exception as e:
134
- logger.error(f"Groq also failed: {e}")
135
-
136
- raise Exception("All LLM providers failed")
 
1
  """
2
+ LLM module. Single Groq API call with tenacity retry.
3
+
4
+ WHY Groq? Free tier, fastest inference (~500 tokens/sec).
5
+ WHY temperature=0.1? Lower = more deterministic, less hallucination.
6
+ WHY one call per query? Multi-step chains add latency and failure points.
7
  """
8
 
9
  import os
10
+ from groq import Groq
11
  from tenacity import retry, stop_after_attempt, wait_exponential
12
  from dotenv import load_dotenv
13
 
14
  load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
+ def call_llm_raw(messages: list) -> str:
20
+ """
21
+ Call Groq with pre-built messages list.
22
+ Used by V2 agent for Pass 1 and Pass 3.
23
+ """
24
+ response = _client.chat.completions.create(
25
  model="llama-3.3-70b-versatile",
26
  messages=messages,
27
  temperature=0.3,
 
30
  return response.choices[0].message.content
31
 
32
 
33
+ @retry(
34
+ stop=stop_after_attempt(3),
35
+ wait=wait_exponential(multiplier=1, min=2, max=8)
36
+ )
37
  def call_llm(query: str, context: str) -> str:
38
  """
39
+ Call Groq Llama-3. Used by V1 agent.
40
+ Retries 3 times with exponential backoff.
41
  """
42
+ user_message = f"""QUESTION: {query}
 
 
 
 
43
 
44
+ SUPREME COURT JUDGMENT EXCERPTS:
45
+ {context}
46
 
47
+ Answer based only on the excerpts above. Cite judgment IDs.
48
+ Use proper markdown formatting."""
 
 
 
 
49
 
50
+ response = _client.chat.completions.create(
51
+ model="llama-3.3-70b-versatile",
52
+ messages=[
53
+ {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
54
+ {"role": "user", "content": user_message}
55
+ ],
56
+ temperature=0.1,
57
+ max_tokens=1500
58
+ )
59
 
60
+ return response.choices[0].message.content