CaffeinatedCoding committed on
Commit
f756c47
·
verified ·
1 Parent(s): 4ad765a

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. requirements.txt +1 -1
  2. src/agent_v2.py +4 -7
  3. src/llm.py +14 -15
requirements.txt CHANGED
@@ -5,7 +5,7 @@ faiss-cpu
5
  fastapi
6
  uvicorn
7
  python-dotenv
8
- openai
9
  dvc
10
  mlflow
11
  optuna
 
5
  fastapi
6
  uvicorn
7
  python-dotenv
8
+ groq
9
  dvc
10
  mlflow
11
  optuna
src/agent_v2.py CHANGED
@@ -28,15 +28,12 @@ from src.ner import extract_entities, augment_query
28
 
29
  logger = logging.getLogger(__name__)
30
 
31
- from openai import OpenAI
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
36
- _client = OpenAI(
37
- api_key=os.getenv("DEEPSEEK_API_KEY"),
38
- base_url="https://api.deepseek.com/v1"
39
- )
40
 
41
  # ── Session store ─────────────────────────────────────────
42
  sessions: Dict[str, Dict] = {}
@@ -169,7 +166,7 @@ Rules:
169
  - search_queries must be specific legal questions for vector search"""
170
 
171
  response = _client.chat.completions.create(
172
- model="deepseek-chat",
173
  messages=[
174
  {"role": "system", "content": ANALYSIS_PROMPT},
175
  {"role": "user", "content": user_content}
@@ -329,7 +326,7 @@ Instructions:
329
  {radar_instruction}"""
330
 
331
  response = _client.chat.completions.create(
332
- model="deepseek-chat",
333
  messages=[
334
  {"role": "system", "content": system_prompt},
335
  {"role": "user", "content": user_content}
 
28
 
29
  logger = logging.getLogger(__name__)
30
 
31
+ from groq import Groq
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
36
+ _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 
 
 
37
 
38
  # ── Session store ─────────────────────────────────────────
39
  sessions: Dict[str, Dict] = {}
 
166
  - search_queries must be specific legal questions for vector search"""
167
 
168
  response = _client.chat.completions.create(
169
+ model="llama-3.3-70b-versatile",
170
  messages=[
171
  {"role": "system", "content": ANALYSIS_PROMPT},
172
  {"role": "user", "content": user_content}
 
326
  {radar_instruction}"""
327
 
328
  response = _client.chat.completions.create(
329
+ model="llama-3.3-70b-versatile",
330
  messages=[
331
  {"role": "system", "content": system_prompt},
332
  {"role": "user", "content": user_content}
src/llm.py CHANGED
@@ -1,14 +1,14 @@
1
  """
2
- LLM module. Single DeepSeek API call with tenacity retry.
3
 
4
- WHY DeepSeek? Free tier, cost-effective inference.
5
  WHY temperature=0.1? Lower = more deterministic, less hallucination.
6
  WHY one call per query? Multi-step chains add latency and failure points.
7
  """
8
 
9
  import os
10
  import logging
11
- from openai import OpenAI
12
  from tenacity import retry, stop_after_attempt, wait_exponential
13
  from dotenv import load_dotenv
14
 
@@ -16,31 +16,30 @@ load_dotenv()
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
- api_key = os.getenv("DEEPSEEK_API_KEY")
20
- logger.info(f"DEEPSEEK_API_KEY loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
21
 
22
- _client = OpenAI(
23
- api_key=api_key,
24
- base_url="https://api.deepseek.com/v1"
25
  )
26
- logger.info("DeepSeek client initialized successfully")
27
 
28
 
29
  def call_llm_raw(messages: list) -> str:
30
  """
31
- Call DeepSeek with pre-built messages list.
32
  Used by V2 agent for Pass 1 and Pass 3.
33
  """
34
  try:
35
  response = _client.chat.completions.create(
36
- model="deepseek-chat",
37
  messages=messages,
38
  temperature=0.3,
39
  max_tokens=1500
40
  )
41
  return response.choices[0].message.content
42
  except Exception as e:
43
- logger.error(f"DeepSeek API error in call_llm_raw: {type(e).__name__}: {str(e)}", exc_info=True)
44
  raise
45
 
46
 
@@ -50,7 +49,7 @@ def call_llm_raw(messages: list) -> str:
50
  )
51
  def call_llm(query: str, context: str) -> str:
52
  """
53
- Call DeepSeek. Used by V1 agent.
54
  Retries 3 times with exponential backoff.
55
  """
56
  try:
@@ -63,7 +62,7 @@ Answer based only on the excerpts above. Cite judgment IDs.
63
  Use proper markdown formatting."""
64
 
65
  response = _client.chat.completions.create(
66
- model="deepseek-chat",
67
  messages=[
68
  {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
69
  {"role": "user", "content": user_message}
@@ -74,5 +73,5 @@ Use proper markdown formatting."""
74
 
75
  return response.choices[0].message.content
76
  except Exception as e:
77
- logger.error(f"DeepSeek API error in call_llm: {type(e).__name__}: {str(e)}", exc_info=True)
78
  raise
 
1
  """
2
+ LLM module. Single Groq API call with tenacity retry.
3
 
4
+ WHY Groq? Free tier, fastest inference (~500 tokens/sec).
5
  WHY a low temperature (0.1–0.3)? Lower = more deterministic, less hallucination.
6
  WHY one call per query? Multi-step chains add latency and failure points.
7
  """
8
 
9
  import os
10
  import logging
11
+ from groq import Groq
12
  from tenacity import retry, stop_after_attempt, wait_exponential
13
  from dotenv import load_dotenv
14
 
 
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
+ api_key = os.getenv("GROQ_API_KEY")
20
+ logger.info(f"GROQ_API_KEY loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
21
 
22
+ _client = Groq(
23
+ api_key=api_key
 
24
  )
25
+ logger.info("Groq client initialized successfully")
26
 
27
 
28
  def call_llm_raw(messages: list) -> str:
29
  """
30
+ Call Groq with pre-built messages list.
31
  Used by V2 agent for Pass 1 and Pass 3.
32
  """
33
  try:
34
  response = _client.chat.completions.create(
35
+ model="llama-3.3-70b-versatile",
36
  messages=messages,
37
  temperature=0.3,
38
  max_tokens=1500
39
  )
40
  return response.choices[0].message.content
41
  except Exception as e:
42
+ logger.error(f"Groq API error in call_llm_raw: {type(e).__name__}: {str(e)}", exc_info=True)
43
  raise
44
 
45
 
 
49
  )
50
  def call_llm(query: str, context: str) -> str:
51
  """
52
+ Call Groq Llama-3. Used by V1 agent.
53
  Retries 3 times with exponential backoff.
54
  """
55
  try:
 
62
  Use proper markdown formatting."""
63
 
64
  response = _client.chat.completions.create(
65
+ model="llama-3.3-70b-versatile",
66
  messages=[
67
  {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
68
  {"role": "user", "content": user_message}
 
73
 
74
  return response.choices[0].message.content
75
  except Exception as e:
76
+ logger.error(f"Groq API error in call_llm: {type(e).__name__}: {str(e)}", exc_info=True)
77
  raise