CaffeinatedCoding committed on
Commit
cb13dc9
·
verified ·
1 Parent(s): bcc8c23

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. src/llm.py +34 -19
src/llm.py CHANGED
@@ -7,6 +7,7 @@ WHY one call per query? Multi-step chains add latency and failure points.
7
  """
8
 
9
  import os
 
10
  from groq import Groq
11
  import httpx
12
  from tenacity import retry, stop_after_attempt, wait_exponential
@@ -14,10 +15,16 @@ from dotenv import load_dotenv
14
 
15
  load_dotenv()
16
 
 
 
 
 
 
17
  _client = Groq(
18
- api_key=os.getenv("GROQ_API_KEY"),
19
  http_client=httpx.Client(timeout=30.0)
20
  )
 
21
 
22
 
23
  def call_llm_raw(messages: list) -> str:
@@ -25,13 +32,17 @@ def call_llm_raw(messages: list) -> str:
25
  Call Groq with pre-built messages list.
26
  Used by V2 agent for Pass 1 and Pass 3.
27
  """
28
- response = _client.chat.completions.create(
29
- model="llama-3.3-70b-versatile",
30
- messages=messages,
31
- temperature=0.3,
32
- max_tokens=1500
33
- )
34
- return response.choices[0].message.content
 
 
 
 
35
 
36
 
37
  @retry(
@@ -43,7 +54,8 @@ def call_llm(query: str, context: str) -> str:
43
  Call Groq Llama-3. Used by V1 agent.
44
  Retries 3 times with exponential backoff.
45
  """
46
- user_message = f"""QUESTION: {query}
 
47
 
48
  SUPREME COURT JUDGMENT EXCERPTS:
49
  {context}
@@ -51,14 +63,17 @@ SUPREME COURT JUDGMENT EXCERPTS:
51
  Answer based only on the excerpts above. Cite judgment IDs.
52
  Use proper markdown formatting."""
53
 
54
- response = _client.chat.completions.create(
55
- model="llama-3.3-70b-versatile",
56
- messages=[
57
- {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
58
- {"role": "user", "content": user_message}
59
- ],
60
- temperature=0.1,
61
- max_tokens=1500
62
- )
63
 
64
- return response.choices[0].message.content
 
 
 
 
7
  """
8
 
9
  import os
10
+ import logging
11
  from groq import Groq
12
  import httpx
13
  from tenacity import retry, stop_after_attempt, wait_exponential
 
15
 
16
  load_dotenv()
17
 
18
+ logger = logging.getLogger(__name__)
19
+
20
+ api_key = os.getenv("GROQ_API_KEY")
21
+ logger.info(f"GROQ_API_KEY loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
22
+
23
  _client = Groq(
24
+ api_key=api_key,
25
  http_client=httpx.Client(timeout=30.0)
26
  )
27
+ logger.info("Groq client initialized successfully")
28
 
29
 
30
def call_llm_raw(messages: list) -> str:
    """
    Call Groq with a pre-built messages list.

    Used by the V2 agent for Pass 1 and Pass 3.

    Args:
        messages: Chat messages in Groq/OpenAI format
            (list of {"role": ..., "content": ...} dicts).

    Returns:
        The assistant message content of the first choice.

    Raises:
        Exception: Any Groq/HTTP error is logged with its traceback and
            re-raised for the caller to handle. Unlike call_llm, this
            function does NOT retry.
    """
    # Keep the try body minimal: only the API call can raise the errors
    # we want to log here.
    try:
        response = _client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0.3,
            max_tokens=1500,
        )
    except Exception as e:
        # Lazy %-args instead of an f-string: no formatting cost when
        # ERROR is filtered; exc_info=True preserves the full traceback.
        logger.error(
            "Groq API error in call_llm_raw: %s: %s",
            type(e).__name__,
            e,
            exc_info=True,
        )
        raise
    return response.choices[0].message.content
46
 
47
 
48
# NOTE(review): the exact @retry arguments are not visible in this diff hunk;
# stop_after_attempt(3) + wait_exponential match the docstring ("Retries 3
# times with exponential backoff") and the tenacity imports — confirm against
# the full file before merging.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
)
def call_llm(query: str, context: str) -> str:
    """
    Call Groq Llama-3. Used by the V1 agent.

    Retries 3 times with exponential backoff (via tenacity; a raised
    exception triggers the next attempt).

    Args:
        query: The user's legal question.
        context: Supreme Court judgment excerpts to ground the answer.

    Returns:
        The assistant message content of the first choice.

    Raises:
        Exception: Any Groq/HTTP error is logged with its traceback and
            re-raised, which is what drives the retry decorator.
    """
    # Prompt construction cannot raise the API errors we care about,
    # so build it outside the try block.
    user_message = f"""QUESTION: {query}

SUPREME COURT JUDGMENT EXCERPTS:
{context}

Answer based only on the excerpts above. Cite judgment IDs.
Use proper markdown formatting."""

    try:
        response = _client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
                {"role": "user", "content": user_message},
            ],
            temperature=0.1,
            max_tokens=1500,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Lazy %-args instead of an f-string; exc_info keeps the traceback.
        logger.error(
            "Groq API error in call_llm: %s: %s",
            type(e).__name__,
            e,
            exc_info=True,
        )
        raise