Upload folder using huggingface_hub
src/llm.py +34 -19
src/llm.py
CHANGED
@@ -7,6 +7,7 @@ WHY one call per query? Multi-step chains add latency and failure points.
"""

import os
+import logging
from groq import Groq
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential
@@ -14,10 +15,16 @@ from dotenv import load_dotenv

load_dotenv()

+logger = logging.getLogger(__name__)
+
+api_key = os.getenv("GROQ_API_KEY")
+logger.info(f"GROQ_API_KEY loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
+
_client = Groq(
-    api_key=
+    api_key=api_key,
    http_client=httpx.Client(timeout=30.0)
)
+logger.info("Groq client initialized successfully")


def call_llm_raw(messages: list) -> str:
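Review note: logging whether GROQ_API_KEY is present, plus its length, without ever logging the key itself, is a sound way to debug a missing Space secret. A possible follow-up, not part of this commit, would be to fail fast at import time when the key is absent; a minimal sketch, with hypothetical error wording:

```python
# Hypothetical fail-fast guard (not in this commit): surface a clear error
# at startup instead of an auth failure on the first API call.
if not api_key:
    raise RuntimeError("GROQ_API_KEY is not set; add it as a secret in the Space settings.")
```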
@@ -25,13 +32,17 @@
    Call Groq with pre-built messages list.
    Used by V2 agent for Pass 1 and Pass 3.
    """
-
-
-
-
-
-
-
+    try:
+        response = _client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=messages,
+            temperature=0.3,
+            max_tokens=1500
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        logger.error(f"Groq API error in call_llm_raw: {type(e).__name__}: {str(e)}", exc_info=True)
+        raise


@retry(
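The hunk above ends at `@retry(` because the decorator's arguments are unchanged and therefore not shown in the diff. Given the tenacity imports and the docstring ("Retries 3 times with exponential backoff"), the decorator presumably looks roughly like this sketch; the exact wait parameters are assumptions:

```python
# Presumed shape of the unchanged decorator; stop_after_attempt(3) matches
# the docstring, but the wait_exponential bounds below are guesses.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
)
def call_llm(query: str, context: str) -> str:
    ...
```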
@@ -43,7 +54,8 @@ def call_llm(query: str, context: str) -> str:
    Call Groq Llama-3. Used by V1 agent.
    Retries 3 times with exponential backoff.
    """
-
+    try:
+        user_message = f"""QUESTION: {query}

SUPREME COURT JUDGMENT EXCERPTS:
{context}
@@ -51,14 +63,17 @@
Answer based only on the excerpts above. Cite judgment IDs.
Use proper markdown formatting."""

-
-
-
-
-
-
-
-
-
+        response = _client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=[
+                {"role": "system", "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."},
+                {"role": "user", "content": user_message}
+            ],
+            temperature=0.1,
+            max_tokens=1500
+        )

-
+        return response.choices[0].message.content
+    except Exception as e:
+        logger.error(f"Groq API error in call_llm: {type(e).__name__}: {str(e)}", exc_info=True)
+        raise
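For reference, `call_llm_raw` takes a pre-built OpenAI-style messages list, so a caller sketch looks like this (the import path `src.llm` matches this repo's layout; the message contents are invented for the example):

```python
# Illustrative caller; message contents are placeholders.
from src.llm import call_llm_raw

answer = call_llm_raw([
    {"role": "system", "content": "You are a concise legal assistant."},
    {"role": "user", "content": "Summarize the holding in one sentence."},
])
print(answer)
```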
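Worth noting: `call_llm_raw` runs at temperature 0.3 while the V1 `call_llm` path uses 0.1, keeping the question-answering path closer to deterministic, in line with the module's one-call-per-query design. A usage sketch for the V1 path (the query and excerpt strings are invented):

```python
# Illustrative V1-path call; the query and excerpt text are placeholders.
from src.llm import call_llm

reply = call_llm(
    query="What did the Court hold on anticipatory bail?",
    context="[JUDGMENT-123] ...excerpt text...",
)
print(reply)  # markdown answer citing judgment IDs, ending with the not-legal-advice note
```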