Spaces:

CaffeinatedCoding
/

nyayasetu

Running

App Files Community

CaffeinatedCoding committed on 26 days ago

Commit

330e02a

verified ·

1 Parent(s): 2844ebb

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

requirements.txt +1 -0
src/llm.py +24 -21

requirements.txt CHANGED Viewed

@@ -6,6 +6,7 @@ fastapi
 uvicorn
 python-dotenv
 groq
 dvc
 mlflow
 optuna

 uvicorn
 python-dotenv
 groq
+openai
 dvc
 mlflow
 optuna

src/llm.py CHANGED Viewed

@@ -1,12 +1,12 @@
 """
 LLM module. HuggingFace Inference API as primary.
 Works natively from HF Spaces — same infrastructure.
-Groq as local dev fallback.
 WHY HF Inference API?
 HF Spaces can always reach HuggingFace's own APIs.
 No network routing issues. Uses existing HF_TOKEN.
-Same Llama 3.3 70B model as Groq.
 """
 import os
@@ -23,6 +23,10 @@ _hf_client = None
 # ── OpenRouter (free tier, reliable fallback) ──────────────
 _openrouter_client = None
 def _init_hf():
     global _hf_client
     token = os.getenv("HF_TOKEN")
@@ -41,6 +45,7 @@ def _init_hf():
         logger.error(f"HF Inference API init failed: {e}")
         return False
 def _init_openrouter():
     global _openrouter_client
     api_key = os.getenv("OPENROUTER_API_KEY")
@@ -58,9 +63,6 @@ def _init_openrouter():
         logger.error(f"OpenRouter init failed: {e}")
         return False
-# ── Groq fallback (works locally, may be blocked on HF Spaces) ──
-_openrouter_ready = _init_openrouter()
-_groq_client = None
 def _init_groq():
     global _groq_client
@@ -76,13 +78,14 @@ def _init_groq():
         logger.error(f"Groq init failed: {e}")
         return False
 _hf_ready = _init_hf()
 _groq_ready = _init_groq()
 def _call_hf(messages: list) -> str:
     """Call HuggingFace Inference API."""
-    # Convert to HF format
     response = _hf_client.chat_completion(
         messages=messages,
         max_tokens=1500,
@@ -90,7 +93,8 @@ def _call_hf(messages: list) -> str:
     )
     return response.choices[0].message.content
-openrouter(messages: list) -> str:
     """Call OpenRouter free tier."""
     response = _openrouter_client.chat.completions.create(
         model="meta-llama/llama-3.3-70b-instruct:free",
@@ -101,9 +105,19 @@ openrouter(messages: list) -> str:
     return response.choices[0].message.content
-def _call_
 def _call_groq(messages: list) -> str:
-    """Call Groq as fthen OpenRouter, then Groq."""
     if _hf_ready and _hf_client:
         try:
             return _call_hf(messages)
@@ -114,18 +128,7 @@ def _call_groq(messages: list) -> str:
         try:
             return _call_openrouter(messages)
         except Exception as e:
-            logger.warning(f"OpenRouter
-    )
-    return response.choices[0].message.content
-def _call_with_fallback(messages: list) -> str:
-    """Try HF first, fall back to Groq."""
-    if _hf_ready and _hf_client:
-        try:
-            return _call_hf(messages)
-        except Exception as e:
-            logger.warning(f"HF Inference failed: {e}, trying Groq")
     if _groq_ready and _groq_client:
         try:

 """
 LLM module. HuggingFace Inference API as primary.
 Works natively from HF Spaces — same infrastructure.
+OpenRouter and Groq as fallback providers.
 WHY HF Inference API?
 HF Spaces can always reach HuggingFace's own APIs.
 No network routing issues. Uses existing HF_TOKEN.
+Same Llama 3.3 70B model as others.
 """
 import os
 # ── OpenRouter (free tier, reliable fallback) ──────────────
 _openrouter_client = None
+# ── Groq fallback (works locally, may be blocked on HF Spaces) ──
+_groq_client = None
 def _init_hf():
     global _hf_client
     token = os.getenv("HF_TOKEN")
         logger.error(f"HF Inference API init failed: {e}")
         return False
 def _init_openrouter():
     global _openrouter_client
     api_key = os.getenv("OPENROUTER_API_KEY")
         logger.error(f"OpenRouter init failed: {e}")
         return False
 def _init_groq():
     global _groq_client
         logger.error(f"Groq init failed: {e}")
         return False
 _hf_ready = _init_hf()
+_openrouter_ready = _init_openrouter()
 _groq_ready = _init_groq()
 def _call_hf(messages: list) -> str:
     """Call HuggingFace Inference API."""
     response = _hf_client.chat_completion(
         messages=messages,
         max_tokens=1500,
     )
     return response.choices[0].message.content
+def _call_openrouter(messages: list) -> str:
     """Call OpenRouter free tier."""
     response = _openrouter_client.chat.completions.create(
         model="meta-llama/llama-3.3-70b-instruct:free",
     return response.choices[0].message.content
 def _call_groq(messages: list) -> str:
+    """Call Groq as fallback."""
+    response = _groq_client.chat.completions.create(
+        model="llama-3.3-70b-versatile",
+        messages=messages,
+        temperature=0.3,
+        max_tokens=1500
+    )
+    return response.choices[0].message.content
+def _call_with_fallback(messages: list) -> str:
+    """Try HF first, then OpenRouter, then Groq."""
     if _hf_ready and _hf_client:
         try:
             return _call_hf(messages)
         try:
             return _call_openrouter(messages)
         except Exception as e:
+            logger.warning(f"OpenRouter failed: {e}, trying Groq")
     if _groq_ready and _groq_client:
         try: