RayMelius Claude Sonnet 4.6 committed on
Commit
bef23dd
·
1 Parent(s): a32c399

Add Groq as primary LLM provider (free, 14k req/day)

Browse files

HF credit balance is depleted (402). Groq is genuinely free with
no credit system. Priority order: Ollama → Groq → HuggingFace.
- Add GROQ_API_KEY / GROQ_MODEL (llama-3.1-8b-instant) config
- Detect HF 402 credit-depleted error with actionable message
- Update /ai/debug to test Groq first when configured
- Update no-LLM warning to mention Groq

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. dashboard/dashboard.py +66 -26
dashboard/dashboard.py CHANGED
@@ -30,6 +30,9 @@ FRONTEND_URL = os.getenv("FRONTEND_URL", "")
30
  HF_TOKEN = os.getenv("HF_TOKEN", "")
31
  HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
32
  HF_URL = "https://router.huggingface.co/v1/chat/completions"
 
 
 
33
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")
34
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
35
 
@@ -91,9 +94,30 @@ def _call_llm(prompt):
91
  except Exception as e:
92
  print(f"[Dashboard/LLM] Ollama error: {e}")
93
 
94
- # 2. HuggingFace router
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  if not HF_TOKEN:
96
- return None, "HF_TOKEN not set"
97
  print(f"[Dashboard/LLM] Calling HF router ({HF_MODEL})…")
98
  for attempt in range(3):
99
  try:
@@ -123,6 +147,9 @@ def _call_llm(prompt):
123
  err_code = r.json().get("error", {}).get("code", "")
124
  except Exception:
125
  err_code = ""
 
 
 
126
  if err_code == "model_not_supported" or "provider" in r.text.lower():
127
  return None, (f"Model '{HF_MODEL}' not available on any enabled provider. "
128
  "Set HF_MODEL secret to a supported model (e.g. Qwen/Qwen2.5-7B-Instruct).")
@@ -138,8 +165,8 @@ def _call_llm(prompt):
138
 
139
  def _generate_and_broadcast():
140
  """Background thread: call LLM, publish result via SSE + Kafka."""
141
- if not HF_TOKEN and not OLLAMA_HOST:
142
- err = {"text": "⚠️ No LLM configured. Set HF_TOKEN in Space Settings → Secrets.", "source": "config", "timestamp": time.time()}
143
  broadcast_event("ai_insight", err)
144
  return
145
 
@@ -592,38 +619,51 @@ def trigger_ai_insight():
592
  def ai_debug():
593
  """Synchronous LLM test — returns raw API result for debugging."""
594
  result = {
595
- "hf_token_set": bool(HF_TOKEN),
 
 
596
  "hf_token_prefix": HF_TOKEN[:8] + "…" if HF_TOKEN else None,
597
- "hf_model": HF_MODEL,
598
- "hf_url": HF_URL,
599
- "ollama_host": OLLAMA_HOST,
600
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  if not HF_TOKEN:
602
- result["error"] = "HF_TOKEN not set"
603
  return jsonify(result)
604
  try:
605
- r = requests.post(
606
- HF_URL,
607
- headers={"Authorization": f"Bearer {HF_TOKEN}",
608
- "Content-Type": "application/json"},
609
- json={"model": HF_MODEL,
610
- "messages": [{"role": "user", "content": "Reply with exactly: OK"}],
611
- "max_tokens": 10},
612
- timeout=30,
613
- )
614
- result["http_status"] = r.status_code
615
- result["response_body"] = r.text[:500]
616
  try:
617
  rj = r.json()
618
- result["response_json"] = rj
619
- err_code = rj.get("error", {}).get("code", "")
620
- if err_code == "model_not_supported" or "provider" in r.text.lower():
621
- result["fix"] = (f"Model '{HF_MODEL}' not available on any enabled provider. "
622
- "Try a different model or check huggingface.co/settings/inference-providers.")
623
  except Exception:
624
  pass
625
  except Exception as e:
626
- result["exception"] = str(e)
627
  return jsonify(result)
628
 
629
 
 
30
  HF_TOKEN = os.getenv("HF_TOKEN", "")
31
  HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
32
  HF_URL = "https://router.huggingface.co/v1/chat/completions"
33
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
34
+ GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
35
+ GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
36
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")
37
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
38
 
 
94
  except Exception as e:
95
  print(f"[Dashboard/LLM] Ollama error: {e}")
96
 
97
+ # 2. Groq (free, fast)
98
+ if GROQ_API_KEY:
99
+ try:
100
+ r = requests.post(GROQ_URL,
101
+ headers={"Authorization": f"Bearer {GROQ_API_KEY}",
102
+ "Content-Type": "application/json"},
103
+ json={"model": GROQ_MODEL,
104
+ "messages": [{"role": "user", "content": prompt}],
105
+ "max_tokens": 180,
106
+ "temperature": 0.7},
107
+ timeout=30)
108
+ print(f"[Dashboard/LLM] Groq status {r.status_code}")
109
+ if r.status_code == 200:
110
+ text = r.json()["choices"][0]["message"]["content"].strip()
111
+ if text:
112
+ return text, f"Groq/{GROQ_MODEL}"
113
+ else:
114
+ print(f"[Dashboard/LLM] Groq error: {r.text[:200]}")
115
+ except Exception as e:
116
+ print(f"[Dashboard/LLM] Groq exception: {e}")
117
+
118
+ # 3. HuggingFace router
119
  if not HF_TOKEN:
120
+ return None, "No LLM configured. Set GROQ_API_KEY (free at console.groq.com) or HF_TOKEN."
121
  print(f"[Dashboard/LLM] Calling HF router ({HF_MODEL})…")
122
  for attempt in range(3):
123
  try:
 
147
  err_code = r.json().get("error", {}).get("code", "")
148
  except Exception:
149
  err_code = ""
150
+ if r.status_code == 402 or "credit" in r.text.lower() or "depleted" in r.text.lower():
151
+ return None, ("HF credit balance depleted. Add GROQ_API_KEY secret instead "
152
+ "(free at console.groq.com — 14,400 req/day).")
153
  if err_code == "model_not_supported" or "provider" in r.text.lower():
154
  return None, (f"Model '{HF_MODEL}' not available on any enabled provider. "
155
  "Set HF_MODEL secret to a supported model (e.g. Qwen/Qwen2.5-7B-Instruct).")
 
165
 
166
  def _generate_and_broadcast():
167
  """Background thread: call LLM, publish result via SSE + Kafka."""
168
+ if not HF_TOKEN and not OLLAMA_HOST and not GROQ_API_KEY:
169
+ err = {"text": "⚠️ No LLM configured. Add GROQ_API_KEY secret (free at console.groq.com).", "source": "config", "timestamp": time.time()}
170
  broadcast_event("ai_insight", err)
171
  return
172
 
 
619
  def ai_debug():
620
  """Synchronous LLM test — returns raw API result for debugging."""
621
  result = {
622
+ "groq_key_set": bool(GROQ_API_KEY),
623
+ "groq_model": GROQ_MODEL,
624
+ "hf_token_set": bool(HF_TOKEN),
625
  "hf_token_prefix": HF_TOKEN[:8] + "…" if HF_TOKEN else None,
626
+ "hf_model": HF_MODEL,
627
+ "ollama_host": OLLAMA_HOST,
 
628
  }
629
+ # Test Groq if configured
630
+ if GROQ_API_KEY:
631
+ try:
632
+ r = requests.post(GROQ_URL,
633
+ headers={"Authorization": f"Bearer {GROQ_API_KEY}",
634
+ "Content-Type": "application/json"},
635
+ json={"model": GROQ_MODEL,
636
+ "messages": [{"role": "user", "content": "Reply with exactly: OK"}],
637
+ "max_tokens": 10},
638
+ timeout=15)
639
+ result["groq_status"] = r.status_code
640
+ result["groq_response"] = r.text[:200]
641
+ except Exception as e:
642
+ result["groq_exception"] = str(e)
643
+ return jsonify(result)
644
+ # Fall back to testing HF
645
  if not HF_TOKEN:
646
+ result["error"] = "No LLM configured. Add GROQ_API_KEY secret (free at console.groq.com)."
647
  return jsonify(result)
648
  try:
649
+ r = requests.post(HF_URL,
650
+ headers={"Authorization": f"Bearer {HF_TOKEN}",
651
+ "Content-Type": "application/json"},
652
+ json={"model": HF_MODEL,
653
+ "messages": [{"role": "user", "content": "Reply with exactly: OK"}],
654
+ "max_tokens": 10},
655
+ timeout=30)
656
+ result["hf_status"] = r.status_code
657
+ result["hf_response"] = r.text[:400]
 
 
658
  try:
659
  rj = r.json()
660
+ result["hf_response_json"] = rj
661
+ if r.status_code == 402 or "credit" in r.text.lower():
662
+ result["fix"] = "HF credit depleted. Add GROQ_API_KEY secret (free at console.groq.com)."
 
 
663
  except Exception:
664
  pass
665
  except Exception as e:
666
+ result["hf_exception"] = str(e)
667
  return jsonify(result)
668
 
669