Add Groq as primary LLM provider (free, 14k req/day)
Browse files

HF credit balance is depleted (402). Groq is genuinely free with
no credit system. Priority order: Ollama → Groq → HuggingFace.
- Add GROQ_API_KEY / GROQ_MODEL (llama-3.1-8b-instant) config
- Detect HF 402 credit-depleted error with actionable message
- Update /ai/debug to test Groq first when configured
- Update no-LLM warning to mention Groq
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- dashboard/dashboard.py +66 -26
dashboard/dashboard.py
CHANGED
|
@@ -30,6 +30,9 @@ FRONTEND_URL = os.getenv("FRONTEND_URL", "")
|
|
| 30 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 31 |
HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
|
| 32 |
HF_URL = "https://router.huggingface.co/v1/chat/completions"
|
|
|
|
|
|
|
|
|
|
| 33 |
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")
|
| 34 |
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
|
| 35 |
|
|
@@ -91,9 +94,30 @@ def _call_llm(prompt):
|
|
| 91 |
except Exception as e:
|
| 92 |
print(f"[Dashboard/LLM] Ollama error: {e}")
|
| 93 |
|
| 94 |
-
# 2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
if not HF_TOKEN:
|
| 96 |
-
return None, "
|
| 97 |
print(f"[Dashboard/LLM] Calling HF router ({HF_MODEL})…")
|
| 98 |
for attempt in range(3):
|
| 99 |
try:
|
|
@@ -123,6 +147,9 @@ def _call_llm(prompt):
|
|
| 123 |
err_code = r.json().get("error", {}).get("code", "")
|
| 124 |
except Exception:
|
| 125 |
err_code = ""
|
|
|
|
|
|
|
|
|
|
| 126 |
if err_code == "model_not_supported" or "provider" in r.text.lower():
|
| 127 |
return None, (f"Model '{HF_MODEL}' not available on any enabled provider. "
|
| 128 |
"Set HF_MODEL secret to a supported model (e.g. Qwen/Qwen2.5-7B-Instruct).")
|
|
@@ -138,8 +165,8 @@ def _call_llm(prompt):
|
|
| 138 |
|
| 139 |
def _generate_and_broadcast():
|
| 140 |
"""Background thread: call LLM, publish result via SSE + Kafka."""
|
| 141 |
-
if not HF_TOKEN and not OLLAMA_HOST:
|
| 142 |
-
err = {"text": "⚠️ No LLM configured.
|
| 143 |
broadcast_event("ai_insight", err)
|
| 144 |
return
|
| 145 |
|
|
@@ -592,38 +619,51 @@ def trigger_ai_insight():
|
|
| 592 |
def ai_debug():
|
| 593 |
"""Synchronous LLM test — returns raw API result for debugging."""
|
| 594 |
result = {
|
| 595 |
-
"
|
|
|
|
|
|
|
| 596 |
"hf_token_prefix": HF_TOKEN[:8] + "…" if HF_TOKEN else None,
|
| 597 |
-
"hf_model":
|
| 598 |
-
"
|
| 599 |
-
"ollama_host": OLLAMA_HOST,
|
| 600 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
if not HF_TOKEN:
|
| 602 |
-
result["error"] = "
|
| 603 |
return jsonify(result)
|
| 604 |
try:
|
| 605 |
-
r = requests.post(
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
result["http_status"] = r.status_code
|
| 615 |
-
result["response_body"] = r.text[:500]
|
| 616 |
try:
|
| 617 |
rj = r.json()
|
| 618 |
-
result["
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
result["fix"] = (f"Model '{HF_MODEL}' not available on any enabled provider. "
|
| 622 |
-
"Try a different model or check huggingface.co/settings/inference-providers.")
|
| 623 |
except Exception:
|
| 624 |
pass
|
| 625 |
except Exception as e:
|
| 626 |
-
result["
|
| 627 |
return jsonify(result)
|
| 628 |
|
| 629 |
|
|
|
|
| 30 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 31 |
HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
|
| 32 |
HF_URL = "https://router.huggingface.co/v1/chat/completions"
|
| 33 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
| 34 |
+
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
|
| 35 |
+
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
|
| 36 |
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")
|
| 37 |
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
|
| 38 |
|
|
|
|
| 94 |
except Exception as e:
|
| 95 |
print(f"[Dashboard/LLM] Ollama error: {e}")
|
| 96 |
|
| 97 |
+
# 2. Groq (free, fast)
|
| 98 |
+
if GROQ_API_KEY:
|
| 99 |
+
try:
|
| 100 |
+
r = requests.post(GROQ_URL,
|
| 101 |
+
headers={"Authorization": f"Bearer {GROQ_API_KEY}",
|
| 102 |
+
"Content-Type": "application/json"},
|
| 103 |
+
json={"model": GROQ_MODEL,
|
| 104 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 105 |
+
"max_tokens": 180,
|
| 106 |
+
"temperature": 0.7},
|
| 107 |
+
timeout=30)
|
| 108 |
+
print(f"[Dashboard/LLM] Groq status {r.status_code}")
|
| 109 |
+
if r.status_code == 200:
|
| 110 |
+
text = r.json()["choices"][0]["message"]["content"].strip()
|
| 111 |
+
if text:
|
| 112 |
+
return text, f"Groq/{GROQ_MODEL}"
|
| 113 |
+
else:
|
| 114 |
+
print(f"[Dashboard/LLM] Groq error: {r.text[:200]}")
|
| 115 |
+
except Exception as e:
|
| 116 |
+
print(f"[Dashboard/LLM] Groq exception: {e}")
|
| 117 |
+
|
| 118 |
+
# 3. HuggingFace router
|
| 119 |
if not HF_TOKEN:
|
| 120 |
+
return None, "No LLM configured. Set GROQ_API_KEY (free at console.groq.com) or HF_TOKEN."
|
| 121 |
print(f"[Dashboard/LLM] Calling HF router ({HF_MODEL})…")
|
| 122 |
for attempt in range(3):
|
| 123 |
try:
|
|
|
|
| 147 |
err_code = r.json().get("error", {}).get("code", "")
|
| 148 |
except Exception:
|
| 149 |
err_code = ""
|
| 150 |
+
if r.status_code == 402 or "credit" in r.text.lower() or "depleted" in r.text.lower():
|
| 151 |
+
return None, ("HF credit balance depleted. Add GROQ_API_KEY secret instead "
|
| 152 |
+
"(free at console.groq.com — 14,400 req/day).")
|
| 153 |
if err_code == "model_not_supported" or "provider" in r.text.lower():
|
| 154 |
return None, (f"Model '{HF_MODEL}' not available on any enabled provider. "
|
| 155 |
"Set HF_MODEL secret to a supported model (e.g. Qwen/Qwen2.5-7B-Instruct).")
|
|
|
|
| 165 |
|
| 166 |
def _generate_and_broadcast():
|
| 167 |
"""Background thread: call LLM, publish result via SSE + Kafka."""
|
| 168 |
+
if not HF_TOKEN and not OLLAMA_HOST and not GROQ_API_KEY:
|
| 169 |
+
err = {"text": "⚠️ No LLM configured. Add GROQ_API_KEY secret (free at console.groq.com).", "source": "config", "timestamp": time.time()}
|
| 170 |
broadcast_event("ai_insight", err)
|
| 171 |
return
|
| 172 |
|
|
|
|
| 619 |
def ai_debug():
|
| 620 |
"""Synchronous LLM test — returns raw API result for debugging."""
|
| 621 |
result = {
|
| 622 |
+
"groq_key_set": bool(GROQ_API_KEY),
|
| 623 |
+
"groq_model": GROQ_MODEL,
|
| 624 |
+
"hf_token_set": bool(HF_TOKEN),
|
| 625 |
"hf_token_prefix": HF_TOKEN[:8] + "…" if HF_TOKEN else None,
|
| 626 |
+
"hf_model": HF_MODEL,
|
| 627 |
+
"ollama_host": OLLAMA_HOST,
|
|
|
|
| 628 |
}
|
| 629 |
+
# Test Groq if configured
|
| 630 |
+
if GROQ_API_KEY:
|
| 631 |
+
try:
|
| 632 |
+
r = requests.post(GROQ_URL,
|
| 633 |
+
headers={"Authorization": f"Bearer {GROQ_API_KEY}",
|
| 634 |
+
"Content-Type": "application/json"},
|
| 635 |
+
json={"model": GROQ_MODEL,
|
| 636 |
+
"messages": [{"role": "user", "content": "Reply with exactly: OK"}],
|
| 637 |
+
"max_tokens": 10},
|
| 638 |
+
timeout=15)
|
| 639 |
+
result["groq_status"] = r.status_code
|
| 640 |
+
result["groq_response"] = r.text[:200]
|
| 641 |
+
except Exception as e:
|
| 642 |
+
result["groq_exception"] = str(e)
|
| 643 |
+
return jsonify(result)
|
| 644 |
+
# Fall back to testing HF
|
| 645 |
if not HF_TOKEN:
|
| 646 |
+
result["error"] = "No LLM configured. Add GROQ_API_KEY secret (free at console.groq.com)."
|
| 647 |
return jsonify(result)
|
| 648 |
try:
|
| 649 |
+
r = requests.post(HF_URL,
|
| 650 |
+
headers={"Authorization": f"Bearer {HF_TOKEN}",
|
| 651 |
+
"Content-Type": "application/json"},
|
| 652 |
+
json={"model": HF_MODEL,
|
| 653 |
+
"messages": [{"role": "user", "content": "Reply with exactly: OK"}],
|
| 654 |
+
"max_tokens": 10},
|
| 655 |
+
timeout=30)
|
| 656 |
+
result["hf_status"] = r.status_code
|
| 657 |
+
result["hf_response"] = r.text[:400]
|
|
|
|
|
|
|
| 658 |
try:
|
| 659 |
rj = r.json()
|
| 660 |
+
result["hf_response_json"] = rj
|
| 661 |
+
if r.status_code == 402 or "credit" in r.text.lower():
|
| 662 |
+
result["fix"] = "HF credit depleted. Add GROQ_API_KEY secret (free at console.groq.com)."
|
|
|
|
|
|
|
| 663 |
except Exception:
|
| 664 |
pass
|
| 665 |
except Exception as e:
|
| 666 |
+
result["hf_exception"] = str(e)
|
| 667 |
return jsonify(result)
|
| 668 |
|
| 669 |
|