Add LLM call counter, test button, and better HF error handling
Browse files- LLM pill now shows ×N call count each tick in green when active
- LLM popup has a 🔬 Test button — makes a real call and shows raw response
- GET /api/llm/test endpoint for diagnosing provider issues
- HF 401/403 triggers circuit breaker + nokey status (gated model / bad token)
- HF 503 reads estimated_time from response body and waits accordingly
- auth_error field exposed on /llm/test for actionable diagnostics
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/api/routes.py +18 -0
- src/soci/engine/llm.py +22 -4
- web/index.html +26 -1
src/soci/api/routes.py
CHANGED
|
@@ -283,6 +283,24 @@ async def get_llm_providers():
|
|
| 283 |
return {"current": current, "providers": providers}
|
| 284 |
|
| 285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
@router.post("/llm/provider")
|
| 287 |
async def set_llm_provider(req: SwitchProviderRequest):
|
| 288 |
"""Hot-swap the active LLM provider."""
|
|
|
|
| 283 |
return {"current": current, "providers": providers}
|
| 284 |
|
| 285 |
|
| 286 |
+
@router.get("/llm/test")
|
| 287 |
+
async def test_llm():
|
| 288 |
+
"""Make a minimal LLM call and return the raw response β for diagnosing provider issues."""
|
| 289 |
+
from soci.api.server import get_simulation
|
| 290 |
+
sim = get_simulation()
|
| 291 |
+
try:
|
| 292 |
+
raw = await sim.llm.complete(
|
| 293 |
+
system="You are a test assistant.",
|
| 294 |
+
user_message='Reply with exactly: {"ok": true}',
|
| 295 |
+
max_tokens=32,
|
| 296 |
+
)
|
| 297 |
+
return {"ok": bool(raw), "raw": raw, "provider": getattr(sim.llm, "provider", "?"),
|
| 298 |
+
"model": getattr(sim.llm, "default_model", "?"),
|
| 299 |
+
"auth_error": getattr(sim.llm, "_auth_error", "")}
|
| 300 |
+
except Exception as e:
|
| 301 |
+
return {"ok": False, "raw": "", "error": str(e)}
|
| 302 |
+
|
| 303 |
+
|
| 304 |
@router.post("/llm/provider")
|
| 305 |
async def set_llm_provider(req: SwitchProviderRequest):
|
| 306 |
"""Hot-swap the active LLM provider."""
|
src/soci/engine/llm.py
CHANGED
|
@@ -843,6 +843,7 @@ class HFInferenceClient:
|
|
| 843 |
timeout=120.0, # HF can be slow under load
|
| 844 |
)
|
| 845 |
self._rate_limited_until: float = 0.0
|
|
|
|
| 846 |
|
| 847 |
def _is_quota_exhausted(self) -> bool:
|
| 848 |
return time.monotonic() < self._rate_limited_until
|
|
@@ -861,6 +862,8 @@ class HFInferenceClient:
|
|
| 861 |
def llm_status(self) -> str:
|
| 862 |
if not self.api_key:
|
| 863 |
return "nokey"
|
|
|
|
|
|
|
| 864 |
return "limited" if self._is_quota_exhausted() else "active"
|
| 865 |
|
| 866 |
async def complete(
|
|
@@ -898,6 +901,7 @@ class HFInferenceClient:
|
|
| 898 |
return data["choices"][0]["message"]["content"]
|
| 899 |
except httpx.HTTPStatusError as e:
|
| 900 |
status = e.response.status_code
|
|
|
|
| 901 |
if status == 429:
|
| 902 |
retry_after = e.response.headers.get("retry-after", "10")
|
| 903 |
try:
|
|
@@ -910,13 +914,27 @@ class HFInferenceClient:
|
|
| 910 |
return ""
|
| 911 |
logger.warning(f"HF rate limited, waiting {wait}s")
|
| 912 |
await asyncio.sleep(wait)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 913 |
elif status in (503, 504):
|
| 914 |
-
# Model loading
|
| 915 |
-
|
| 916 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 917 |
await asyncio.sleep(wait)
|
| 918 |
else:
|
| 919 |
-
logger.error(f"HF HTTP error: {status} {
|
| 920 |
if attempt == self.max_retries - 1:
|
| 921 |
return ""
|
| 922 |
await asyncio.sleep(2)
|
|
|
|
| 843 |
timeout=120.0, # HF can be slow under load
|
| 844 |
)
|
| 845 |
self._rate_limited_until: float = 0.0
|
| 846 |
+
self._auth_error: str = ""
|
| 847 |
|
| 848 |
def _is_quota_exhausted(self) -> bool:
|
| 849 |
return time.monotonic() < self._rate_limited_until
|
|
|
|
| 862 |
def llm_status(self) -> str:
|
| 863 |
if not self.api_key:
|
| 864 |
return "nokey"
|
| 865 |
+
if self._auth_error:
|
| 866 |
+
return "nokey" # gated model / bad token
|
| 867 |
return "limited" if self._is_quota_exhausted() else "active"
|
| 868 |
|
| 869 |
async def complete(
|
|
|
|
| 901 |
return data["choices"][0]["message"]["content"]
|
| 902 |
except httpx.HTTPStatusError as e:
|
| 903 |
status = e.response.status_code
|
| 904 |
+
body = e.response.text[:300]
|
| 905 |
if status == 429:
|
| 906 |
retry_after = e.response.headers.get("retry-after", "10")
|
| 907 |
try:
|
|
|
|
| 914 |
return ""
|
| 915 |
logger.warning(f"HF rate limited, waiting {wait}s")
|
| 916 |
await asyncio.sleep(wait)
|
| 917 |
+
elif status in (401, 403):
|
| 918 |
+
# Auth failure or gated model — disable for a long window
|
| 919 |
+
self._rate_limited_until = time.monotonic() + 3600
|
| 920 |
+
self._auth_error = body
|
| 921 |
+
logger.error(
|
| 922 |
+
f"HF auth error ({status}): {body} β "
|
| 923 |
+
"Check HF_TOKEN and accept model license at huggingface.co"
|
| 924 |
+
)
|
| 925 |
+
return ""
|
| 926 |
elif status in (503, 504):
|
| 927 |
+
# Model loading — read estimated_time from body if available
|
| 928 |
+
try:
|
| 929 |
+
import json as _json
|
| 930 |
+
estimated = _json.loads(e.response.text).get("estimated_time", 0)
|
| 931 |
+
wait = max(float(estimated), 5.0 * (attempt + 1))
|
| 932 |
+
except Exception:
|
| 933 |
+
wait = 5.0 * (attempt + 1)
|
| 934 |
+
logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
|
| 935 |
await asyncio.sleep(wait)
|
| 936 |
else:
|
| 937 |
+
logger.error(f"HF HTTP error: {status} {body}")
|
| 938 |
if attempt == self.max_retries - 1:
|
| 939 |
return ""
|
| 940 |
await asyncio.sleep(2)
|
web/index.html
CHANGED
|
@@ -2908,8 +2908,10 @@ function processStateData(data) {
|
|
| 2908 |
else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
|
| 2909 |
else { dotColor = '#f0c040'; statusTip = 'idle — no calls needed'; }
|
| 2910 |
|
|
|
|
|
|
|
| 2911 |
const el = document.getElementById('llm-model');
|
| 2912 |
-
el.innerHTML = `${icon} ${label} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
|
| 2913 |
el.title = `${data.llm_provider}: ${data.llm_model} — ${statusTip}`;
|
| 2914 |
}
|
| 2915 |
|
|
@@ -3391,6 +3393,29 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3391 |
});
|
| 3392 |
popup.appendChild(row);
|
| 3393 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3394 |
popup.style.display = 'block';
|
| 3395 |
_llmPopupOpen = true;
|
| 3396 |
} catch { showToast('Could not fetch providers', 'event'); }
|
|
|
|
| 2908 |
else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
|
| 2909 |
else { dotColor = '#f0c040'; statusTip = 'idle — no calls needed'; }
|
| 2910 |
|
| 2911 |
+
const calls = data.llm_calls_last_tick || 0;
|
| 2912 |
+
const callBadge = calls > 0 ? ` <span style="font-size:10px;color:#4ecca3;opacity:0.85">×${calls}</span>` : '';
|
| 2913 |
const el = document.getElementById('llm-model');
|
| 2914 |
+
el.innerHTML = `${icon} ${label}${callBadge} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
|
| 2915 |
el.title = `${data.llm_provider}: ${data.llm_model} — ${statusTip}`;
|
| 2916 |
}
|
| 2917 |
|
|
|
|
| 3393 |
});
|
| 3394 |
popup.appendChild(row);
|
| 3395 |
}
|
| 3396 |
+
// Test button
|
| 3397 |
+
const sep = document.createElement('div');
|
| 3398 |
+
sep.style.cssText = 'border-top:1px solid #0f3460;margin:4px 0';
|
| 3399 |
+
popup.appendChild(sep);
|
| 3400 |
+
const testRow = document.createElement('div');
|
| 3401 |
+
testRow.className = 'llm-opt';
|
| 3402 |
+
testRow.innerHTML = `<span class="llm-check"></span><span style="font-size:15px">🔬</span><span>Test current LLM…</span>`;
|
| 3403 |
+
testRow.addEventListener('click', async (ev) => {
|
| 3404 |
+
ev.stopPropagation();
|
| 3405 |
+
popup.style.display = 'none'; _llmPopupOpen = false;
|
| 3406 |
+
showToast('Testing LLM…', 'event');
|
| 3407 |
+
try {
|
| 3408 |
+
const r = await fetch(`${API_BASE}/llm/test`);
|
| 3409 |
+
const d = await r.json();
|
| 3410 |
+
if (d.ok) {
|
| 3411 |
+
showToast(`✓ LLM OK — "${d.raw.slice(0,60)}"`, 'conv');
|
| 3412 |
+
} else {
|
| 3413 |
+
const msg = d.auth_error ? `Auth error: ${d.auth_error.slice(0,80)}` : (d.error || d.raw || 'empty response');
|
| 3414 |
+
showToast(`✗ LLM failed: ${msg}`, 'event');
|
| 3415 |
+
}
|
| 3416 |
+
} catch (err) { showToast('Test request failed', 'event'); }
|
| 3417 |
+
});
|
| 3418 |
+
popup.appendChild(testRow);
|
| 3419 |
popup.style.display = 'block';
|
| 3420 |
_llmPopupOpen = true;
|
| 3421 |
} catch { showToast('Could not fetch providers', 'event'); }
|