RayMelius Claude Sonnet 4.6 committed on
Commit
bd4679e
·
1 Parent(s): 1ed2ceb

Add LLM call counter, test button, and better HF error handling

Browse files

- LLM pill now shows ×N call count each tick in green when active
- LLM popup has a 🔬 Test button — makes a real call and shows raw response
- GET /api/llm/test endpoint for diagnosing provider issues
- HF 401/403 triggers circuit breaker + nokey status (gated model / bad token)
- HF 503 reads estimated_time from response body and waits accordingly
- auth_error field exposed on /llm/test for actionable diagnostics

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. src/soci/api/routes.py +18 -0
  2. src/soci/engine/llm.py +22 -4
  3. web/index.html +26 -1
src/soci/api/routes.py CHANGED
@@ -283,6 +283,24 @@ async def get_llm_providers():
283
  return {"current": current, "providers": providers}
284
 
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  @router.post("/llm/provider")
287
  async def set_llm_provider(req: SwitchProviderRequest):
288
  """Hot-swap the active LLM provider."""
 
283
  return {"current": current, "providers": providers}
284
 
285
 
286
@router.get("/llm/test")
async def test_llm():
    """Make a minimal LLM call and return the raw response — for diagnosing provider issues.

    Returns a JSON-serializable dict:
        ok:         True when the provider returned a non-empty completion.
        raw:        the raw completion text ("" on failure).
        provider:   active client's provider name (best-effort via getattr).
        model:      active client's default model (best-effort via getattr).
        auth_error: last auth failure recorded by the client, "" if none.
        error:      exception text, present only when the call itself raised.
    """
    from soci.api.server import get_simulation

    sim = get_simulation()
    try:
        raw = await sim.llm.complete(
            system="You are a test assistant.",
            user_message='Reply with exactly: {"ok": true}',
            max_tokens=32,
        )
    except Exception as e:  # diagnostic endpoint: report the failure, never propagate
        # Expose auth_error on this path too — the UI reads d.auth_error when
        # ok is false, and the raising path previously omitted it.
        return {
            "ok": False,
            "raw": "",
            "error": str(e),
            "auth_error": getattr(sim.llm, "_auth_error", ""),
        }
    return {
        "ok": bool(raw),
        "raw": raw,
        "provider": getattr(sim.llm, "provider", "?"),
        "model": getattr(sim.llm, "default_model", "?"),
        "auth_error": getattr(sim.llm, "_auth_error", ""),
    }
302
+
303
+
304
  @router.post("/llm/provider")
305
  async def set_llm_provider(req: SwitchProviderRequest):
306
  """Hot-swap the active LLM provider."""
src/soci/engine/llm.py CHANGED
@@ -843,6 +843,7 @@ class HFInferenceClient:
843
  timeout=120.0, # HF can be slow under load
844
  )
845
  self._rate_limited_until: float = 0.0
 
846
 
847
  def _is_quota_exhausted(self) -> bool:
848
  return time.monotonic() < self._rate_limited_until
@@ -861,6 +862,8 @@ class HFInferenceClient:
861
  def llm_status(self) -> str:
862
  if not self.api_key:
863
  return "nokey"
 
 
864
  return "limited" if self._is_quota_exhausted() else "active"
865
 
866
  async def complete(
@@ -898,6 +901,7 @@ class HFInferenceClient:
898
  return data["choices"][0]["message"]["content"]
899
  except httpx.HTTPStatusError as e:
900
  status = e.response.status_code
 
901
  if status == 429:
902
  retry_after = e.response.headers.get("retry-after", "10")
903
  try:
@@ -910,13 +914,27 @@ class HFInferenceClient:
910
  return ""
911
  logger.warning(f"HF rate limited, waiting {wait}s")
912
  await asyncio.sleep(wait)
 
 
 
 
 
 
 
 
 
913
  elif status in (503, 504):
914
- # Model loading / gateway timeout β€” back off and retry
915
- wait = 5.0 * (attempt + 1)
916
- logger.warning(f"HF model loading ({status}), waiting {wait}s")
 
 
 
 
 
917
  await asyncio.sleep(wait)
918
  else:
919
- logger.error(f"HF HTTP error: {status} {e.response.text[:200]}")
920
  if attempt == self.max_retries - 1:
921
  return ""
922
  await asyncio.sleep(2)
 
843
  timeout=120.0, # HF can be slow under load
844
  )
845
  self._rate_limited_until: float = 0.0
846
+ self._auth_error: str = ""
847
 
848
  def _is_quota_exhausted(self) -> bool:
849
  return time.monotonic() < self._rate_limited_until
 
862
  def llm_status(self) -> str:
863
  if not self.api_key:
864
  return "nokey"
865
+ if self._auth_error:
866
+ return "nokey" # gated model / bad token
867
  return "limited" if self._is_quota_exhausted() else "active"
868
 
869
  async def complete(
 
901
  return data["choices"][0]["message"]["content"]
902
  except httpx.HTTPStatusError as e:
903
  status = e.response.status_code
904
+ body = e.response.text[:300]
905
  if status == 429:
906
  retry_after = e.response.headers.get("retry-after", "10")
907
  try:
 
914
  return ""
915
  logger.warning(f"HF rate limited, waiting {wait}s")
916
  await asyncio.sleep(wait)
917
+ elif status in (401, 403):
918
+ # Auth failure or gated model β€” disable for a long window
919
+ self._rate_limited_until = time.monotonic() + 3600
920
+ self._auth_error = body
921
+ logger.error(
922
+ f"HF auth error ({status}): {body} β€” "
923
+ "Check HF_TOKEN and accept model license at huggingface.co"
924
+ )
925
+ return ""
926
  elif status in (503, 504):
927
+ # Model loading β€” read estimated_time from body if available
928
+ try:
929
+ import json as _json
930
+ estimated = _json.loads(e.response.text).get("estimated_time", 0)
931
+ wait = max(float(estimated), 5.0 * (attempt + 1))
932
+ except Exception:
933
+ wait = 5.0 * (attempt + 1)
934
+ logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
935
  await asyncio.sleep(wait)
936
  else:
937
+ logger.error(f"HF HTTP error: {status} {body}")
938
  if attempt == self.max_retries - 1:
939
  return ""
940
  await asyncio.sleep(2)
web/index.html CHANGED
@@ -2908,8 +2908,10 @@ function processStateData(data) {
2908
  else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
2909
  else { dotColor = '#f0c040'; statusTip = 'idle β€” no calls needed'; }
2910
 
 
 
2911
  const el = document.getElementById('llm-model');
2912
- el.innerHTML = `${icon} ${label} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
2913
  el.title = `${data.llm_provider}: ${data.llm_model} β€” ${statusTip}`;
2914
  }
2915
 
@@ -3391,6 +3393,29 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3391
  });
3392
  popup.appendChild(row);
3393
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3394
  popup.style.display = 'block';
3395
  _llmPopupOpen = true;
3396
  } catch { showToast('Could not fetch providers', 'event'); }
 
2908
  else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
2909
  else { dotColor = '#f0c040'; statusTip = 'idle β€” no calls needed'; }
2910
 
2911
+ const calls = data.llm_calls_last_tick || 0;
2912
+ const callBadge = calls > 0 ? ` <span style="font-size:10px;color:#4ecca3;opacity:0.85">Γ—${calls}</span>` : '';
2913
  const el = document.getElementById('llm-model');
2914
+ el.innerHTML = `${icon} ${label}${callBadge} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
2915
  el.title = `${data.llm_provider}: ${data.llm_model} β€” ${statusTip}`;
2916
  }
2917
 
 
3393
  });
3394
  popup.appendChild(row);
3395
  }
3396
+ // Test button
3397
+ const sep = document.createElement('div');
3398
+ sep.style.cssText = 'border-top:1px solid #0f3460;margin:4px 0';
3399
+ popup.appendChild(sep);
3400
+ const testRow = document.createElement('div');
3401
+ testRow.className = 'llm-opt';
3402
+ testRow.innerHTML = `<span class="llm-check"></span><span style="font-size:15px">πŸ”¬</span><span>Test current LLM…</span>`;
3403
+ testRow.addEventListener('click', async (ev) => {
3404
+ ev.stopPropagation();
3405
+ popup.style.display = 'none'; _llmPopupOpen = false;
3406
+ showToast('Testing LLM…', 'event');
3407
+ try {
3408
+ const r = await fetch(`${API_BASE}/llm/test`);
3409
+ const d = await r.json();
3410
+ if (d.ok) {
3411
+ showToast(`βœ” LLM OK β€” "${d.raw.slice(0,60)}"`, 'conv');
3412
+ } else {
3413
+ const msg = d.auth_error ? `Auth error: ${d.auth_error.slice(0,80)}` : (d.error || d.raw || 'empty response');
3414
+ showToast(`✘ LLM failed: ${msg}`, 'event');
3415
+ }
3416
+ } catch (err) { showToast('Test request failed', 'event'); }
3417
+ });
3418
+ popup.appendChild(testRow);
3419
  popup.style.display = 'block';
3420
  _llmPopupOpen = true;
3421
  } catch { showToast('Could not fetch providers', 'event'); }