RayMelius Claude Sonnet 4.6 committed on
Commit
bd4679e
·
1 Parent(s): 1ed2ceb

Add LLM call counter, test button, and better HF error handling

Browse files

- LLM pill now shows ×N call count each tick in green when active
- LLM popup has a 🔬 Test button — makes a real call and shows raw response
- GET /api/llm/test endpoint for diagnosing provider issues
- HF 401/403 triggers circuit breaker + nokey status (gated model / bad token)
- HF 503 reads estimated_time from response body and waits accordingly
- auth_error field exposed on /llm/test for actionable diagnostics

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. src/soci/api/routes.py +18 -0
  2. src/soci/engine/llm.py +22 -4
  3. web/index.html +26 -1
src/soci/api/routes.py CHANGED
@@ -283,6 +283,24 @@ async def get_llm_providers():
283
  return {"current": current, "providers": providers}
284
 
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  @router.post("/llm/provider")
287
  async def set_llm_provider(req: SwitchProviderRequest):
288
  """Hot-swap the active LLM provider."""
 
283
  return {"current": current, "providers": providers}
284
 
285
 
286
@router.get("/llm/test")
async def test_llm():
    """Make a minimal LLM call and return the raw response — for diagnosing provider issues.

    Returns a JSON-serializable dict:
        ok:         True when the provider returned a non-empty completion.
        raw:        the raw completion text ("" on failure).
        provider:   active client's provider name (best-effort via getattr).
        model:      active client's default model (best-effort via getattr).
        auth_error: last auth failure recorded by the client, "" if none.
        error:      exception text, present only when the call itself raised.
    """
    from soci.api.server import get_simulation

    sim = get_simulation()
    try:
        raw = await sim.llm.complete(
            system="You are a test assistant.",
            user_message='Reply with exactly: {"ok": true}',
            max_tokens=32,
        )
    except Exception as e:  # diagnostic endpoint: report the failure, never propagate
        # Expose auth_error on this path too — the UI reads d.auth_error when
        # ok is false, and the raising path previously omitted it.
        return {
            "ok": False,
            "raw": "",
            "error": str(e),
            "auth_error": getattr(sim.llm, "_auth_error", ""),
        }
    return {
        "ok": bool(raw),
        "raw": raw,
        "provider": getattr(sim.llm, "provider", "?"),
        "model": getattr(sim.llm, "default_model", "?"),
        "auth_error": getattr(sim.llm, "_auth_error", ""),
    }
302
+
303
+
304
  @router.post("/llm/provider")
305
  async def set_llm_provider(req: SwitchProviderRequest):
306
  """Hot-swap the active LLM provider."""
src/soci/engine/llm.py CHANGED
@@ -843,6 +843,7 @@ class HFInferenceClient:
843
  timeout=120.0, # HF can be slow under load
844
  )
845
  self._rate_limited_until: float = 0.0
 
846
 
847
  def _is_quota_exhausted(self) -> bool:
848
  return time.monotonic() < self._rate_limited_until
@@ -861,6 +862,8 @@ class HFInferenceClient:
861
  def llm_status(self) -> str:
862
  if not self.api_key:
863
  return "nokey"
 
 
864
  return "limited" if self._is_quota_exhausted() else "active"
865
 
866
  async def complete(
@@ -898,6 +901,7 @@ class HFInferenceClient:
898
  return data["choices"][0]["message"]["content"]
899
  except httpx.HTTPStatusError as e:
900
  status = e.response.status_code
 
901
  if status == 429:
902
  retry_after = e.response.headers.get("retry-after", "10")
903
  try:
@@ -910,13 +914,27 @@ class HFInferenceClient:
910
  return ""
911
  logger.warning(f"HF rate limited, waiting {wait}s")
912
  await asyncio.sleep(wait)
 
 
 
 
 
 
 
 
 
913
  elif status in (503, 504):
914
- # Model loading / gateway timeout β€” back off and retry
915
- wait = 5.0 * (attempt + 1)
916
- logger.warning(f"HF model loading ({status}), waiting {wait}s")
 
 
 
 
 
917
  await asyncio.sleep(wait)
918
  else:
919
- logger.error(f"HF HTTP error: {status} {e.response.text[:200]}")
920
  if attempt == self.max_retries - 1:
921
  return ""
922
  await asyncio.sleep(2)
 
843
  timeout=120.0, # HF can be slow under load
844
  )
845
  self._rate_limited_until: float = 0.0
846
+ self._auth_error: str = ""
847
 
848
  def _is_quota_exhausted(self) -> bool:
849
  return time.monotonic() < self._rate_limited_until
 
862
  def llm_status(self) -> str:
863
  if not self.api_key:
864
  return "nokey"
865
+ if self._auth_error:
866
+ return "nokey" # gated model / bad token
867
  return "limited" if self._is_quota_exhausted() else "active"
868
 
869
  async def complete(
 
901
  return data["choices"][0]["message"]["content"]
902
  except httpx.HTTPStatusError as e:
903
  status = e.response.status_code
904
+ body = e.response.text[:300]
905
  if status == 429:
906
  retry_after = e.response.headers.get("retry-after", "10")
907
  try:
 
914
  return ""
915
  logger.warning(f"HF rate limited, waiting {wait}s")
916
  await asyncio.sleep(wait)
917
+ elif status in (401, 403):
918
+ # Auth failure or gated model β€” disable for a long window
919
+ self._rate_limited_until = time.monotonic() + 3600
920
+ self._auth_error = body
921
+ logger.error(
922
+ f"HF auth error ({status}): {body} β€” "
923
+ "Check HF_TOKEN and accept model license at huggingface.co"
924
+ )
925
+ return ""
926
  elif status in (503, 504):
927
+ # Model loading β€” read estimated_time from body if available
928
+ try:
929
+ import json as _json
930
+ estimated = _json.loads(e.response.text).get("estimated_time", 0)
931
+ wait = max(float(estimated), 5.0 * (attempt + 1))
932
+ except Exception:
933
+ wait = 5.0 * (attempt + 1)
934
+ logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
935
  await asyncio.sleep(wait)
936
  else:
937
+ logger.error(f"HF HTTP error: {status} {body}")
938
  if attempt == self.max_retries - 1:
939
  return ""
940
  await asyncio.sleep(2)
web/index.html CHANGED
@@ -2908,8 +2908,10 @@ function processStateData(data) {
2908
  else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
2909
  else { dotColor = '#f0c040'; statusTip = 'idle β€” no calls needed'; }
2910
 
 
 
2911
  const el = document.getElementById('llm-model');
2912
- el.innerHTML = `${icon} ${label} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
2913
  el.title = `${data.llm_provider}: ${data.llm_model} β€” ${statusTip}`;
2914
  }
2915
 
@@ -3391,6 +3393,29 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3391
  });
3392
  popup.appendChild(row);
3393
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3394
  popup.style.display = 'block';
3395
  _llmPopupOpen = true;
3396
  } catch { showToast('Could not fetch providers', 'event'); }
 
2908
  else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
2909
  else { dotColor = '#f0c040'; statusTip = 'idle β€” no calls needed'; }
2910
 
2911
+ const calls = data.llm_calls_last_tick || 0;
2912
+ const callBadge = calls > 0 ? ` <span style="font-size:10px;color:#4ecca3;opacity:0.85">Γ—${calls}</span>` : '';
2913
  const el = document.getElementById('llm-model');
2914
+ el.innerHTML = `${icon} ${label}${callBadge} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
2915
  el.title = `${data.llm_provider}: ${data.llm_model} β€” ${statusTip}`;
2916
  }
2917
 
 
3393
  });
3394
  popup.appendChild(row);
3395
  }
3396
+ // Test button
3397
+ const sep = document.createElement('div');
3398
+ sep.style.cssText = 'border-top:1px solid #0f3460;margin:4px 0';
3399
+ popup.appendChild(sep);
3400
+ const testRow = document.createElement('div');
3401
+ testRow.className = 'llm-opt';
3402
+ testRow.innerHTML = `<span class="llm-check"></span><span style="font-size:15px">πŸ”¬</span><span>Test current LLM…</span>`;
3403
+ testRow.addEventListener('click', async (ev) => {
3404
+ ev.stopPropagation();
3405
+ popup.style.display = 'none'; _llmPopupOpen = false;
3406
+ showToast('Testing LLM…', 'event');
3407
+ try {
3408
+ const r = await fetch(`${API_BASE}/llm/test`);
3409
+ const d = await r.json();
3410
+ if (d.ok) {
3411
+ showToast(`βœ” LLM OK β€” "${d.raw.slice(0,60)}"`, 'conv');
3412
+ } else {
3413
+ const msg = d.auth_error ? `Auth error: ${d.auth_error.slice(0,80)}` : (d.error || d.raw || 'empty response');
3414
+ showToast(`✘ LLM failed: ${msg}`, 'event');
3415
+ }
3416
+ } catch (err) { showToast('Test request failed', 'event'); }
3417
+ });
3418
+ popup.appendChild(testRow);
3419
  popup.style.display = 'block';
3420
  _llmPopupOpen = true;
3421
  } catch { showToast('Could not fetch providers', 'event'); }