Fix LLM switcher: quota status detection, NN→100%, slider sync, label cleanup
Browse files
- Detect actual quota exhaustion via llm_status (429 errors), not just the
  in-memory counter, which resets on server restart
- NN auto-sets probability to 100% when selected; other providers that switch immediately are set to 10%
- Remove "local" from NN label
- Popup probability slider initializes from current probability, not hardcoded 20%
- Fix pqForCalc to include rpm field for accurate runtime estimates
- Quota badge shows "exhausted" when API is actually rate-limited
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- src/soci/api/routes.py +9 -1
- web/index.html +19 -12
src/soci/api/routes.py
CHANGED
|
@@ -285,7 +285,7 @@ async def get_llm_providers():
|
|
| 285 |
current_model = getattr(get_simulation().llm, "default_model", "")
|
| 286 |
providers = []
|
| 287 |
# NN is always available — local ONNX model, no API key needed
|
| 288 |
-
providers.append({"id": "nn", "label": "Soci Agent NN
|
| 289 |
if os.environ.get("ANTHROPIC_API_KEY"):
|
| 290 |
providers.append({"id": "claude", "label": "Claude Haiku", "icon": "◆", "model": ""})
|
| 291 |
if os.environ.get("GROQ_API_KEY"):
|
|
@@ -391,6 +391,14 @@ async def get_llm_quota():
|
|
| 391 |
providers_quota[pid]["rpm"] = rpm
|
| 392 |
providers_quota[pid]["max_calls_per_hour"] = rpm * 60
|
| 393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
return {
|
| 395 |
"provider": provider,
|
| 396 |
"daily_limit": cur["daily_limit"],
|
|
|
|
| 285 |
current_model = getattr(get_simulation().llm, "default_model", "")
|
| 286 |
providers = []
|
| 287 |
# NN is always available — local ONNX model, no API key needed
|
| 288 |
+
providers.append({"id": "nn", "label": "Soci Agent NN", "icon": "🧠", "model": ""})
|
| 289 |
if os.environ.get("ANTHROPIC_API_KEY"):
|
| 290 |
providers.append({"id": "claude", "label": "Claude Haiku", "icon": "◆", "model": ""})
|
| 291 |
if os.environ.get("GROQ_API_KEY"):
|
|
|
|
| 391 |
providers_quota[pid]["rpm"] = rpm
|
| 392 |
providers_quota[pid]["max_calls_per_hour"] = rpm * 60
|
| 393 |
|
| 394 |
+
# Expose rate-limit status (detects actual exhaustion from 429 errors)
|
| 395 |
+
llm_status = getattr(llm, "llm_status", "active")
|
| 396 |
+
if provider in providers_quota:
|
| 397 |
+
providers_quota[provider]["status"] = llm_status
|
| 398 |
+
if llm_status == "limited":
|
| 399 |
+
# Override remaining to 0 — the API is actually returning 429s
|
| 400 |
+
providers_quota[provider]["remaining"] = 0
|
| 401 |
+
|
| 402 |
return {
|
| 403 |
"provider": provider,
|
| 404 |
"daily_limit": cur["daily_limit"],
|
web/index.html
CHANGED
|
@@ -3426,9 +3426,10 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3426 |
const pQuota = (quota && quota.providers && quota.providers[p.id]) || null;
|
| 3427 |
if (isRateLimited(p.id) && pQuota && pQuota.daily_limit > 0) {
|
| 3428 |
const rem = pQuota.remaining || 0;
|
|
|
|
| 3429 |
const pct = Math.round((rem / pQuota.daily_limit) * 100);
|
| 3430 |
-
const color =
|
| 3431 |
-
badge = `<span style="font-size:10px;color:${color};margin-left:auto">${
|
| 3432 |
}
|
| 3433 |
|
| 3434 |
row.innerHTML = `<span class="llm-check">${isActive ? '✔' : ''}</span>
|
|
@@ -3450,19 +3451,21 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3450 |
const rem = pq ? (pq.remaining || 0) : 0;
|
| 3451 |
const lim = pq ? (pq.daily_limit || 0) : 0;
|
| 3452 |
const pct = lim > 0 ? Math.round((rem / lim) * 100) : 0;
|
| 3453 |
-
// Build a quota-like object for estimateRuntime using this provider's
|
| 3454 |
const pqForCalc = {remaining: rem, daily_limit: lim,
|
| 3455 |
-
|
| 3456 |
-
|
| 3457 |
|
| 3458 |
-
|
| 3459 |
-
|
|
|
|
| 3460 |
row.after(panel);
|
| 3461 |
return;
|
| 3462 |
}
|
| 3463 |
|
| 3464 |
const rpm = pqForCalc.rpm || 4;
|
| 3465 |
const runtime = estimateRuntime(pqForCalc);
|
|
|
|
| 3466 |
|
| 3467 |
panel.innerHTML =
|
| 3468 |
`<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
|
|
@@ -3471,14 +3474,14 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3471 |
`Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
|
| 3472 |
`<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
|
| 3473 |
`<label style="font-size:11px;color:#8899aa">Probability:</label>` +
|
| 3474 |
-
`<input type="range" min="1" max="100" value="
|
| 3475 |
-
`<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">
|
| 3476 |
`</div>` +
|
| 3477 |
`<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
|
| 3478 |
`Higher = more LLM decisions, lower = more routine behavior</div>` +
|
| 3479 |
`<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
|
| 3480 |
`background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
|
| 3481 |
-
`Switch to ${p.label} at
|
| 3482 |
|
| 3483 |
row.after(panel);
|
| 3484 |
|
|
@@ -3516,10 +3519,13 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3516 |
});
|
| 3517 |
});
|
| 3518 |
} else if (!isActive) {
|
| 3519 |
-
// Non-rate-limited: switch immediately
|
| 3520 |
row.addEventListener('click', async () => {
|
| 3521 |
popup.style.display = 'none'; _llmPopupOpen = false;
|
| 3522 |
try {
|
|
|
|
|
|
|
|
|
|
| 3523 |
const body = {provider: p.id};
|
| 3524 |
if (p.model) body.model = p.model;
|
| 3525 |
const r = await fetch(`${API_BASE}/llm/provider`, {
|
|
@@ -3528,7 +3534,8 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3528 |
body: JSON.stringify(body),
|
| 3529 |
});
|
| 3530 |
if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
|
| 3531 |
-
|
|
|
|
| 3532 |
} catch (err) { showToast('LLM switch error', 'event'); }
|
| 3533 |
});
|
| 3534 |
}
|
|
|
|
| 3426 |
const pQuota = (quota && quota.providers && quota.providers[p.id]) || null;
|
| 3427 |
if (isRateLimited(p.id) && pQuota && pQuota.daily_limit > 0) {
|
| 3428 |
const rem = pQuota.remaining || 0;
|
| 3429 |
+
const isLimited = pQuota.status === 'limited' || rem <= 0;
|
| 3430 |
const pct = Math.round((rem / pQuota.daily_limit) * 100);
|
| 3431 |
+
const color = isLimited ? '#e94560' : pct < 20 ? '#f0c040' : '#8899aa';
|
| 3432 |
+
badge = `<span style="font-size:10px;color:${color};margin-left:auto">${isLimited ? 'exhausted' : rem + ' left'}</span>`;
|
| 3433 |
}
|
| 3434 |
|
| 3435 |
row.innerHTML = `<span class="llm-check">${isActive ? '✔' : ''}</span>
|
|
|
|
| 3451 |
const rem = pq ? (pq.remaining || 0) : 0;
|
| 3452 |
const lim = pq ? (pq.daily_limit || 0) : 0;
|
| 3453 |
const pct = lim > 0 ? Math.round((rem / lim) * 100) : 0;
|
| 3454 |
+
// Build a quota-like object for estimateRuntime using this provider's RPM
|
| 3455 |
const pqForCalc = {remaining: rem, daily_limit: lim,
|
| 3456 |
+
rpm: pq.rpm || 4,
|
| 3457 |
+
max_calls_per_hour: pq.max_calls_per_hour || 240};
|
| 3458 |
|
| 3459 |
+
const isExhausted = rem <= 0 || (pq && pq.status === 'limited');
|
| 3460 |
+
if (isExhausted) {
|
| 3461 |
+
panel.innerHTML = `<div style="color:#e94560;margin-bottom:6px">Quota exhausted. Resets daily (10:00 AM).</div>`;
|
| 3462 |
row.after(panel);
|
| 3463 |
return;
|
| 3464 |
}
|
| 3465 |
|
| 3466 |
const rpm = pqForCalc.rpm || 4;
|
| 3467 |
const runtime = estimateRuntime(pqForCalc);
|
| 3468 |
+
const curProb = Math.round(llmCallProbability * 100) || 10;
|
| 3469 |
|
| 3470 |
panel.innerHTML =
|
| 3471 |
`<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
|
|
|
|
| 3474 |
`Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
|
| 3475 |
`<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
|
| 3476 |
`<label style="font-size:11px;color:#8899aa">Probability:</label>` +
|
| 3477 |
+
`<input type="range" min="1" max="100" value="${curProb}" style="flex:1;accent-color:#4ecca3" class="popup-prob-slider">` +
|
| 3478 |
+
`<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">${curProb}%</span>` +
|
| 3479 |
`</div>` +
|
| 3480 |
`<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
|
| 3481 |
`Higher = more LLM decisions, lower = more routine behavior</div>` +
|
| 3482 |
`<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
|
| 3483 |
`background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
|
| 3484 |
+
`Switch to ${p.label} at ${curProb}%</button>`;
|
| 3485 |
|
| 3486 |
row.after(panel);
|
| 3487 |
|
|
|
|
| 3519 |
});
|
| 3520 |
});
|
| 3521 |
} else if (!isActive) {
|
| 3522 |
+
// Non-rate-limited (NN, Claude, Ollama): switch immediately; NN is set to 100% probability, all others to 10%
|
| 3523 |
row.addEventListener('click', async () => {
|
| 3524 |
popup.style.display = 'none'; _llmPopupOpen = false;
|
| 3525 |
try {
|
| 3526 |
+
// NN runs at 100% probability; every other provider reaching this branch is reset to 10%
|
| 3527 |
+
const autoProb = (p.id === 'nn') ? 1.0 : 0.10;
|
| 3528 |
+
await fetch(`${API_BASE}/controls/llm_probability?value=${autoProb}`, {method: 'POST'});
|
| 3529 |
const body = {provider: p.id};
|
| 3530 |
if (p.model) body.model = p.model;
|
| 3531 |
const r = await fetch(`${API_BASE}/llm/provider`, {
|
|
|
|
| 3534 |
body: JSON.stringify(body),
|
| 3535 |
});
|
| 3536 |
if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
|
| 3537 |
+
updateLlmProbUI(autoProb);
|
| 3538 |
+
showToast(`Switched to ${p.label} at ${Math.round(autoProb*100)}%`, 'conv');
|
| 3539 |
} catch (err) { showToast('LLM switch error', 'event'); }
|
| 3540 |
});
|
| 3541 |
}
|