RayMelius Claude Opus 4.6 committed on
Commit
131f6e2
·
1 Parent(s): 3f8ff82

Fix LLM switcher: quota status detection, NN→100%, slider sync, label cleanup

Browse files

- Detect actual quota exhaustion via llm_status (429 errors), not just the
in-memory counter, which resets on server restart
- NN auto-sets probability to 100% when selected
- Remove "local" from NN label
- Popup probability slider initializes from current probability, not hardcoded 20%
- Fix pqForCalc to include rpm field for accurate runtime estimates
- Quota badge shows "exhausted" when API is actually rate-limited

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. src/soci/api/routes.py +9 -1
  2. web/index.html +19 -12
src/soci/api/routes.py CHANGED
@@ -285,7 +285,7 @@ async def get_llm_providers():
285
  current_model = getattr(get_simulation().llm, "default_model", "")
286
  providers = []
287
  # NN is always available — local ONNX model, no API key needed
288
- providers.append({"id": "nn", "label": "Soci Agent NN (local)", "icon": "🧠", "model": ""})
289
  if os.environ.get("ANTHROPIC_API_KEY"):
290
  providers.append({"id": "claude", "label": "Claude Haiku", "icon": "◆", "model": ""})
291
  if os.environ.get("GROQ_API_KEY"):
@@ -391,6 +391,14 @@ async def get_llm_quota():
391
  providers_quota[pid]["rpm"] = rpm
392
  providers_quota[pid]["max_calls_per_hour"] = rpm * 60
393
 
 
 
 
 
 
 
 
 
394
  return {
395
  "provider": provider,
396
  "daily_limit": cur["daily_limit"],
 
285
  current_model = getattr(get_simulation().llm, "default_model", "")
286
  providers = []
287
  # NN is always available — local ONNX model, no API key needed
288
+ providers.append({"id": "nn", "label": "Soci Agent NN", "icon": "🧠", "model": ""})
289
  if os.environ.get("ANTHROPIC_API_KEY"):
290
  providers.append({"id": "claude", "label": "Claude Haiku", "icon": "◆", "model": ""})
291
  if os.environ.get("GROQ_API_KEY"):
 
391
  providers_quota[pid]["rpm"] = rpm
392
  providers_quota[pid]["max_calls_per_hour"] = rpm * 60
393
 
394
+ # Expose rate-limit status (detects actual exhaustion from 429 errors)
395
+ llm_status = getattr(llm, "llm_status", "active")
396
+ if provider in providers_quota:
397
+ providers_quota[provider]["status"] = llm_status
398
+ if llm_status == "limited":
399
+ # Override remaining to 0 — the API is actually returning 429s
400
+ providers_quota[provider]["remaining"] = 0
401
+
402
  return {
403
  "provider": provider,
404
  "daily_limit": cur["daily_limit"],
web/index.html CHANGED
@@ -3426,9 +3426,10 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3426
  const pQuota = (quota && quota.providers && quota.providers[p.id]) || null;
3427
  if (isRateLimited(p.id) && pQuota && pQuota.daily_limit > 0) {
3428
  const rem = pQuota.remaining || 0;
 
3429
  const pct = Math.round((rem / pQuota.daily_limit) * 100);
3430
- const color = rem <= 0 ? '#e94560' : pct < 20 ? '#f0c040' : '#8899aa';
3431
- badge = `<span style="font-size:10px;color:${color};margin-left:auto">${rem <= 0 ? 'exhausted' : rem + ' left'}</span>`;
3432
  }
3433
 
3434
  row.innerHTML = `<span class="llm-check">${isActive ? '✔' : ''}</span>
@@ -3450,19 +3451,21 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3450
  const rem = pq ? (pq.remaining || 0) : 0;
3451
  const lim = pq ? (pq.daily_limit || 0) : 0;
3452
  const pct = lim > 0 ? Math.round((rem / lim) * 100) : 0;
3453
- // Build a quota-like object for estimateRuntime using this provider's own tick rate
3454
  const pqForCalc = {remaining: rem, daily_limit: lim,
3455
- max_calls_per_tick: pq.max_calls_per_tick || 2,
3456
- ticks_per_hour: pq.ticks_per_hour || 900};
3457
 
3458
- if (rem <= 0) {
3459
- panel.innerHTML = `<div style="color:#e94560;margin-bottom:6px">Quota exhausted (0/${lim}). Resets daily.</div>`;
 
3460
  row.after(panel);
3461
  return;
3462
  }
3463
 
3464
  const rpm = pqForCalc.rpm || 4;
3465
  const runtime = estimateRuntime(pqForCalc);
 
3466
 
3467
  panel.innerHTML =
3468
  `<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
@@ -3471,14 +3474,14 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3471
  `Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
3472
  `<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
3473
  `<label style="font-size:11px;color:#8899aa">Probability:</label>` +
3474
- `<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3" class="popup-prob-slider">` +
3475
- `<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">20%</span>` +
3476
  `</div>` +
3477
  `<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
3478
  `Higher = more LLM decisions, lower = more routine behavior</div>` +
3479
  `<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
3480
  `background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
3481
- `Switch to ${p.label} at 20%</button>`;
3482
 
3483
  row.after(panel);
3484
 
@@ -3516,10 +3519,13 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3516
  });
3517
  });
3518
  } else if (!isActive) {
3519
- // Non-rate-limited: switch immediately as before
3520
  row.addEventListener('click', async () => {
3521
  popup.style.display = 'none'; _llmPopupOpen = false;
3522
  try {
 
 
 
3523
  const body = {provider: p.id};
3524
  if (p.model) body.model = p.model;
3525
  const r = await fetch(`${API_BASE}/llm/provider`, {
@@ -3528,7 +3534,8 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3528
  body: JSON.stringify(body),
3529
  });
3530
  if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
3531
- showToast(`Switched to ${p.label}`, 'conv');
 
3532
  } catch (err) { showToast('LLM switch error', 'event'); }
3533
  });
3534
  }
 
3426
  const pQuota = (quota && quota.providers && quota.providers[p.id]) || null;
3427
  if (isRateLimited(p.id) && pQuota && pQuota.daily_limit > 0) {
3428
  const rem = pQuota.remaining || 0;
3429
+ const isLimited = pQuota.status === 'limited' || rem <= 0;
3430
  const pct = Math.round((rem / pQuota.daily_limit) * 100);
3431
+ const color = isLimited ? '#e94560' : pct < 20 ? '#f0c040' : '#8899aa';
3432
+ badge = `<span style="font-size:10px;color:${color};margin-left:auto">${isLimited ? 'exhausted' : rem + ' left'}</span>`;
3433
  }
3434
 
3435
  row.innerHTML = `<span class="llm-check">${isActive ? '✔' : ''}</span>
 
3451
  const rem = pq ? (pq.remaining || 0) : 0;
3452
  const lim = pq ? (pq.daily_limit || 0) : 0;
3453
  const pct = lim > 0 ? Math.round((rem / lim) * 100) : 0;
3454
+ // Build a quota-like object for estimateRuntime using this provider's RPM
3455
  const pqForCalc = {remaining: rem, daily_limit: lim,
3456
+ rpm: pq.rpm || 4,
3457
+ max_calls_per_hour: pq.max_calls_per_hour || 240};
3458
 
3459
+ const isExhausted = rem <= 0 || (pq && pq.status === 'limited');
3460
+ if (isExhausted) {
3461
+ panel.innerHTML = `<div style="color:#e94560;margin-bottom:6px">Quota exhausted. Resets daily (10:00 AM).</div>`;
3462
  row.after(panel);
3463
  return;
3464
  }
3465
 
3466
  const rpm = pqForCalc.rpm || 4;
3467
  const runtime = estimateRuntime(pqForCalc);
3468
+ const curProb = Math.round(llmCallProbability * 100) || 10;
3469
 
3470
  panel.innerHTML =
3471
  `<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
 
3474
  `Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
3475
  `<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
3476
  `<label style="font-size:11px;color:#8899aa">Probability:</label>` +
3477
+ `<input type="range" min="1" max="100" value="${curProb}" style="flex:1;accent-color:#4ecca3" class="popup-prob-slider">` +
3478
+ `<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">${curProb}%</span>` +
3479
  `</div>` +
3480
  `<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
3481
  `Higher = more LLM decisions, lower = more routine behavior</div>` +
3482
  `<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
3483
  `background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
3484
+ `Switch to ${p.label} at ${curProb}%</button>`;
3485
 
3486
  row.after(panel);
3487
 
 
3519
  });
3520
  });
3521
  } else if (!isActive) {
3522
+ // Non-rate-limited (NN, Claude, Ollama): switch immediately; NN is set to 100% probability, other providers to 10%
3523
  row.addEventListener('click', async () => {
3524
  popup.style.display = 'none'; _llmPopupOpen = false;
3525
  try {
3526
+ // Set probability to 100% for the NN provider; every other provider here gets 10%
3527
+ const autoProb = (p.id === 'nn') ? 1.0 : 0.10;
3528
+ await fetch(`${API_BASE}/controls/llm_probability?value=${autoProb}`, {method: 'POST'});
3529
  const body = {provider: p.id};
3530
  if (p.model) body.model = p.model;
3531
  const r = await fetch(`${API_BASE}/llm/provider`, {
 
3534
  body: JSON.stringify(body),
3535
  });
3536
  if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
3537
+ updateLlmProbUI(autoProb);
3538
+ showToast(`Switched to ${p.label} at ${Math.round(autoProb*100)}%`, 'conv');
3539
  } catch (err) { showToast('LLM switch error', 'event'); }
3540
  });
3541
  }