RayMelius Claude Opus 4.6 committed on
Commit
3f8ff82
·
1 Parent(s): 7226986

Fix quota runtime estimates: use RPM as bottleneck, sync top probability slider

Browse files

- Runtime estimate now based on RPM (Gemini 4 RPM = 240 calls/h, ~6.25h for 1500 RPD)
not probability, which barely affects duration with 50 agents
- Remove misleading per-probability runtime table from popup
- Show RPM and single accurate runtime estimate instead
- Fix ID collision between top slider and popup slider (use class selectors)
- Sync top probability slider when switching provider via popup
- Expose RPM and max_calls_per_hour in /api/llm/quota per-provider data
- Update nn_selfimprove budget calculations to use RPM-based math

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. scripts/nn_selfimprove.py +46 -41
  2. src/soci/api/routes.py +7 -4
  3. web/index.html +22 -28
scripts/nn_selfimprove.py CHANGED
@@ -788,14 +788,12 @@ async def scheduled(
788
  return True
789
 
790
  async def calculate_probability(client: httpx.AsyncClient, target_minutes: int) -> float:
791
- """Query remaining Gemini quota and calculate probability to last target_minutes.
792
-
793
- Math:
794
- - ticks_per_hour 900 (4s tick delay at 1x speed)
795
- - max 2 LLM calls per tick (rate-limited budget)
796
- - Each call site rolls random() < probability, ~4 sites per tick
797
- - Expected LLM calls/hour = ticks_per_hour × min(sites × prob, max_calls_per_tick)
798
- - Solve for prob: remaining_quota / (target_hours × ticks_per_hour × effective_rate)
799
  """
800
  resp = await _api_call(client, "get", "/api/llm/quota")
801
  if not resp or resp.status_code != 200:
@@ -804,33 +802,35 @@ async def scheduled(
804
 
805
  quota = resp.json()
806
  remaining = quota.get("remaining", 1500)
807
- ticks_per_hour = quota.get("ticks_per_hour", 900)
808
- max_calls_per_tick = quota.get("max_calls_per_tick", 2)
809
- num_agents = quota.get("num_agents", 20)
810
 
811
  if remaining <= 0:
812
  logger.warning("No Gemini quota remaining!")
813
  return 0.0
814
 
 
 
 
 
 
 
815
  target_hours = target_minutes / 60.0
816
 
817
- # There are ~4 LLM call sites per tick (plan, action, social, reflect),
818
- # each gated by probability. But max_calls_per_tick caps the actual calls.
819
- # Approximate: at prob P, expected calls/tick ≈ min(num_sites × P, max_calls_per_tick)
820
- # We want: remaining = target_hours × ticks_per_hour × calls_per_tick
821
- # So: calls_per_tick = remaining / (target_hours × ticks_per_hour)
822
- # And: P = calls_per_tick / num_call_sites (since each site independently rolls P)
823
- num_call_sites = 4 # plan, action, social, reflect
824
- desired_calls_per_tick = remaining / (target_hours * ticks_per_hour)
825
- # Clamp to max budget
826
- desired_calls_per_tick = min(desired_calls_per_tick, max_calls_per_tick)
827
- prob = desired_calls_per_tick / num_call_sites
828
- prob = max(0.01, min(1.0, prob))
829
-
830
  logger.info(
831
- f"Quota: {remaining} remaining, target {target_minutes} min → "
832
- f"~{desired_calls_per_tick:.2f} calls/tick → probability {prob:.2%}"
833
  )
 
 
 
 
 
 
 
 
 
 
 
 
834
  return round(prob, 4)
835
 
836
  async def wait_until_reset():
@@ -940,34 +940,39 @@ async def budget(
940
  return
941
 
942
  quota = resp.json()
943
- remaining = quota.get("remaining", 0)
944
- daily_limit = quota.get("daily_limit", 1500)
945
- daily_requests = quota.get("daily_requests", 0)
946
- ticks_per_hour = quota.get("ticks_per_hour", 900)
947
- max_calls_per_tick = quota.get("max_calls_per_tick", 2)
948
  provider = quota.get("provider", "?")
949
  num_agents = quota.get("num_agents", 0)
950
 
 
 
 
 
 
 
 
 
 
 
951
  logger.info(f"Provider: {provider}")
952
  logger.info(f"Daily quota: {daily_requests}/{daily_limit} used, {remaining} remaining")
953
- logger.info(f"Sim: {num_agents} agents, ~{ticks_per_hour:.0f} ticks/hour")
 
 
954
 
955
  if remaining <= 0:
956
  logger.warning("No quota remaining! Wait for reset (10:00 AM Athens).")
957
  return
958
 
959
  target_hours = target_minutes / 60.0
960
- num_call_sites = 4
961
- desired_calls_per_tick = remaining / (target_hours * ticks_per_hour)
962
- desired_calls_per_tick = min(desired_calls_per_tick, max_calls_per_tick)
963
- prob = desired_calls_per_tick / num_call_sites
964
- prob = max(0.01, min(1.0, prob))
965
- prob = round(prob, 4)
966
-
967
- expected_calls = target_hours * ticks_per_hour * min(num_call_sites * prob, max_calls_per_tick)
968
  logger.info(
969
  f"Target: {target_minutes} min → probability {prob:.2%} "
970
- f"(~{expected_calls:.0f} calls, {remaining} available)"
971
  )
972
 
973
  if apply:
 
788
  return True
789
 
790
  async def calculate_probability(client: httpx.AsyncClient, target_minutes: int) -> float:
791
+ """Query remaining Gemini quota and return a reasonable probability.
792
+
793
+ The real bottleneck is RPM (requests per minute), not probability.
794
+ With 50 agents, even low probability saturates the RPM rate limiter.
795
+ Gemini: 4 RPM → max 240 calls/hour → 1500 RPD lasts ~6.25h.
796
+ Probability mainly controls LLM-vs-routine quality, not quota duration.
 
 
797
  """
798
  resp = await _api_call(client, "get", "/api/llm/quota")
799
  if not resp or resp.status_code != 200:
 
802
 
803
  quota = resp.json()
804
  remaining = quota.get("remaining", 1500)
 
 
 
805
 
806
  if remaining <= 0:
807
  logger.warning("No Gemini quota remaining!")
808
  return 0.0
809
 
810
+ # Get per-provider RPM info
811
+ providers = quota.get("providers", {})
812
+ gemini_info = providers.get("gemini", {})
813
+ rpm = gemini_info.get("rpm", 4)
814
+ max_calls_per_hour = rpm * 60
815
+ hours_available = remaining / max_calls_per_hour
816
  target_hours = target_minutes / 60.0
817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
  logger.info(
819
+ f"Quota: {remaining} remaining, RPM={rpm} → "
820
+ f"max {max_calls_per_hour} calls/h → ~{hours_available:.1f}h available"
821
  )
822
+
823
+ if hours_available >= target_hours:
824
+ prob = gemini_prob
825
+ logger.info(f"Quota sufficient for {target_minutes}min target → using {prob:.0%}")
826
+ else:
827
+ # Quota won't last — reduce probability (marginal help with many agents)
828
+ prob = max(0.02, 0.10 * (hours_available / target_hours))
829
+ logger.warning(
830
+ f"Quota only lasts ~{hours_available:.1f}h but target is {target_hours:.1f}h "
831
+ f"→ reducing probability to {prob:.1%}"
832
+ )
833
+
834
  return round(prob, 4)
835
 
836
  async def wait_until_reset():
 
940
  return
941
 
942
  quota = resp.json()
 
 
 
 
 
943
  provider = quota.get("provider", "?")
944
  num_agents = quota.get("num_agents", 0)
945
 
946
+ # Get Gemini-specific quota from providers dict
947
+ providers = quota.get("providers", {})
948
+ gemini_info = providers.get("gemini", {})
949
+ remaining = gemini_info.get("remaining", quota.get("remaining", 0))
950
+ daily_limit = gemini_info.get("daily_limit", quota.get("daily_limit", 1500))
951
+ daily_requests = gemini_info.get("daily_requests", quota.get("daily_requests", 0))
952
+ rpm = gemini_info.get("rpm", 4)
953
+ max_calls_per_hour = rpm * 60
954
+ hours_available = remaining / max_calls_per_hour if max_calls_per_hour > 0 else 0
955
+
956
  logger.info(f"Provider: {provider}")
957
  logger.info(f"Daily quota: {daily_requests}/{daily_limit} used, {remaining} remaining")
958
+ logger.info(f"Rate limit: {rpm} RPM → max {max_calls_per_hour} calls/hour")
959
+ logger.info(f"Estimated runtime at max RPM: ~{hours_available:.1f}h")
960
+ logger.info(f"Sim: {num_agents} agents")
961
 
962
  if remaining <= 0:
963
  logger.warning("No quota remaining! Wait for reset (10:00 AM Athens).")
964
  return
965
 
966
  target_hours = target_minutes / 60.0
967
+ # Probability controls LLM-vs-routine quality, RPM is the real bottleneck
968
+ if hours_available >= target_hours:
969
+ prob = 0.20 # moderate: good mix of LLM and routine
970
+ else:
971
+ prob = max(0.02, 0.10 * (hours_available / target_hours))
972
+
 
 
973
  logger.info(
974
  f"Target: {target_minutes} min → probability {prob:.2%} "
975
+ f"(RPM-limited to ~{max_calls_per_hour} calls/h, {remaining} remaining)"
976
  )
977
 
978
  if apply:
src/soci/api/routes.py CHANGED
@@ -382,11 +382,14 @@ async def get_llm_quota():
382
  max_calls_per_tick = 2 if provider in ("gemini", "groq") else 5
383
  num_agents = len(sim.agents)
384
 
385
- # Per-provider tick estimates (for frontend runtime calc when switching TO a provider)
 
 
 
386
  for pid in providers_quota:
387
- rl_delay = 4.0 if pid in ("gemini", "groq") else 2.0
388
- providers_quota[pid]["ticks_per_hour"] = round(3600.0 / (rl_delay * max(_sim_speed, 0.01)), 1)
389
- providers_quota[pid]["max_calls_per_tick"] = 2 if pid in ("gemini", "groq") else 5
390
 
391
  return {
392
  "provider": provider,
 
382
  max_calls_per_tick = 2 if provider in ("gemini", "groq") else 5
383
  num_agents = len(sim.agents)
384
 
385
+ # Per-provider rate info (RPM is the real bottleneck, not probability)
386
+ # Gemini: 4 RPM hard limit → max 240 calls/hour
387
+ # Groq: 28 RPM hard limit → max 1680 calls/hour
388
+ provider_rpm = {"gemini": 4, "groq": 28}
389
  for pid in providers_quota:
390
+ rpm = provider_rpm.get(pid, 30)
391
+ providers_quota[pid]["rpm"] = rpm
392
+ providers_quota[pid]["max_calls_per_hour"] = rpm * 60
393
 
394
  return {
395
  "provider": provider,
web/index.html CHANGED
@@ -3402,13 +3402,11 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3402
  const existing = popup.querySelectorAll('.llm-opt,.llm-quota-panel');
3403
  existing.forEach(el => el.remove());
3404
 
3405
- // Estimate remaining runtime at a given probability
3406
- function estimateRuntime(q, prob) {
 
3407
  if (!q || q.remaining <= 0) return 'exhausted';
3408
- const sites = 4;
3409
- const callsPerTick = Math.min(sites * prob, q.max_calls_per_tick || 2);
3410
- const ticksH = q.ticks_per_hour || 900;
3411
- const callsPerHour = ticksH * callsPerTick;
3412
  if (callsPerHour <= 0) return '∞';
3413
  const hours = q.remaining / callsPerHour;
3414
  if (hours >= 48) return `~${Math.round(hours / 24)}d`;
@@ -3463,41 +3461,35 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3463
  return;
3464
  }
3465
 
3466
- // Runtime estimates table
3467
- const probs = [0.05, 0.10, 0.20, 0.35, 0.50, 0.75, 1.00];
3468
- let tableRows = probs.map(pr => {
3469
- const rt = estimateRuntime(pqForCalc, pr);
3470
- return `<span style="display:inline-block;width:42px;text-align:right;color:#4ecca3">${Math.round(pr*100)}%</span>` +
3471
- `<span style="color:#8899aa;margin-left:6px">${rt}</span>`;
3472
- }).join('<br>');
3473
 
3474
  panel.innerHTML =
3475
  `<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
3476
- `<div style="margin-bottom:6px">Quota: <b>${rem}</b>/${lim} (${pct}%)</div>` +
3477
- `<div style="margin-bottom:6px;font-size:10px;color:#8899aa">Estimated runtime by probability:</div>` +
3478
- `<div style="margin-bottom:8px;font-size:11px;line-height:1.6">${tableRows}</div>` +
3479
- `<div style="display:flex;align-items:center;gap:8px">` +
3480
  `<label style="font-size:11px;color:#8899aa">Probability:</label>` +
3481
- `<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3" id="llm-prob-slider">` +
3482
- `<span id="llm-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">20%</span>` +
3483
  `</div>` +
3484
- `<div id="llm-prob-runtime" style="font-size:10px;color:#8899aa;margin:4px 0 8px 0">Runtime: ${estimateRuntime(pqForCalc, 0.20)}</div>` +
3485
- `<button id="llm-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
 
3486
  `background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
3487
  `Switch to ${p.label} at 20%</button>`;
3488
 
3489
  row.after(panel);
3490
 
3491
- // Wire up slider
3492
- const slider = panel.querySelector('#llm-prob-slider');
3493
- const valLabel = panel.querySelector('#llm-prob-val');
3494
- const rtLabel = panel.querySelector('#llm-prob-runtime');
3495
- const btn = panel.querySelector('#llm-switch-btn');
3496
 
3497
  slider.addEventListener('input', () => {
3498
  const pv = parseInt(slider.value);
3499
  valLabel.textContent = pv + '%';
3500
- rtLabel.textContent = 'Runtime: ' + estimateRuntime(pqForCalc, pv / 100);
3501
  btn.textContent = `Switch to ${p.label} at ${pv}%`;
3502
  });
3503
 
@@ -3517,7 +3509,9 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3517
  body: JSON.stringify(body),
3518
  });
3519
  if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
3520
- showToast(`Switched to ${p.label} at ${Math.round(probVal*100)}% · ${estimateRuntime(pqForCalc, probVal)} runtime`, 'conv');
 
 
3521
  } catch (err) { showToast('LLM switch error', 'event'); }
3522
  });
3523
  });
 
3402
  const existing = popup.querySelectorAll('.llm-opt,.llm-quota-panel');
3403
  existing.forEach(el => el.remove());
3404
 
3405
+ // Estimate remaining runtime based on RPM (the real bottleneck, not probability).
3406
+ // With 50 agents, even low probability saturates the RPM rate limiter.
3407
+ function estimateRuntime(q) {
3408
  if (!q || q.remaining <= 0) return 'exhausted';
3409
+ const callsPerHour = q.max_calls_per_hour || (q.rpm || 4) * 60;
 
 
 
3410
  if (callsPerHour <= 0) return '∞';
3411
  const hours = q.remaining / callsPerHour;
3412
  if (hours >= 48) return `~${Math.round(hours / 24)}d`;
 
3461
  return;
3462
  }
3463
 
3464
+ const rpm = pqForCalc.rpm || 4;
3465
+ const runtime = estimateRuntime(pqForCalc);
 
 
 
 
 
3466
 
3467
  panel.innerHTML =
3468
  `<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
3469
+ `<div style="margin-bottom:4px">Quota: <b>${rem}</b>/${lim} (${pct}%)</div>` +
3470
+ `<div style="margin-bottom:6px;font-size:10px;color:#8899aa">` +
3471
+ `Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
3472
+ `<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
3473
  `<label style="font-size:11px;color:#8899aa">Probability:</label>` +
3474
+ `<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3" class="popup-prob-slider">` +
3475
+ `<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">20%</span>` +
3476
  `</div>` +
3477
+ `<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
3478
+ `Higher = more LLM decisions, lower = more routine behavior</div>` +
3479
+ `<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
3480
  `background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
3481
  `Switch to ${p.label} at 20%</button>`;
3482
 
3483
  row.after(panel);
3484
 
3485
+ // Wire up slider (use class selectors to avoid ID collision with top slider)
3486
+ const slider = panel.querySelector('.popup-prob-slider');
3487
+ const valLabel = panel.querySelector('.popup-prob-val');
3488
+ const btn = panel.querySelector('.popup-switch-btn');
 
3489
 
3490
  slider.addEventListener('input', () => {
3491
  const pv = parseInt(slider.value);
3492
  valLabel.textContent = pv + '%';
 
3493
  btn.textContent = `Switch to ${p.label} at ${pv}%`;
3494
  });
3495
 
 
3509
  body: JSON.stringify(body),
3510
  });
3511
  if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
3512
+ // 3. Sync the top probability slider
3513
+ updateLlmProbUI(probVal);
3514
+ showToast(`Switched to ${p.label} at ${Math.round(probVal*100)}% · ${runtime} runtime`, 'conv');
3515
  } catch (err) { showToast('LLM switch error', 'event'); }
3516
  });
3517
  });