Fix quota runtime estimates: use RPM as bottleneck, sync top probability slider
- Runtime estimate now based on RPM (Gemini 4 RPM = 240 calls/h, ~6.25h for 1500 RPD),
not probability, which barely affects duration with 50 agents
- Remove misleading per-probability runtime table from popup
- Show RPM and single accurate runtime estimate instead
- Fix ID collision between top slider and popup slider (use class selectors)
- Sync top probability slider when switching provider via popup
- Expose RPM and max_calls_per_hour in /api/llm/quota per-provider data
- Update nn_selfimprove budget calculations to use RPM-based math
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- scripts/nn_selfimprove.py +46 -41
- src/soci/api/routes.py +7 -4
- web/index.html +22 -28
scripts/nn_selfimprove.py
CHANGED
|
@@ -788,14 +788,12 @@ async def scheduled(
|
|
| 788 |
return True
|
| 789 |
|
| 790 |
async def calculate_probability(client: httpx.AsyncClient, target_minutes: int) -> float:
|
| 791 |
-
"""Query remaining Gemini quota and
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
- Expected LLM calls/hour = ticks_per_hour × min(sites × prob, max_calls_per_tick)
|
| 798 |
-
- Solve for prob: remaining_quota / (target_hours × ticks_per_hour × effective_rate)
|
| 799 |
"""
|
| 800 |
resp = await _api_call(client, "get", "/api/llm/quota")
|
| 801 |
if not resp or resp.status_code != 200:
|
|
@@ -804,33 +802,35 @@ async def scheduled(
|
|
| 804 |
|
| 805 |
quota = resp.json()
|
| 806 |
remaining = quota.get("remaining", 1500)
|
| 807 |
-
ticks_per_hour = quota.get("ticks_per_hour", 900)
|
| 808 |
-
max_calls_per_tick = quota.get("max_calls_per_tick", 2)
|
| 809 |
-
num_agents = quota.get("num_agents", 20)
|
| 810 |
|
| 811 |
if remaining <= 0:
|
| 812 |
logger.warning("No Gemini quota remaining!")
|
| 813 |
return 0.0
|
| 814 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
target_hours = target_minutes / 60.0
|
| 816 |
|
| 817 |
-
# There are ~4 LLM call sites per tick (plan, action, social, reflect),
|
| 818 |
-
# each gated by probability. But max_calls_per_tick caps the actual calls.
|
| 819 |
-
# Approximate: at prob P, expected calls/tick ≈ min(num_sites × P, max_calls_per_tick)
|
| 820 |
-
# We want: remaining = target_hours × ticks_per_hour × calls_per_tick
|
| 821 |
-
# So: calls_per_tick = remaining / (target_hours × ticks_per_hour)
|
| 822 |
-
# And: P = calls_per_tick / num_call_sites (since each site independently rolls P)
|
| 823 |
-
num_call_sites = 4 # plan, action, social, reflect
|
| 824 |
-
desired_calls_per_tick = remaining / (target_hours * ticks_per_hour)
|
| 825 |
-
# Clamp to max budget
|
| 826 |
-
desired_calls_per_tick = min(desired_calls_per_tick, max_calls_per_tick)
|
| 827 |
-
prob = desired_calls_per_tick / num_call_sites
|
| 828 |
-
prob = max(0.01, min(1.0, prob))
|
| 829 |
-
|
| 830 |
logger.info(
|
| 831 |
-
f"Quota: {remaining} remaining,
|
| 832 |
-
f"
|
| 833 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
return round(prob, 4)
|
| 835 |
|
| 836 |
async def wait_until_reset():
|
|
@@ -940,34 +940,39 @@ async def budget(
|
|
| 940 |
return
|
| 941 |
|
| 942 |
quota = resp.json()
|
| 943 |
-
remaining = quota.get("remaining", 0)
|
| 944 |
-
daily_limit = quota.get("daily_limit", 1500)
|
| 945 |
-
daily_requests = quota.get("daily_requests", 0)
|
| 946 |
-
ticks_per_hour = quota.get("ticks_per_hour", 900)
|
| 947 |
-
max_calls_per_tick = quota.get("max_calls_per_tick", 2)
|
| 948 |
provider = quota.get("provider", "?")
|
| 949 |
num_agents = quota.get("num_agents", 0)
|
| 950 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 951 |
logger.info(f"Provider: {provider}")
|
| 952 |
logger.info(f"Daily quota: {daily_requests}/{daily_limit} used, {remaining} remaining")
|
| 953 |
-
logger.info(f"
|
|
|
|
|
|
|
| 954 |
|
| 955 |
if remaining <= 0:
|
| 956 |
logger.warning("No quota remaining! Wait for reset (10:00 AM Athens).")
|
| 957 |
return
|
| 958 |
|
| 959 |
target_hours = target_minutes / 60.0
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
expected_calls = target_hours * ticks_per_hour * min(num_call_sites * prob, max_calls_per_tick)
|
| 968 |
logger.info(
|
| 969 |
f"Target: {target_minutes} min → probability {prob:.2%} "
|
| 970 |
-
f"(~{
|
| 971 |
)
|
| 972 |
|
| 973 |
if apply:
|
|
|
|
| 788 |
return True
|
| 789 |
|
| 790 |
async def calculate_probability(client: httpx.AsyncClient, target_minutes: int) -> float:
|
| 791 |
+
"""Query remaining Gemini quota and return a reasonable probability.
|
| 792 |
+
|
| 793 |
+
The real bottleneck is RPM (requests per minute), not probability.
|
| 794 |
+
With 50 agents, even low probability saturates the RPM rate limiter.
|
| 795 |
+
Gemini: 4 RPM → max 240 calls/hour → 1500 RPD lasts ~6.25h.
|
| 796 |
+
Probability mainly controls LLM-vs-routine quality, not quota duration.
|
|
|
|
|
|
|
| 797 |
"""
|
| 798 |
resp = await _api_call(client, "get", "/api/llm/quota")
|
| 799 |
if not resp or resp.status_code != 200:
|
|
|
|
| 802 |
|
| 803 |
quota = resp.json()
|
| 804 |
remaining = quota.get("remaining", 1500)
|
|
|
|
|
|
|
|
|
|
| 805 |
|
| 806 |
if remaining <= 0:
|
| 807 |
logger.warning("No Gemini quota remaining!")
|
| 808 |
return 0.0
|
| 809 |
|
| 810 |
+
# Get per-provider RPM info
|
| 811 |
+
providers = quota.get("providers", {})
|
| 812 |
+
gemini_info = providers.get("gemini", {})
|
| 813 |
+
rpm = gemini_info.get("rpm", 4)
|
| 814 |
+
max_calls_per_hour = rpm * 60
|
| 815 |
+
hours_available = remaining / max_calls_per_hour
|
| 816 |
target_hours = target_minutes / 60.0
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
logger.info(
|
| 819 |
+
f"Quota: {remaining} remaining, RPM={rpm} → "
|
| 820 |
+
f"max {max_calls_per_hour} calls/h → ~{hours_available:.1f}h available"
|
| 821 |
)
|
| 822 |
+
|
| 823 |
+
if hours_available >= target_hours:
|
| 824 |
+
prob = gemini_prob
|
| 825 |
+
logger.info(f"Quota sufficient for {target_minutes}min target → using {prob:.0%}")
|
| 826 |
+
else:
|
| 827 |
+
# Quota won't last — reduce probability (marginal help with many agents)
|
| 828 |
+
prob = max(0.02, 0.10 * (hours_available / target_hours))
|
| 829 |
+
logger.warning(
|
| 830 |
+
f"Quota only lasts ~{hours_available:.1f}h but target is {target_hours:.1f}h "
|
| 831 |
+
f"→ reducing probability to {prob:.1%}"
|
| 832 |
+
)
|
| 833 |
+
|
| 834 |
return round(prob, 4)
|
| 835 |
|
| 836 |
async def wait_until_reset():
|
|
|
|
| 940 |
return
|
| 941 |
|
| 942 |
quota = resp.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
provider = quota.get("provider", "?")
|
| 944 |
num_agents = quota.get("num_agents", 0)
|
| 945 |
|
| 946 |
+
# Get Gemini-specific quota from providers dict
|
| 947 |
+
providers = quota.get("providers", {})
|
| 948 |
+
gemini_info = providers.get("gemini", {})
|
| 949 |
+
remaining = gemini_info.get("remaining", quota.get("remaining", 0))
|
| 950 |
+
daily_limit = gemini_info.get("daily_limit", quota.get("daily_limit", 1500))
|
| 951 |
+
daily_requests = gemini_info.get("daily_requests", quota.get("daily_requests", 0))
|
| 952 |
+
rpm = gemini_info.get("rpm", 4)
|
| 953 |
+
max_calls_per_hour = rpm * 60
|
| 954 |
+
hours_available = remaining / max_calls_per_hour if max_calls_per_hour > 0 else 0
|
| 955 |
+
|
| 956 |
logger.info(f"Provider: {provider}")
|
| 957 |
logger.info(f"Daily quota: {daily_requests}/{daily_limit} used, {remaining} remaining")
|
| 958 |
+
logger.info(f"Rate limit: {rpm} RPM → max {max_calls_per_hour} calls/hour")
|
| 959 |
+
logger.info(f"Estimated runtime at max RPM: ~{hours_available:.1f}h")
|
| 960 |
+
logger.info(f"Sim: {num_agents} agents")
|
| 961 |
|
| 962 |
if remaining <= 0:
|
| 963 |
logger.warning("No quota remaining! Wait for reset (10:00 AM Athens).")
|
| 964 |
return
|
| 965 |
|
| 966 |
target_hours = target_minutes / 60.0
|
| 967 |
+
# Probability controls LLM-vs-routine quality, RPM is the real bottleneck
|
| 968 |
+
if hours_available >= target_hours:
|
| 969 |
+
prob = 0.20 # moderate: good mix of LLM and routine
|
| 970 |
+
else:
|
| 971 |
+
prob = max(0.02, 0.10 * (hours_available / target_hours))
|
| 972 |
+
|
|
|
|
|
|
|
| 973 |
logger.info(
|
| 974 |
f"Target: {target_minutes} min → probability {prob:.2%} "
|
| 975 |
+
f"(RPM-limited to ~{max_calls_per_hour} calls/h, {remaining} remaining)"
|
| 976 |
)
|
| 977 |
|
| 978 |
if apply:
|
src/soci/api/routes.py
CHANGED
|
@@ -382,11 +382,14 @@ async def get_llm_quota():
|
|
| 382 |
max_calls_per_tick = 2 if provider in ("gemini", "groq") else 5
|
| 383 |
num_agents = len(sim.agents)
|
| 384 |
|
| 385 |
-
# Per-provider
|
|
|
|
|
|
|
|
|
|
| 386 |
for pid in providers_quota:
|
| 387 |
-
|
| 388 |
-
providers_quota[pid]["
|
| 389 |
-
providers_quota[pid]["
|
| 390 |
|
| 391 |
return {
|
| 392 |
"provider": provider,
|
|
|
|
| 382 |
max_calls_per_tick = 2 if provider in ("gemini", "groq") else 5
|
| 383 |
num_agents = len(sim.agents)
|
| 384 |
|
| 385 |
+
# Per-provider rate info (RPM is the real bottleneck, not probability)
|
| 386 |
+
# Gemini: 4 RPM hard limit → max 240 calls/hour
|
| 387 |
+
# Groq: 28 RPM hard limit → max 1680 calls/hour
|
| 388 |
+
provider_rpm = {"gemini": 4, "groq": 28}
|
| 389 |
for pid in providers_quota:
|
| 390 |
+
rpm = provider_rpm.get(pid, 30)
|
| 391 |
+
providers_quota[pid]["rpm"] = rpm
|
| 392 |
+
providers_quota[pid]["max_calls_per_hour"] = rpm * 60
|
| 393 |
|
| 394 |
return {
|
| 395 |
"provider": provider,
|
web/index.html
CHANGED
|
@@ -3402,13 +3402,11 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3402 |
const existing = popup.querySelectorAll('.llm-opt,.llm-quota-panel');
|
| 3403 |
existing.forEach(el => el.remove());
|
| 3404 |
|
| 3405 |
-
// Estimate remaining runtime
|
| 3406 |
-
|
|
|
|
| 3407 |
if (!q || q.remaining <= 0) return 'exhausted';
|
| 3408 |
-
const
|
| 3409 |
-
const callsPerTick = Math.min(sites * prob, q.max_calls_per_tick || 2);
|
| 3410 |
-
const ticksH = q.ticks_per_hour || 900;
|
| 3411 |
-
const callsPerHour = ticksH * callsPerTick;
|
| 3412 |
if (callsPerHour <= 0) return '∞';
|
| 3413 |
const hours = q.remaining / callsPerHour;
|
| 3414 |
if (hours >= 48) return `~${Math.round(hours / 24)}d`;
|
|
@@ -3463,41 +3461,35 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3463 |
return;
|
| 3464 |
}
|
| 3465 |
|
| 3466 |
-
|
| 3467 |
-
const
|
| 3468 |
-
let tableRows = probs.map(pr => {
|
| 3469 |
-
const rt = estimateRuntime(pqForCalc, pr);
|
| 3470 |
-
return `<span style="display:inline-block;width:42px;text-align:right;color:#4ecca3">${Math.round(pr*100)}%</span>` +
|
| 3471 |
-
`<span style="color:#8899aa;margin-left:6px">${rt}</span>`;
|
| 3472 |
-
}).join('<br>');
|
| 3473 |
|
| 3474 |
panel.innerHTML =
|
| 3475 |
`<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
|
| 3476 |
-
`<div style="margin-bottom:
|
| 3477 |
-
`<div style="margin-bottom:6px;font-size:10px;color:#8899aa">
|
| 3478 |
-
|
| 3479 |
-
`<div style="display:flex;align-items:center;gap:8px">` +
|
| 3480 |
`<label style="font-size:11px;color:#8899aa">Probability:</label>` +
|
| 3481 |
-
`<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3"
|
| 3482 |
-
`<span
|
| 3483 |
`</div>` +
|
| 3484 |
-
`<div
|
| 3485 |
-
|
|
|
|
| 3486 |
`background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
|
| 3487 |
`Switch to ${p.label} at 20%</button>`;
|
| 3488 |
|
| 3489 |
row.after(panel);
|
| 3490 |
|
| 3491 |
-
// Wire up slider
|
| 3492 |
-
const slider = panel.querySelector('
|
| 3493 |
-
const valLabel = panel.querySelector('
|
| 3494 |
-
const
|
| 3495 |
-
const btn = panel.querySelector('#llm-switch-btn');
|
| 3496 |
|
| 3497 |
slider.addEventListener('input', () => {
|
| 3498 |
const pv = parseInt(slider.value);
|
| 3499 |
valLabel.textContent = pv + '%';
|
| 3500 |
-
rtLabel.textContent = 'Runtime: ' + estimateRuntime(pqForCalc, pv / 100);
|
| 3501 |
btn.textContent = `Switch to ${p.label} at ${pv}%`;
|
| 3502 |
});
|
| 3503 |
|
|
@@ -3517,7 +3509,9 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3517 |
body: JSON.stringify(body),
|
| 3518 |
});
|
| 3519 |
if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
|
| 3520 |
-
|
|
|
|
|
|
|
| 3521 |
} catch (err) { showToast('LLM switch error', 'event'); }
|
| 3522 |
});
|
| 3523 |
});
|
|
|
|
| 3402 |
const existing = popup.querySelectorAll('.llm-opt,.llm-quota-panel');
|
| 3403 |
existing.forEach(el => el.remove());
|
| 3404 |
|
| 3405 |
+
// Estimate remaining runtime based on RPM (the real bottleneck, not probability).
|
| 3406 |
+
// With 50 agents, even low probability saturates the RPM rate limiter.
|
| 3407 |
+
function estimateRuntime(q) {
|
| 3408 |
if (!q || q.remaining <= 0) return 'exhausted';
|
| 3409 |
+
const callsPerHour = q.max_calls_per_hour || (q.rpm || 4) * 60;
|
|
|
|
|
|
|
|
|
|
| 3410 |
if (callsPerHour <= 0) return '∞';
|
| 3411 |
const hours = q.remaining / callsPerHour;
|
| 3412 |
if (hours >= 48) return `~${Math.round(hours / 24)}d`;
|
|
|
|
| 3461 |
return;
|
| 3462 |
}
|
| 3463 |
|
| 3464 |
+
const rpm = pqForCalc.rpm || 4;
|
| 3465 |
+
const runtime = estimateRuntime(pqForCalc);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3466 |
|
| 3467 |
panel.innerHTML =
|
| 3468 |
`<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
|
| 3469 |
+
`<div style="margin-bottom:4px">Quota: <b>${rem}</b>/${lim} (${pct}%)</div>` +
|
| 3470 |
+
`<div style="margin-bottom:6px;font-size:10px;color:#8899aa">` +
|
| 3471 |
+
`Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
|
| 3472 |
+
`<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
|
| 3473 |
`<label style="font-size:11px;color:#8899aa">Probability:</label>` +
|
| 3474 |
+
`<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3" class="popup-prob-slider">` +
|
| 3475 |
+
`<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">20%</span>` +
|
| 3476 |
`</div>` +
|
| 3477 |
+
`<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
|
| 3478 |
+
`Higher = more LLM decisions, lower = more routine behavior</div>` +
|
| 3479 |
+
`<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
|
| 3480 |
`background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
|
| 3481 |
`Switch to ${p.label} at 20%</button>`;
|
| 3482 |
|
| 3483 |
row.after(panel);
|
| 3484 |
|
| 3485 |
+
// Wire up slider (use class selectors to avoid ID collision with top slider)
|
| 3486 |
+
const slider = panel.querySelector('.popup-prob-slider');
|
| 3487 |
+
const valLabel = panel.querySelector('.popup-prob-val');
|
| 3488 |
+
const btn = panel.querySelector('.popup-switch-btn');
|
|
|
|
| 3489 |
|
| 3490 |
slider.addEventListener('input', () => {
|
| 3491 |
const pv = parseInt(slider.value);
|
| 3492 |
valLabel.textContent = pv + '%';
|
|
|
|
| 3493 |
btn.textContent = `Switch to ${p.label} at ${pv}%`;
|
| 3494 |
});
|
| 3495 |
|
|
|
|
| 3509 |
body: JSON.stringify(body),
|
| 3510 |
});
|
| 3511 |
if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
|
| 3512 |
+
// 3. Sync the top probability slider
|
| 3513 |
+
updateLlmProbUI(probVal);
|
| 3514 |
+
showToast(`Switched to ${p.label} at ${Math.round(probVal*100)}% · ${runtime} runtime`, 'conv');
|
| 3515 |
} catch (err) { showToast('LLM switch error', 'event'); }
|
| 3516 |
});
|
| 3517 |
});
|