Fix quota runtime estimates: use RPM as bottleneck, sync top probability slider
- Runtime estimate now based on RPM (Gemini 4 RPM = 240 calls/h, ~6.25h for 1500 RPD),
not probability, which barely affects duration with 50 agents
- Remove misleading per-probability runtime table from popup
- Show RPM and single accurate runtime estimate instead
- Fix ID collision between top slider and popup slider (use class selectors)
- Sync top probability slider when switching provider via popup
- Expose RPM and max_calls_per_hour in /api/llm/quota per-provider data
- Update nn_selfimprove budget calculations to use RPM-based math
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- scripts/nn_selfimprove.py +46 -41
- src/soci/api/routes.py +7 -4
- web/index.html +22 -28
scripts/nn_selfimprove.py
CHANGED
|
@@ -788,14 +788,12 @@ async def scheduled(
|
|
| 788 |
return True
|
| 789 |
|
| 790 |
async def calculate_probability(client: httpx.AsyncClient, target_minutes: int) -> float:
|
| 791 |
-
"""Query remaining Gemini quota and
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
- Expected LLM calls/hour = ticks_per_hour × min(sites × prob, max_calls_per_tick)
|
| 798 |
-
- Solve for prob: remaining_quota / (target_hours × ticks_per_hour × effective_rate)
|
| 799 |
"""
|
| 800 |
resp = await _api_call(client, "get", "/api/llm/quota")
|
| 801 |
if not resp or resp.status_code != 200:
|
|
@@ -804,33 +802,35 @@ async def scheduled(
|
|
| 804 |
|
| 805 |
quota = resp.json()
|
| 806 |
remaining = quota.get("remaining", 1500)
|
| 807 |
-
ticks_per_hour = quota.get("ticks_per_hour", 900)
|
| 808 |
-
max_calls_per_tick = quota.get("max_calls_per_tick", 2)
|
| 809 |
-
num_agents = quota.get("num_agents", 20)
|
| 810 |
|
| 811 |
if remaining <= 0:
|
| 812 |
logger.warning("No Gemini quota remaining!")
|
| 813 |
return 0.0
|
| 814 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
target_hours = target_minutes / 60.0
|
| 816 |
|
| 817 |
-
# There are ~4 LLM call sites per tick (plan, action, social, reflect),
|
| 818 |
-
# each gated by probability. But max_calls_per_tick caps the actual calls.
|
| 819 |
-
# Approximate: at prob P, expected calls/tick ≈ min(num_sites × P, max_calls_per_tick)
|
| 820 |
-
# We want: remaining = target_hours × ticks_per_hour × calls_per_tick
|
| 821 |
-
# So: calls_per_tick = remaining / (target_hours × ticks_per_hour)
|
| 822 |
-
# And: P = calls_per_tick / num_call_sites (since each site independently rolls P)
|
| 823 |
-
num_call_sites = 4 # plan, action, social, reflect
|
| 824 |
-
desired_calls_per_tick = remaining / (target_hours * ticks_per_hour)
|
| 825 |
-
# Clamp to max budget
|
| 826 |
-
desired_calls_per_tick = min(desired_calls_per_tick, max_calls_per_tick)
|
| 827 |
-
prob = desired_calls_per_tick / num_call_sites
|
| 828 |
-
prob = max(0.01, min(1.0, prob))
|
| 829 |
-
|
| 830 |
logger.info(
|
| 831 |
-
f"Quota: {remaining} remaining,
|
| 832 |
-
f"
|
| 833 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
return round(prob, 4)
|
| 835 |
|
| 836 |
async def wait_until_reset():
|
|
@@ -940,34 +940,39 @@ async def budget(
|
|
| 940 |
return
|
| 941 |
|
| 942 |
quota = resp.json()
|
| 943 |
-
remaining = quota.get("remaining", 0)
|
| 944 |
-
daily_limit = quota.get("daily_limit", 1500)
|
| 945 |
-
daily_requests = quota.get("daily_requests", 0)
|
| 946 |
-
ticks_per_hour = quota.get("ticks_per_hour", 900)
|
| 947 |
-
max_calls_per_tick = quota.get("max_calls_per_tick", 2)
|
| 948 |
provider = quota.get("provider", "?")
|
| 949 |
num_agents = quota.get("num_agents", 0)
|
| 950 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 951 |
logger.info(f"Provider: {provider}")
|
| 952 |
logger.info(f"Daily quota: {daily_requests}/{daily_limit} used, {remaining} remaining")
|
| 953 |
-
logger.info(f"
|
|
|
|
|
|
|
| 954 |
|
| 955 |
if remaining <= 0:
|
| 956 |
logger.warning("No quota remaining! Wait for reset (10:00 AM Athens).")
|
| 957 |
return
|
| 958 |
|
| 959 |
target_hours = target_minutes / 60.0
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
expected_calls = target_hours * ticks_per_hour * min(num_call_sites * prob, max_calls_per_tick)
|
| 968 |
logger.info(
|
| 969 |
f"Target: {target_minutes} min → probability {prob:.2%} "
|
| 970 |
-
f"(~{
|
| 971 |
)
|
| 972 |
|
| 973 |
if apply:
|
|
|
|
| 788 |
return True
|
| 789 |
|
| 790 |
async def calculate_probability(client: httpx.AsyncClient, target_minutes: int) -> float:
|
| 791 |
+
"""Query remaining Gemini quota and return a reasonable probability.
|
| 792 |
+
|
| 793 |
+
The real bottleneck is RPM (requests per minute), not probability.
|
| 794 |
+
With 50 agents, even low probability saturates the RPM rate limiter.
|
| 795 |
+
Gemini: 4 RPM → max 240 calls/hour → 1500 RPD lasts ~6.25h.
|
| 796 |
+
Probability mainly controls LLM-vs-routine quality, not quota duration.
|
|
|
|
|
|
|
| 797 |
"""
|
| 798 |
resp = await _api_call(client, "get", "/api/llm/quota")
|
| 799 |
if not resp or resp.status_code != 200:
|
|
|
|
| 802 |
|
| 803 |
quota = resp.json()
|
| 804 |
remaining = quota.get("remaining", 1500)
|
|
|
|
|
|
|
|
|
|
| 805 |
|
| 806 |
if remaining <= 0:
|
| 807 |
logger.warning("No Gemini quota remaining!")
|
| 808 |
return 0.0
|
| 809 |
|
| 810 |
+
# Get per-provider RPM info
|
| 811 |
+
providers = quota.get("providers", {})
|
| 812 |
+
gemini_info = providers.get("gemini", {})
|
| 813 |
+
rpm = gemini_info.get("rpm", 4)
|
| 814 |
+
max_calls_per_hour = rpm * 60
|
| 815 |
+
hours_available = remaining / max_calls_per_hour
|
| 816 |
target_hours = target_minutes / 60.0
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
logger.info(
|
| 819 |
+
f"Quota: {remaining} remaining, RPM={rpm} → "
|
| 820 |
+
f"max {max_calls_per_hour} calls/h → ~{hours_available:.1f}h available"
|
| 821 |
)
|
| 822 |
+
|
| 823 |
+
if hours_available >= target_hours:
|
| 824 |
+
prob = gemini_prob
|
| 825 |
+
logger.info(f"Quota sufficient for {target_minutes}min target → using {prob:.0%}")
|
| 826 |
+
else:
|
| 827 |
+
# Quota won't last — reduce probability (marginal help with many agents)
|
| 828 |
+
prob = max(0.02, 0.10 * (hours_available / target_hours))
|
| 829 |
+
logger.warning(
|
| 830 |
+
f"Quota only lasts ~{hours_available:.1f}h but target is {target_hours:.1f}h "
|
| 831 |
+
f"→ reducing probability to {prob:.1%}"
|
| 832 |
+
)
|
| 833 |
+
|
| 834 |
return round(prob, 4)
|
| 835 |
|
| 836 |
async def wait_until_reset():
|
|
|
|
| 940 |
return
|
| 941 |
|
| 942 |
quota = resp.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
provider = quota.get("provider", "?")
|
| 944 |
num_agents = quota.get("num_agents", 0)
|
| 945 |
|
| 946 |
+
# Get Gemini-specific quota from providers dict
|
| 947 |
+
providers = quota.get("providers", {})
|
| 948 |
+
gemini_info = providers.get("gemini", {})
|
| 949 |
+
remaining = gemini_info.get("remaining", quota.get("remaining", 0))
|
| 950 |
+
daily_limit = gemini_info.get("daily_limit", quota.get("daily_limit", 1500))
|
| 951 |
+
daily_requests = gemini_info.get("daily_requests", quota.get("daily_requests", 0))
|
| 952 |
+
rpm = gemini_info.get("rpm", 4)
|
| 953 |
+
max_calls_per_hour = rpm * 60
|
| 954 |
+
hours_available = remaining / max_calls_per_hour if max_calls_per_hour > 0 else 0
|
| 955 |
+
|
| 956 |
logger.info(f"Provider: {provider}")
|
| 957 |
logger.info(f"Daily quota: {daily_requests}/{daily_limit} used, {remaining} remaining")
|
| 958 |
+
logger.info(f"Rate limit: {rpm} RPM → max {max_calls_per_hour} calls/hour")
|
| 959 |
+
logger.info(f"Estimated runtime at max RPM: ~{hours_available:.1f}h")
|
| 960 |
+
logger.info(f"Sim: {num_agents} agents")
|
| 961 |
|
| 962 |
if remaining <= 0:
|
| 963 |
logger.warning("No quota remaining! Wait for reset (10:00 AM Athens).")
|
| 964 |
return
|
| 965 |
|
| 966 |
target_hours = target_minutes / 60.0
|
| 967 |
+
# Probability controls LLM-vs-routine quality, RPM is the real bottleneck
|
| 968 |
+
if hours_available >= target_hours:
|
| 969 |
+
prob = 0.20 # moderate: good mix of LLM and routine
|
| 970 |
+
else:
|
| 971 |
+
prob = max(0.02, 0.10 * (hours_available / target_hours))
|
| 972 |
+
|
|
|
|
|
|
|
| 973 |
logger.info(
|
| 974 |
f"Target: {target_minutes} min → probability {prob:.2%} "
|
| 975 |
+
f"(RPM-limited to ~{max_calls_per_hour} calls/h, {remaining} remaining)"
|
| 976 |
)
|
| 977 |
|
| 978 |
if apply:
|
src/soci/api/routes.py
CHANGED
|
@@ -382,11 +382,14 @@ async def get_llm_quota():
|
|
| 382 |
max_calls_per_tick = 2 if provider in ("gemini", "groq") else 5
|
| 383 |
num_agents = len(sim.agents)
|
| 384 |
|
| 385 |
-
# Per-provider
|
|
|
|
|
|
|
|
|
|
| 386 |
for pid in providers_quota:
|
| 387 |
-
|
| 388 |
-
providers_quota[pid]["
|
| 389 |
-
providers_quota[pid]["
|
| 390 |
|
| 391 |
return {
|
| 392 |
"provider": provider,
|
|
|
|
| 382 |
max_calls_per_tick = 2 if provider in ("gemini", "groq") else 5
|
| 383 |
num_agents = len(sim.agents)
|
| 384 |
|
| 385 |
+
# Per-provider rate info (RPM is the real bottleneck, not probability)
|
| 386 |
+
# Gemini: 4 RPM hard limit → max 240 calls/hour
|
| 387 |
+
# Groq: 28 RPM hard limit → max 1680 calls/hour
|
| 388 |
+
provider_rpm = {"gemini": 4, "groq": 28}
|
| 389 |
for pid in providers_quota:
|
| 390 |
+
rpm = provider_rpm.get(pid, 30)
|
| 391 |
+
providers_quota[pid]["rpm"] = rpm
|
| 392 |
+
providers_quota[pid]["max_calls_per_hour"] = rpm * 60
|
| 393 |
|
| 394 |
return {
|
| 395 |
"provider": provider,
|
web/index.html
CHANGED
|
@@ -3402,13 +3402,11 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3402 |
const existing = popup.querySelectorAll('.llm-opt,.llm-quota-panel');
|
| 3403 |
existing.forEach(el => el.remove());
|
| 3404 |
|
| 3405 |
-
// Estimate remaining runtime
|
| 3406 |
-
|
|
|
|
| 3407 |
if (!q || q.remaining <= 0) return 'exhausted';
|
| 3408 |
-
const
|
| 3409 |
-
const callsPerTick = Math.min(sites * prob, q.max_calls_per_tick || 2);
|
| 3410 |
-
const ticksH = q.ticks_per_hour || 900;
|
| 3411 |
-
const callsPerHour = ticksH * callsPerTick;
|
| 3412 |
if (callsPerHour <= 0) return '∞';
|
| 3413 |
const hours = q.remaining / callsPerHour;
|
| 3414 |
if (hours >= 48) return `~${Math.round(hours / 24)}d`;
|
|
@@ -3463,41 +3461,35 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3463 |
return;
|
| 3464 |
}
|
| 3465 |
|
| 3466 |
-
|
| 3467 |
-
const
|
| 3468 |
-
let tableRows = probs.map(pr => {
|
| 3469 |
-
const rt = estimateRuntime(pqForCalc, pr);
|
| 3470 |
-
return `<span style="display:inline-block;width:42px;text-align:right;color:#4ecca3">${Math.round(pr*100)}%</span>` +
|
| 3471 |
-
`<span style="color:#8899aa;margin-left:6px">${rt}</span>`;
|
| 3472 |
-
}).join('<br>');
|
| 3473 |
|
| 3474 |
panel.innerHTML =
|
| 3475 |
`<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
|
| 3476 |
-
`<div style="margin-bottom:
|
| 3477 |
-
`<div style="margin-bottom:6px;font-size:10px;color:#8899aa">
|
| 3478 |
-
|
| 3479 |
-
`<div style="display:flex;align-items:center;gap:8px">` +
|
| 3480 |
`<label style="font-size:11px;color:#8899aa">Probability:</label>` +
|
| 3481 |
-
`<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3"
|
| 3482 |
-
`<span
|
| 3483 |
`</div>` +
|
| 3484 |
-
`<div
|
| 3485 |
-
|
|
|
|
| 3486 |
`background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
|
| 3487 |
`Switch to ${p.label} at 20%</button>`;
|
| 3488 |
|
| 3489 |
row.after(panel);
|
| 3490 |
|
| 3491 |
-
// Wire up slider
|
| 3492 |
-
const slider = panel.querySelector('
|
| 3493 |
-
const valLabel = panel.querySelector('
|
| 3494 |
-
const
|
| 3495 |
-
const btn = panel.querySelector('#llm-switch-btn');
|
| 3496 |
|
| 3497 |
slider.addEventListener('input', () => {
|
| 3498 |
const pv = parseInt(slider.value);
|
| 3499 |
valLabel.textContent = pv + '%';
|
| 3500 |
-
rtLabel.textContent = 'Runtime: ' + estimateRuntime(pqForCalc, pv / 100);
|
| 3501 |
btn.textContent = `Switch to ${p.label} at ${pv}%`;
|
| 3502 |
});
|
| 3503 |
|
|
@@ -3517,7 +3509,9 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3517 |
body: JSON.stringify(body),
|
| 3518 |
});
|
| 3519 |
if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
|
| 3520 |
-
|
|
|
|
|
|
|
| 3521 |
} catch (err) { showToast('LLM switch error', 'event'); }
|
| 3522 |
});
|
| 3523 |
});
|
|
|
|
| 3402 |
const existing = popup.querySelectorAll('.llm-opt,.llm-quota-panel');
|
| 3403 |
existing.forEach(el => el.remove());
|
| 3404 |
|
| 3405 |
+
// Estimate remaining runtime based on RPM (the real bottleneck, not probability).
|
| 3406 |
+
// With 50 agents, even low probability saturates the RPM rate limiter.
|
| 3407 |
+
function estimateRuntime(q) {
|
| 3408 |
if (!q || q.remaining <= 0) return 'exhausted';
|
| 3409 |
+
const callsPerHour = q.max_calls_per_hour || (q.rpm || 4) * 60;
|
|
|
|
|
|
|
|
|
|
| 3410 |
if (callsPerHour <= 0) return '∞';
|
| 3411 |
const hours = q.remaining / callsPerHour;
|
| 3412 |
if (hours >= 48) return `~${Math.round(hours / 24)}d`;
|
|
|
|
| 3461 |
return;
|
| 3462 |
}
|
| 3463 |
|
| 3464 |
+
const rpm = pqForCalc.rpm || 4;
|
| 3465 |
+
const runtime = estimateRuntime(pqForCalc);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3466 |
|
| 3467 |
panel.innerHTML =
|
| 3468 |
`<div style="color:#4ecca3;font-weight:600;margin-bottom:4px">${p.icon} ${p.label}</div>` +
|
| 3469 |
+
`<div style="margin-bottom:4px">Quota: <b>${rem}</b>/${lim} (${pct}%)</div>` +
|
| 3470 |
+
`<div style="margin-bottom:6px;font-size:10px;color:#8899aa">` +
|
| 3471 |
+
`Rate limit: ${rpm} req/min · Estimated runtime: <b style="color:#4ecca3">${runtime}</b></div>` +
|
| 3472 |
+
`<div style="display:flex;align-items:center;gap:8px;margin-top:6px">` +
|
| 3473 |
`<label style="font-size:11px;color:#8899aa">Probability:</label>` +
|
| 3474 |
+
`<input type="range" min="1" max="100" value="20" style="flex:1;accent-color:#4ecca3" class="popup-prob-slider">` +
|
| 3475 |
+
`<span class="popup-prob-val" style="font-size:12px;color:#4ecca3;min-width:32px">20%</span>` +
|
| 3476 |
`</div>` +
|
| 3477 |
+
`<div style="font-size:10px;color:#8899aa;margin:2px 0 8px 0">` +
|
| 3478 |
+
`Higher = more LLM decisions, lower = more routine behavior</div>` +
|
| 3479 |
+
`<button class="popup-switch-btn" style="width:100%;padding:6px;border:none;border-radius:4px;` +
|
| 3480 |
`background:#4ecca3;color:#0a0a23;font-weight:600;cursor:pointer;font-size:12px">` +
|
| 3481 |
`Switch to ${p.label} at 20%</button>`;
|
| 3482 |
|
| 3483 |
row.after(panel);
|
| 3484 |
|
| 3485 |
+
// Wire up slider (use class selectors to avoid ID collision with top slider)
|
| 3486 |
+
const slider = panel.querySelector('.popup-prob-slider');
|
| 3487 |
+
const valLabel = panel.querySelector('.popup-prob-val');
|
| 3488 |
+
const btn = panel.querySelector('.popup-switch-btn');
|
|
|
|
| 3489 |
|
| 3490 |
slider.addEventListener('input', () => {
|
| 3491 |
const pv = parseInt(slider.value);
|
| 3492 |
valLabel.textContent = pv + '%';
|
|
|
|
| 3493 |
btn.textContent = `Switch to ${p.label} at ${pv}%`;
|
| 3494 |
});
|
| 3495 |
|
|
|
|
| 3509 |
body: JSON.stringify(body),
|
| 3510 |
});
|
| 3511 |
if (!r.ok) { const err = await r.json(); showToast(`LLM switch failed: ${err.detail}`, 'event'); return; }
|
| 3512 |
+
// 3. Sync the top probability slider
|
| 3513 |
+
updateLlmProbUI(probVal);
|
| 3514 |
+
showToast(`Switched to ${p.label} at ${Math.round(probVal*100)}% · ${runtime} runtime`, 'conv');
|
| 3515 |
} catch (err) { showToast('LLM switch error', 'event'); }
|
| 3516 |
});
|
| 3517 |
});
|