Add LLM probability slider: controls AI usage per call site
Adds a 0–100% slider (brain emoji, in the toolbar between speed and
zoom controls) that gates every LLM call in the simulation:
- Action decisions, conversation turns, reflections, social starts,
daily plan generation — each rolls random() < llm_call_probability.
Provider defaults (tuned for free-tier daily quotas):
Gemini → 45% (4 RPM + budget=1 → ~150 calls/h → ~10h at 1500 RPD)
Groq → 70% (conserves daily token budget)
HF → 45%
Ollama/Claude → 100% (no quota)
Also reduces Gemini's _max_llm_calls_this_tick from 4 → 1 so the
rate limiter (15s/call) naturally paces the simulation without
needing to burn all 4 budget slots every tick.
Override default via SOCI_LLM_PROB env var (0.0–1.0).
API: GET /api/controls → llm_call_probability field
POST /api/controls/llm_probability?value=0.45
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/api/routes.py +12 -2
- src/soci/api/server.py +33 -2
- src/soci/engine/simulation.py +11 -6
- web/index.html +27 -1
|
@@ -693,8 +693,18 @@ async def get_events(limit: int = 50):
|
|
| 693 |
@router.get("/controls")
|
| 694 |
async def get_controls():
|
| 695 |
"""Get current simulation control state."""
|
| 696 |
-
from soci.api.server import _sim_paused, _sim_speed
|
| 697 |
-
return {"paused": _sim_paused, "speed": _sim_speed}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
|
| 699 |
|
| 700 |
@router.post("/controls/pause")
|
|
|
|
| 693 |
@router.get("/controls")
|
| 694 |
async def get_controls():
|
| 695 |
"""Get current simulation control state."""
|
| 696 |
+
from soci.api.server import _sim_paused, _sim_speed, _llm_call_probability
|
| 697 |
+
return {"paused": _sim_paused, "speed": _sim_speed, "llm_call_probability": _llm_call_probability}
|
| 698 |
+
|
| 699 |
+
|
| 700 |
+
@router.post("/controls/llm_probability")
|
| 701 |
+
async def set_llm_probability(value: float = 1.0):
|
| 702 |
+
"""Set LLM call probability (0.0β1.0). Controls how often agents use LLM vs. routine behaviour.
|
| 703 |
+
At 0.45 with Gemini free tier: ~150 calls/h → ~10h daily runtime."""
|
| 704 |
+
from soci.api.server import set_llm_call_probability
|
| 705 |
+
set_llm_call_probability(value)
|
| 706 |
+
from soci.api.server import _llm_call_probability
|
| 707 |
+
return {"llm_call_probability": _llm_call_probability}
|
| 708 |
|
| 709 |
|
| 710 |
@router.post("/controls/pause")
|
|
@@ -41,6 +41,7 @@ _sim_task: Optional[asyncio.Task] = None
|
|
| 41 |
_sim_paused: bool = False
|
| 42 |
_sim_speed: float = 1.0 # 1.0 = normal, 0.5 = fast, 2.0 = slow
|
| 43 |
_llm_provider: str = "" # Track which provider is active
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
def get_simulation() -> Simulation:
|
|
@@ -57,6 +58,17 @@ def get_llm_provider() -> str:
|
|
| 57 |
return _llm_provider
|
| 58 |
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
async def switch_llm_provider(provider: str, model: Optional[str] = None) -> None:
|
| 61 |
"""Hot-swap the LLM client on the running simulation."""
|
| 62 |
global _llm_provider, _simulation
|
|
@@ -97,14 +109,18 @@ async def simulation_loop(sim: Simulation, db: Database, tick_delay: float = 2.0
|
|
| 97 |
else:
|
| 98 |
sim._skip_llm_this_tick = False
|
| 99 |
if is_rate_limited:
|
| 100 |
-
# Rate-limited providers
|
|
|
|
| 101 |
sim._max_convos_this_tick = 1
|
| 102 |
-
sim._max_llm_calls_this_tick = 4
|
| 103 |
else:
|
| 104 |
# Ollama / Claude: soft cap to keep ticks responsive
|
| 105 |
sim._max_convos_this_tick = 3
|
| 106 |
sim._max_llm_calls_this_tick = 10
|
| 107 |
|
|
|
|
|
|
|
|
|
|
| 108 |
await sim.tick()
|
| 109 |
|
| 110 |
# Auto-save every 24 ticks (~6 sim-hours)
|
|
@@ -306,6 +322,21 @@ async def lifespan(app: FastAPI):
|
|
| 306 |
llm = create_llm_client(provider=_llm_provider)
|
| 307 |
logger.info(f"LLM provider: {_llm_provider} ({llm.__class__.__name__})")
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
db = Database()
|
| 310 |
await db.connect()
|
| 311 |
_database = db
|
|
|
|
| 41 |
_sim_paused: bool = False
|
| 42 |
_sim_speed: float = 1.0 # 1.0 = normal, 0.5 = fast, 2.0 = slow
|
| 43 |
_llm_provider: str = "" # Track which provider is active
|
| 44 |
+
_llm_call_probability: float = 1.0 # 0.0–1.0; set per-provider on startup, adjustable via slider
|
| 45 |
|
| 46 |
|
| 47 |
def get_simulation() -> Simulation:
|
|
|
|
| 58 |
return _llm_provider
|
| 59 |
|
| 60 |
|
| 61 |
+
def get_llm_call_probability() -> float:
|
| 62 |
+
return _llm_call_probability
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def set_llm_call_probability(value: float) -> None:
|
| 66 |
+
global _llm_call_probability, _simulation
|
| 67 |
+
_llm_call_probability = max(0.0, min(1.0, value))
|
| 68 |
+
if _simulation is not None:
|
| 69 |
+
_simulation.llm_call_probability = _llm_call_probability
|
| 70 |
+
|
| 71 |
+
|
| 72 |
async def switch_llm_provider(provider: str, model: Optional[str] = None) -> None:
|
| 73 |
"""Hot-swap the LLM client on the running simulation."""
|
| 74 |
global _llm_provider, _simulation
|
|
|
|
| 109 |
else:
|
| 110 |
sim._skip_llm_this_tick = False
|
| 111 |
if is_rate_limited:
|
| 112 |
+
# Rate-limited providers: tight budget — probability slider does the fine-tuning.
|
| 113 |
+
# Gemini free tier: 4 RPM, ~1500 RPD → budget=1 + prob=0.45 → 150 calls/h (10h).
|
| 114 |
sim._max_convos_this_tick = 1
|
| 115 |
+
sim._max_llm_calls_this_tick = 1
|
| 116 |
else:
|
| 117 |
# Ollama / Claude: soft cap to keep ticks responsive
|
| 118 |
sim._max_convos_this_tick = 3
|
| 119 |
sim._max_llm_calls_this_tick = 10
|
| 120 |
|
| 121 |
+
# Apply the runtime probability slider every tick
|
| 122 |
+
sim.llm_call_probability = _llm_call_probability
|
| 123 |
+
|
| 124 |
await sim.tick()
|
| 125 |
|
| 126 |
# Auto-save every 24 ticks (~6 sim-hours)
|
|
|
|
| 322 |
llm = create_llm_client(provider=_llm_provider)
|
| 323 |
logger.info(f"LLM provider: {_llm_provider} ({llm.__class__.__name__})")
|
| 324 |
|
| 325 |
+
# Default LLM call probability — tuned per provider to stay within free-tier daily quotas.
|
| 326 |
+
# Gemini free tier: 4 RPM, ~1500 RPD → 0.45 → 150 calls/h → ~10h runtime per day.
|
| 327 |
+
# Groq free tier: 30 RPM, limited daily tokens → 0.70 to conserve budget.
|
| 328 |
+
# Ollama / Claude: no quota → 1.0 (full fidelity).
|
| 329 |
+
# Override via SOCI_LLM_PROB env var (0.0–1.0).
|
| 330 |
+
_provider_default_prob = {
|
| 331 |
+
PROVIDER_GEMINI: 0.45,
|
| 332 |
+
PROVIDER_GROQ: 0.70,
|
| 333 |
+
PROVIDER_HF: 0.45,
|
| 334 |
+
}
|
| 335 |
+
_llm_call_probability = float(
|
| 336 |
+
os.environ.get("SOCI_LLM_PROB", str(_provider_default_prob.get(_llm_provider, 1.0)))
|
| 337 |
+
)
|
| 338 |
+
logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
|
| 339 |
+
|
| 340 |
db = Database()
|
| 341 |
await db.connect()
|
| 342 |
_database = db
|
|
@@ -64,6 +64,9 @@ class Simulation:
|
|
| 64 |
self._max_convos_this_tick: int = 0 # 0 = no limit
|
| 65 |
self._max_llm_calls_this_tick: int = 0 # 0 = no limit; global budget across all categories
|
| 66 |
self._llm_calls_this_tick: int = 0 # counter, reset each tick
|
|
|
|
|
|
|
|
|
|
| 67 |
# Callback for real-time output
|
| 68 |
self.on_event: Optional[Callable[[str], None]] = None
|
| 69 |
|
|
@@ -169,7 +172,7 @@ class Simulation:
|
|
| 169 |
plan_items[:8], self.clock.day,
|
| 170 |
self.clock.total_ticks, self.clock.time_str,
|
| 171 |
)
|
| 172 |
-
|
| 173 |
plan_coros.append(self._generate_daily_plan(agent))
|
| 174 |
plan_agents.append(agent)
|
| 175 |
|
|
@@ -233,7 +236,7 @@ class Simulation:
|
|
| 233 |
continue
|
| 234 |
|
| 235 |
# No routine slot β fallback to LLM (rare), skip in fast-forward
|
| 236 |
-
if not self._skip_llm_this_tick:
|
| 237 |
action_coros.append(self._decide_action(agent))
|
| 238 |
action_agents.append(agent)
|
| 239 |
|
|
@@ -276,7 +279,7 @@ class Simulation:
|
|
| 276 |
if next_speaker_id:
|
| 277 |
responder = self.agents.get(next_speaker_id[0])
|
| 278 |
other = self.agents.get(last_speaker) if last_speaker else None
|
| 279 |
-
if responder and other:
|
| 280 |
conv_coros.append(
|
| 281 |
continue_conversation(conv, responder, other, self.llm, self.clock)
|
| 282 |
)
|
|
@@ -303,7 +306,8 @@ class Simulation:
|
|
| 303 |
# 7. Social: maybe start new conversations (respect speed limits + budget)
|
| 304 |
if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
|
| 305 |
if self._max_convos_this_tick == 0 or len(self.active_conversations) < self._max_convos_this_tick:
|
| 306 |
-
|
|
|
|
| 307 |
|
| 308 |
# 8. Reflections for agents with enough accumulated importance
|
| 309 |
if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
|
|
@@ -311,8 +315,9 @@ class Simulation:
|
|
| 311 |
reflect_agents = []
|
| 312 |
for agent in ordered_agents:
|
| 313 |
if agent.memory.should_reflect() and not agent.is_player:
|
| 314 |
-
|
| 315 |
-
|
|
|
|
| 316 |
|
| 317 |
# Limit by speed cap and global budget
|
| 318 |
reflect_cap = min(
|
|
|
|
| 64 |
self._max_convos_this_tick: int = 0 # 0 = no limit
|
| 65 |
self._max_llm_calls_this_tick: int = 0 # 0 = no limit; global budget across all categories
|
| 66 |
self._llm_calls_this_tick: int = 0 # counter, reset each tick
|
| 67 |
+
# LLM call probability: 0.0 = never use LLM (routine only), 1.0 = always (default).
|
| 68 |
+
# Applied per potential LLM call site. Tuned at 0.45 for ~10h Gemini free-tier runtime.
|
| 69 |
+
self.llm_call_probability: float = 1.0
|
| 70 |
# Callback for real-time output
|
| 71 |
self.on_event: Optional[Callable[[str], None]] = None
|
| 72 |
|
|
|
|
| 172 |
plan_items[:8], self.clock.day,
|
| 173 |
self.clock.total_ticks, self.clock.time_str,
|
| 174 |
)
|
| 175 |
+
elif random.random() < self.llm_call_probability:
|
| 176 |
plan_coros.append(self._generate_daily_plan(agent))
|
| 177 |
plan_agents.append(agent)
|
| 178 |
|
|
|
|
| 236 |
continue
|
| 237 |
|
| 238 |
# No routine slot β fallback to LLM (rare), skip in fast-forward
|
| 239 |
+
if not self._skip_llm_this_tick and random.random() < self.llm_call_probability:
|
| 240 |
action_coros.append(self._decide_action(agent))
|
| 241 |
action_agents.append(agent)
|
| 242 |
|
|
|
|
| 279 |
if next_speaker_id:
|
| 280 |
responder = self.agents.get(next_speaker_id[0])
|
| 281 |
other = self.agents.get(last_speaker) if last_speaker else None
|
| 282 |
+
if responder and other and random.random() < self.llm_call_probability:
|
| 283 |
conv_coros.append(
|
| 284 |
continue_conversation(conv, responder, other, self.llm, self.clock)
|
| 285 |
)
|
|
|
|
| 306 |
# 7. Social: maybe start new conversations (respect speed limits + budget)
|
| 307 |
if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
|
| 308 |
if self._max_convos_this_tick == 0 or len(self.active_conversations) < self._max_convos_this_tick:
|
| 309 |
+
if random.random() < self.llm_call_probability:
|
| 310 |
+
await self._handle_social_interactions(ordered_agents)
|
| 311 |
|
| 312 |
# 8. Reflections for agents with enough accumulated importance
|
| 313 |
if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
|
|
|
|
| 315 |
reflect_agents = []
|
| 316 |
for agent in ordered_agents:
|
| 317 |
if agent.memory.should_reflect() and not agent.is_player:
|
| 318 |
+
if random.random() < self.llm_call_probability:
|
| 319 |
+
reflect_coros.append(self._generate_reflection(agent))
|
| 320 |
+
reflect_agents.append(agent)
|
| 321 |
|
| 322 |
# Limit by speed cap and global budget
|
| 323 |
reflect_cap = min(
|
|
@@ -272,7 +272,13 @@
|
|
| 272 |
<button class="ctrl-btn" id="btn-10x" onclick="setSpeed(0.1)" title="10x speed">10x</button>
|
| 273 |
<button class="ctrl-btn" id="btn-50x" onclick="setSpeed(0.02)" title="50x speed">50x</button>
|
| 274 |
<span class="speed-label" id="speed-label">1x</span>
|
| 275 |
-
<span style="color:#1a3a6e;margin:0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
<button class="ctrl-btn" id="btn-rect-zoom" onclick="toggleRectZoom()" title="Draw a rectangle to zoom into that area (Shift+drag)">β¬</button>
|
| 277 |
<button class="ctrl-btn" onclick="zoomBy(1.3)" title="Zoom In (scroll up)">οΌ</button>
|
| 278 |
<button class="ctrl-btn" onclick="zoomBy(1/1.3)" title="Zoom Out (scroll down)">οΌ</button>
|
|
@@ -3013,6 +3019,7 @@ async function fetchState() {
|
|
| 3013 |
// ============================================================
|
| 3014 |
let simPaused = false;
|
| 3015 |
let simSpeed = 1.0;
|
|
|
|
| 3016 |
|
| 3017 |
async function togglePause() {
|
| 3018 |
try {
|
|
@@ -3069,6 +3076,24 @@ function updateControlsUI() {
|
|
| 3069 |
document.getElementById('speed-label').textContent = label;
|
| 3070 |
}
|
| 3071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3072 |
async function fetchControls() {
|
| 3073 |
try {
|
| 3074 |
const res = await fetch(`${API_BASE}/controls`);
|
|
@@ -3076,6 +3101,7 @@ async function fetchControls() {
|
|
| 3076 |
const data = await res.json();
|
| 3077 |
simPaused = data.paused;
|
| 3078 |
simSpeed = data.speed;
|
|
|
|
| 3079 |
updateControlsUI();
|
| 3080 |
}
|
| 3081 |
} catch(e) {}
|
|
|
|
| 272 |
<button class="ctrl-btn" id="btn-10x" onclick="setSpeed(0.1)" title="10x speed">10x</button>
|
| 273 |
<button class="ctrl-btn" id="btn-50x" onclick="setSpeed(0.02)" title="50x speed">50x</button>
|
| 274 |
<span class="speed-label" id="speed-label">1x</span>
|
| 275 |
+
<span style="color:#1a3a6e;margin:0 4px">β</span>
|
| 276 |
+
<span style="font-size:10px;color:#666;white-space:nowrap" title="LLM call probability: controls how often agents use AI reasoning vs. routine behaviour. At 45% with Gemini free tier β 10h daily runtime.">π§ </span>
|
| 277 |
+
<input type="range" id="llm-prob-slider" min="0" max="100" value="100" step="5"
|
| 278 |
+
style="width:64px;height:6px;accent-color:#4ecca3;cursor:pointer;vertical-align:middle;"
|
| 279 |
+
oninput="onLlmProbSlider(this.value)" title="LLM usage probability">
|
| 280 |
+
<span id="llm-prob-label" style="font-size:10px;color:#4ecca3;min-width:28px;text-align:right;">100%</span>
|
| 281 |
+
<span style="color:#1a3a6e;margin:0 4px">β</span>
|
| 282 |
<button class="ctrl-btn" id="btn-rect-zoom" onclick="toggleRectZoom()" title="Draw a rectangle to zoom into that area (Shift+drag)">β¬</button>
|
| 283 |
<button class="ctrl-btn" onclick="zoomBy(1.3)" title="Zoom In (scroll up)">οΌ</button>
|
| 284 |
<button class="ctrl-btn" onclick="zoomBy(1/1.3)" title="Zoom Out (scroll down)">οΌ</button>
|
|
|
|
| 3019 |
// ============================================================
|
| 3020 |
let simPaused = false;
|
| 3021 |
let simSpeed = 1.0;
|
| 3022 |
+
let llmCallProbability = 1.0;
|
| 3023 |
|
| 3024 |
async function togglePause() {
|
| 3025 |
try {
|
|
|
|
| 3076 |
document.getElementById('speed-label').textContent = label;
|
| 3077 |
}
|
| 3078 |
|
| 3079 |
+
async function onLlmProbSlider(val) {
|
| 3080 |
+
const pct = parseInt(val);
|
| 3081 |
+
document.getElementById('llm-prob-label').textContent = pct + '%';
|
| 3082 |
+
llmCallProbability = pct / 100;
|
| 3083 |
+
try {
|
| 3084 |
+
await fetch(`${API_BASE}/controls/llm_probability?value=${llmCallProbability}`, { method: 'POST' });
|
| 3085 |
+
} catch(e) {}
|
| 3086 |
+
}
|
| 3087 |
+
|
| 3088 |
+
function updateLlmProbUI(prob) {
|
| 3089 |
+
llmCallProbability = prob;
|
| 3090 |
+
const pct = Math.round(prob * 100);
|
| 3091 |
+
const slider = document.getElementById('llm-prob-slider');
|
| 3092 |
+
const label = document.getElementById('llm-prob-label');
|
| 3093 |
+
if (slider) slider.value = pct;
|
| 3094 |
+
if (label) label.textContent = pct + '%';
|
| 3095 |
+
}
|
| 3096 |
+
|
| 3097 |
async function fetchControls() {
|
| 3098 |
try {
|
| 3099 |
const res = await fetch(`${API_BASE}/controls`);
|
|
|
|
| 3101 |
const data = await res.json();
|
| 3102 |
simPaused = data.paused;
|
| 3103 |
simSpeed = data.speed;
|
| 3104 |
+
if (data.llm_call_probability !== undefined) updateLlmProbUI(data.llm_call_probability);
|
| 3105 |
updateControlsUI();
|
| 3106 |
}
|
| 3107 |
} catch(e) {}
|