RayMelius Claude Sonnet 4.6 committed on
Commit
507f045
·
1 Parent(s): 89df68d

Add LLM probability slider: controls AI usage per call site

Browse files

Adds a 0-100% slider (brain emoji, in the toolbar between speed and
zoom controls) that gates every LLM call in the simulation:
- Action decisions, conversation turns, reflections, social starts,
daily plan generation — each rolls random() < llm_call_probability.

Provider defaults (tuned for free-tier daily quotas):
Gemini → 45% (4 RPM + budget=1 → ~150 calls/h → ~10h at 1500 RPD)
Groq → 70% (conserves daily token budget)
HF → 45%
Ollama/Claude → 100% (no quota)

Also reduces Gemini's _max_llm_calls_this_tick from 4 → 1 so the
rate limiter (15s/call) naturally paces the simulation without
needing to burn all 4 budget slots every tick.

Override default via SOCI_LLM_PROB env var (0.0–1.0).
API: GET /api/controls β†’ llm_call_probability field
POST /api/controls/llm_probability?value=0.45

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

src/soci/api/routes.py CHANGED
@@ -693,8 +693,18 @@ async def get_events(limit: int = 50):
693
  @router.get("/controls")
694
  async def get_controls():
695
  """Get current simulation control state."""
696
- from soci.api.server import _sim_paused, _sim_speed
697
- return {"paused": _sim_paused, "speed": _sim_speed}
 
 
 
 
 
 
 
 
 
 
698
 
699
 
700
  @router.post("/controls/pause")
 
693
  @router.get("/controls")
694
  async def get_controls():
695
  """Get current simulation control state."""
696
+ from soci.api.server import _sim_paused, _sim_speed, _llm_call_probability
697
+ return {"paused": _sim_paused, "speed": _sim_speed, "llm_call_probability": _llm_call_probability}
698
+
699
+
700
+ @router.post("/controls/llm_probability")
701
+ async def set_llm_probability(value: float = 1.0):
702
+ """Set LLM call probability (0.0–1.0). Controls how often agents use LLM vs. routine behaviour.
703
+ At 0.45 with Gemini free tier: ~150 calls/h → ~10h daily runtime."""
704
+ from soci.api.server import set_llm_call_probability
705
+ set_llm_call_probability(value)
706
+ from soci.api.server import _llm_call_probability
707
+ return {"llm_call_probability": _llm_call_probability}
708
 
709
 
710
  @router.post("/controls/pause")
src/soci/api/server.py CHANGED
@@ -41,6 +41,7 @@ _sim_task: Optional[asyncio.Task] = None
41
  _sim_paused: bool = False
42
  _sim_speed: float = 1.0 # 1.0 = normal, 0.5 = fast, 2.0 = slow
43
  _llm_provider: str = "" # Track which provider is active
 
44
 
45
 
46
  def get_simulation() -> Simulation:
@@ -57,6 +58,17 @@ def get_llm_provider() -> str:
57
  return _llm_provider
58
 
59
 
 
 
 
 
 
 
 
 
 
 
 
60
  async def switch_llm_provider(provider: str, model: Optional[str] = None) -> None:
61
  """Hot-swap the LLM client on the running simulation."""
62
  global _llm_provider, _simulation
@@ -97,14 +109,18 @@ async def simulation_loop(sim: Simulation, db: Database, tick_delay: float = 2.0
97
  else:
98
  sim._skip_llm_this_tick = False
99
  if is_rate_limited:
100
- # Rate-limited providers (Groq 30 RPM, Gemini 15 RPM, HF) — budget 4 calls/tick
 
101
  sim._max_convos_this_tick = 1
102
- sim._max_llm_calls_this_tick = 4
103
  else:
104
  # Ollama / Claude: soft cap to keep ticks responsive
105
  sim._max_convos_this_tick = 3
106
  sim._max_llm_calls_this_tick = 10
107
 
 
 
 
108
  await sim.tick()
109
 
110
  # Auto-save every 24 ticks (~6 sim-hours)
@@ -306,6 +322,21 @@ async def lifespan(app: FastAPI):
306
  llm = create_llm_client(provider=_llm_provider)
307
  logger.info(f"LLM provider: {_llm_provider} ({llm.__class__.__name__})")
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  db = Database()
310
  await db.connect()
311
  _database = db
 
41
  _sim_paused: bool = False
42
  _sim_speed: float = 1.0 # 1.0 = normal, 0.5 = fast, 2.0 = slow
43
  _llm_provider: str = "" # Track which provider is active
44
+ _llm_call_probability: float = 1.0 # 0.0–1.0; set per-provider on startup, adjustable via slider
45
 
46
 
47
  def get_simulation() -> Simulation:
 
58
  return _llm_provider
59
 
60
 
61
+ def get_llm_call_probability() -> float:
62
+ return _llm_call_probability
63
+
64
+
65
+ def set_llm_call_probability(value: float) -> None:
66
+ global _llm_call_probability, _simulation
67
+ _llm_call_probability = max(0.0, min(1.0, value))
68
+ if _simulation is not None:
69
+ _simulation.llm_call_probability = _llm_call_probability
70
+
71
+
72
  async def switch_llm_provider(provider: str, model: Optional[str] = None) -> None:
73
  """Hot-swap the LLM client on the running simulation."""
74
  global _llm_provider, _simulation
 
109
  else:
110
  sim._skip_llm_this_tick = False
111
  if is_rate_limited:
112
+ # Rate-limited providers: tight budget — probability slider does the fine-tuning.
113
+ # Gemini free tier: 4 RPM, ~1500 RPD → budget=1 + prob=0.45 ≈ 150 calls/h (10h).
114
  sim._max_convos_this_tick = 1
115
+ sim._max_llm_calls_this_tick = 1
116
  else:
117
  # Ollama / Claude: soft cap to keep ticks responsive
118
  sim._max_convos_this_tick = 3
119
  sim._max_llm_calls_this_tick = 10
120
 
121
+ # Apply the runtime probability slider every tick
122
+ sim.llm_call_probability = _llm_call_probability
123
+
124
  await sim.tick()
125
 
126
  # Auto-save every 24 ticks (~6 sim-hours)
 
322
  llm = create_llm_client(provider=_llm_provider)
323
  logger.info(f"LLM provider: {_llm_provider} ({llm.__class__.__name__})")
324
 
325
+ # Default LLM call probability — tuned per provider to stay within free-tier daily quotas.
326
+ # Gemini free tier: 4 RPM, ~1500 RPD → 0.45 ≈ 150 calls/h → ~10h runtime per day.
327
+ # Groq free tier: 30 RPM, limited daily tokens → 0.70 to conserve budget.
328
+ # Ollama / Claude: no quota → 1.0 (full fidelity).
329
+ # Override via SOCI_LLM_PROB env var (0.0–1.0).
330
+ _provider_default_prob = {
331
+ PROVIDER_GEMINI: 0.45,
332
+ PROVIDER_GROQ: 0.70,
333
+ PROVIDER_HF: 0.45,
334
+ }
335
+ _llm_call_probability = float(
336
+ os.environ.get("SOCI_LLM_PROB", str(_provider_default_prob.get(_llm_provider, 1.0)))
337
+ )
338
+ logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
339
+
340
  db = Database()
341
  await db.connect()
342
  _database = db
src/soci/engine/simulation.py CHANGED
@@ -64,6 +64,9 @@ class Simulation:
64
  self._max_convos_this_tick: int = 0 # 0 = no limit
65
  self._max_llm_calls_this_tick: int = 0 # 0 = no limit; global budget across all categories
66
  self._llm_calls_this_tick: int = 0 # counter, reset each tick
 
 
 
67
  # Callback for real-time output
68
  self.on_event: Optional[Callable[[str], None]] = None
69
 
@@ -169,7 +172,7 @@ class Simulation:
169
  plan_items[:8], self.clock.day,
170
  self.clock.total_ticks, self.clock.time_str,
171
  )
172
- else:
173
  plan_coros.append(self._generate_daily_plan(agent))
174
  plan_agents.append(agent)
175
 
@@ -233,7 +236,7 @@ class Simulation:
233
  continue
234
 
235
  # No routine slot β€” fallback to LLM (rare), skip in fast-forward
236
- if not self._skip_llm_this_tick:
237
  action_coros.append(self._decide_action(agent))
238
  action_agents.append(agent)
239
 
@@ -276,7 +279,7 @@ class Simulation:
276
  if next_speaker_id:
277
  responder = self.agents.get(next_speaker_id[0])
278
  other = self.agents.get(last_speaker) if last_speaker else None
279
- if responder and other:
280
  conv_coros.append(
281
  continue_conversation(conv, responder, other, self.llm, self.clock)
282
  )
@@ -303,7 +306,8 @@ class Simulation:
303
  # 7. Social: maybe start new conversations (respect speed limits + budget)
304
  if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
305
  if self._max_convos_this_tick == 0 or len(self.active_conversations) < self._max_convos_this_tick:
306
- await self._handle_social_interactions(ordered_agents)
 
307
 
308
  # 8. Reflections for agents with enough accumulated importance
309
  if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
@@ -311,8 +315,9 @@ class Simulation:
311
  reflect_agents = []
312
  for agent in ordered_agents:
313
  if agent.memory.should_reflect() and not agent.is_player:
314
- reflect_coros.append(self._generate_reflection(agent))
315
- reflect_agents.append(agent)
 
316
 
317
  # Limit by speed cap and global budget
318
  reflect_cap = min(
 
64
  self._max_convos_this_tick: int = 0 # 0 = no limit
65
  self._max_llm_calls_this_tick: int = 0 # 0 = no limit; global budget across all categories
66
  self._llm_calls_this_tick: int = 0 # counter, reset each tick
67
+ # LLM call probability: 0.0 = never use LLM (routine only), 1.0 = always (default).
68
+ # Applied per potential LLM call site. Tuned at 0.45 for ~10h Gemini free-tier runtime.
69
+ self.llm_call_probability: float = 1.0
70
  # Callback for real-time output
71
  self.on_event: Optional[Callable[[str], None]] = None
72
 
 
172
  plan_items[:8], self.clock.day,
173
  self.clock.total_ticks, self.clock.time_str,
174
  )
175
+ elif random.random() < self.llm_call_probability:
176
  plan_coros.append(self._generate_daily_plan(agent))
177
  plan_agents.append(agent)
178
 
 
236
  continue
237
 
238
  # No routine slot β€” fallback to LLM (rare), skip in fast-forward
239
+ if not self._skip_llm_this_tick and random.random() < self.llm_call_probability:
240
  action_coros.append(self._decide_action(agent))
241
  action_agents.append(agent)
242
 
 
279
  if next_speaker_id:
280
  responder = self.agents.get(next_speaker_id[0])
281
  other = self.agents.get(last_speaker) if last_speaker else None
282
+ if responder and other and random.random() < self.llm_call_probability:
283
  conv_coros.append(
284
  continue_conversation(conv, responder, other, self.llm, self.clock)
285
  )
 
306
  # 7. Social: maybe start new conversations (respect speed limits + budget)
307
  if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
308
  if self._max_convos_this_tick == 0 or len(self.active_conversations) < self._max_convos_this_tick:
309
+ if random.random() < self.llm_call_probability:
310
+ await self._handle_social_interactions(ordered_agents)
311
 
312
  # 8. Reflections for agents with enough accumulated importance
313
  if not self._skip_llm_this_tick and self._llm_budget_remaining() > 0:
 
315
  reflect_agents = []
316
  for agent in ordered_agents:
317
  if agent.memory.should_reflect() and not agent.is_player:
318
+ if random.random() < self.llm_call_probability:
319
+ reflect_coros.append(self._generate_reflection(agent))
320
+ reflect_agents.append(agent)
321
 
322
  # Limit by speed cap and global budget
323
  reflect_cap = min(
web/index.html CHANGED
@@ -272,7 +272,13 @@
272
  <button class="ctrl-btn" id="btn-10x" onclick="setSpeed(0.1)" title="10x speed">10x</button>
273
  <button class="ctrl-btn" id="btn-50x" onclick="setSpeed(0.02)" title="50x speed">50x</button>
274
  <span class="speed-label" id="speed-label">1x</span>
275
- <span style="color:#1a3a6e;margin:0 2px">│</span>
 
 
 
 
 
 
276
 <button class="ctrl-btn" id="btn-rect-zoom" onclick="toggleRectZoom()" title="Draw a rectangle to zoom into that area (Shift+drag)">⬚</button>
277
 <button class="ctrl-btn" onclick="zoomBy(1.3)" title="Zoom In (scroll up)">＋</button>
278
  <button class="ctrl-btn" onclick="zoomBy(1/1.3)" title="Zoom Out (scroll down)">-</button>
@@ -3013,6 +3019,7 @@ async function fetchState() {
3013
  // ============================================================
3014
  let simPaused = false;
3015
  let simSpeed = 1.0;
 
3016
 
3017
  async function togglePause() {
3018
  try {
@@ -3069,6 +3076,24 @@ function updateControlsUI() {
3069
  document.getElementById('speed-label').textContent = label;
3070
  }
3071
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3072
  async function fetchControls() {
3073
  try {
3074
  const res = await fetch(`${API_BASE}/controls`);
@@ -3076,6 +3101,7 @@ async function fetchControls() {
3076
  const data = await res.json();
3077
  simPaused = data.paused;
3078
  simSpeed = data.speed;
 
3079
  updateControlsUI();
3080
  }
3081
  } catch(e) {}
 
272
  <button class="ctrl-btn" id="btn-10x" onclick="setSpeed(0.1)" title="10x speed">10x</button>
273
  <button class="ctrl-btn" id="btn-50x" onclick="setSpeed(0.02)" title="50x speed">50x</button>
274
  <span class="speed-label" id="speed-label">1x</span>
275
+ <span style="color:#1a3a6e;margin:0 4px">│</span>
276
+ <span style="font-size:10px;color:#666;white-space:nowrap" title="LLM call probability: controls how often agents use AI reasoning vs. routine behaviour. At 45% with Gemini free tier ≈ 10h daily runtime.">🧠</span>
277
+ <input type="range" id="llm-prob-slider" min="0" max="100" value="100" step="5"
278
+ style="width:64px;height:6px;accent-color:#4ecca3;cursor:pointer;vertical-align:middle;"
279
+ oninput="onLlmProbSlider(this.value)" title="LLM usage probability">
280
+ <span id="llm-prob-label" style="font-size:10px;color:#4ecca3;min-width:28px;text-align:right;">100%</span>
281
+ <span style="color:#1a3a6e;margin:0 4px">│</span>
282
 <button class="ctrl-btn" id="btn-rect-zoom" onclick="toggleRectZoom()" title="Draw a rectangle to zoom into that area (Shift+drag)">⬚</button>
283
 <button class="ctrl-btn" onclick="zoomBy(1.3)" title="Zoom In (scroll up)">＋</button>
284
  <button class="ctrl-btn" onclick="zoomBy(1/1.3)" title="Zoom Out (scroll down)">-</button>
 
3019
  // ============================================================
3020
  let simPaused = false;
3021
  let simSpeed = 1.0;
3022
+ let llmCallProbability = 1.0;
3023
 
3024
  async function togglePause() {
3025
  try {
 
3076
  document.getElementById('speed-label').textContent = label;
3077
  }
3078
 
3079
+ async function onLlmProbSlider(val) {
3080
+ const pct = parseInt(val);
3081
+ document.getElementById('llm-prob-label').textContent = pct + '%';
3082
+ llmCallProbability = pct / 100;
3083
+ try {
3084
+ await fetch(`${API_BASE}/controls/llm_probability?value=${llmCallProbability}`, { method: 'POST' });
3085
+ } catch(e) {}
3086
+ }
3087
+
3088
+ function updateLlmProbUI(prob) {
3089
+ llmCallProbability = prob;
3090
+ const pct = Math.round(prob * 100);
3091
+ const slider = document.getElementById('llm-prob-slider');
3092
+ const label = document.getElementById('llm-prob-label');
3093
+ if (slider) slider.value = pct;
3094
+ if (label) label.textContent = pct + '%';
3095
+ }
3096
+
3097
  async function fetchControls() {
3098
  try {
3099
  const res = await fetch(`${API_BASE}/controls`);
 
3101
  const data = await res.json();
3102
  simPaused = data.paused;
3103
  simSpeed = data.speed;
3104
+ if (data.llm_call_probability !== undefined) updateLlmProbUI(data.llm_call_probability);
3105
  updateControlsUI();
3106
  }
3107
  } catch(e) {}