RayMelius Claude Sonnet 4.6 committed on
Commit
ccac7ac
·
1 Parent(s): 5347dfd

Show live LLM status in header (active/limited/skipped/idle)

Browse files

Each LLM client now exposes an llm_status property:
- ClaudeClient: tracks _rate_limited_until on RateLimitError
- GroqClient / GeminiClient: reports circuit-breaker state
- OllamaClient: flags recent connection failures

get_state_summary() now includes llm_status, llm_calls_last_tick, and
llm_skipped so the WebSocket state carries everything the UI needs.

Header model pill gains a coloured dot:
green — calls are happening this tick
yellow — idle (no LLM calls needed)
orange/red — quota or rate limit hit
grey — LLM skipped (fast / 50x mode)
Tooltip shows provider, full model name, and a short status reason.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

src/soci/engine/llm.py CHANGED
@@ -138,6 +138,7 @@ class ClaudeClient:
138
  self.max_retries = max_retries
139
  self.usage = LLMUsage()
140
  self.provider = PROVIDER_CLAUDE
 
141
 
142
  async def complete(
143
  self,
@@ -168,6 +169,7 @@ class ClaudeClient:
168
 
169
  except anthropic.RateLimitError:
170
  wait = 2 ** attempt
 
171
  logger.warning(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
172
  time.sleep(wait)
173
  except anthropic.APIError as e:
@@ -175,8 +177,15 @@ class ClaudeClient:
175
  if attempt == self.max_retries - 1:
176
  raise
177
  time.sleep(1)
 
178
  return ""
179
 
 
 
 
 
 
 
180
  async def complete_json(
181
  self,
182
  system: str,
@@ -223,6 +232,13 @@ class OllamaClient:
223
  self.usage = LLMUsage()
224
  self.provider = PROVIDER_OLLAMA
225
  self._http = httpx.AsyncClient(timeout=180.0)
 
 
 
 
 
 
 
226
 
227
  async def complete(
228
  self,
@@ -265,6 +281,7 @@ class OllamaClient:
265
  return data.get("message", {}).get("content", "")
266
 
267
  except httpx.ConnectError:
 
268
  msg = (
269
  f"Cannot connect to Ollama at {self.base_url}. "
270
  "Make sure Ollama is running: 'ollama serve'"
@@ -587,6 +604,10 @@ class GroqClient:
587
  }
588
  return mapping.get(model, model)
589
 
 
 
 
 
590
 
591
  # ============================================================
592
  # Google Gemini Client (free tier via OpenAI-compatible endpoint)
@@ -651,6 +672,10 @@ class GeminiClient:
651
  }
652
  return mapping.get(model, model)
653
 
 
 
 
 
654
  async def complete(
655
  self,
656
  system: str,
 
138
  self.max_retries = max_retries
139
  self.usage = LLMUsage()
140
  self.provider = PROVIDER_CLAUDE
141
+ self._rate_limited_until: float = 0.0 # monotonic timestamp
142
 
143
  async def complete(
144
  self,
 
169
 
170
  except anthropic.RateLimitError:
171
  wait = 2 ** attempt
172
+ self._rate_limited_until = time.monotonic() + wait
173
  logger.warning(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
174
  time.sleep(wait)
175
  except anthropic.APIError as e:
 
177
  if attempt == self.max_retries - 1:
178
  raise
179
  time.sleep(1)
180
+ self._rate_limited_until = time.monotonic() + 60 # mark as limited after all retries failed
181
  return ""
182
 
183
+ @property
184
+ def llm_status(self) -> str:
185
+ if time.monotonic() < self._rate_limited_until:
186
+ return "limited"
187
+ return "active"
188
+
189
  async def complete_json(
190
  self,
191
  system: str,
 
232
  self.usage = LLMUsage()
233
  self.provider = PROVIDER_OLLAMA
234
  self._http = httpx.AsyncClient(timeout=180.0)
235
+ self._last_error: float = 0.0 # monotonic timestamp of last connection failure
236
+
237
+ @property
238
+ def llm_status(self) -> str:
239
+ if time.monotonic() - self._last_error < 30:
240
+ return "limited" # recent connection error
241
+ return "active"
242
 
243
  async def complete(
244
  self,
 
281
  return data.get("message", {}).get("content", "")
282
 
283
  except httpx.ConnectError:
284
+ self._last_error = time.monotonic()
285
  msg = (
286
  f"Cannot connect to Ollama at {self.base_url}. "
287
  "Make sure Ollama is running: 'ollama serve'"
 
604
  }
605
  return mapping.get(model, model)
606
 
607
+ @property
608
+ def llm_status(self) -> str:
609
+ return "limited" if self._is_quota_exhausted() else "active"
610
+
611
 
612
  # ============================================================
613
  # Google Gemini Client (free tier via OpenAI-compatible endpoint)
 
672
  }
673
  return mapping.get(model, model)
674
 
675
+ @property
676
+ def llm_status(self) -> str:
677
+ return "limited" if self._is_quota_exhausted() else "active"
678
+
679
  async def complete(
680
  self,
681
  system: str,
src/soci/engine/simulation.py CHANGED
@@ -838,6 +838,9 @@ class Simulation:
838
  "active_conversations": len(self.active_conversations),
839
  "llm_provider": getattr(self.llm, "provider", "unknown"),
840
  "llm_model": getattr(self.llm, "default_model", "unknown"),
 
 
 
841
  "llm_usage": self.llm.usage.summary(),
842
  }
843
 
 
838
  "active_conversations": len(self.active_conversations),
839
  "llm_provider": getattr(self.llm, "provider", "unknown"),
840
  "llm_model": getattr(self.llm, "default_model", "unknown"),
841
+ "llm_status": getattr(self.llm, "llm_status", "active"),
842
+ "llm_calls_last_tick": self._llm_calls_this_tick,
843
+ "llm_skipped": self._skip_llm_this_tick,
844
  "llm_usage": self.llm.usage.summary(),
845
  }
846
 
web/index.html CHANGED
@@ -2871,9 +2871,20 @@ function processStateData(data) {
2871
  .replace(/^gemini-/, ''); // "gemini-2.0-flash" β†’ "2.0-flash"
2872
  const providerIcon = { gemini: '✦', groq: '⚑', claude: 'β—†', ollama: 'πŸ¦™' };
2873
  const icon = providerIcon[data.llm_provider] || '⚑';
 
 
 
 
 
 
 
 
 
 
 
2874
  const el = document.getElementById('llm-model');
2875
- el.textContent = `${icon} ${label}`;
2876
- el.title = `${data.llm_provider}: ${data.llm_model}`;
2877
  }
2878
 
2879
  agents = data.agents || {};
 
2871
  .replace(/^gemini-/, ''); // "gemini-2.0-flash" β†’ "2.0-flash"
2872
  const providerIcon = { gemini: '✦', groq: '⚑', claude: 'β—†', ollama: 'πŸ¦™' };
2873
  const icon = providerIcon[data.llm_provider] || '⚑';
2874
+
2875
+ // Status: limited > skipped > idle > active (calls happening)
2876
+ const isLimited = data.llm_status === 'limited';
2877
+ const isSkipped = data.llm_skipped === true;
2878
+ const hasCalls = (data.llm_calls_last_tick || 0) > 0;
2879
+ let dotColor, statusTip;
2880
+ if (isLimited) { dotColor = '#e94560'; statusTip = 'quota / rate limit hit'; }
2881
+ else if (isSkipped) { dotColor = '#666'; statusTip = 'LLM skipped (fast mode)'; }
2882
+ else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
2883
+ else { dotColor = '#f0c040'; statusTip = 'idle β€” no calls needed'; }
2884
+
2885
  const el = document.getElementById('llm-model');
2886
+ el.innerHTML = `${icon} ${label} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
2887
+ el.title = `${data.llm_provider}: ${data.llm_model} β€” ${statusTip}`;
2888
  }
2889
 
2890
  agents = data.agents || {};