Show live LLM status in header (active/limited/skipped/idle)
Each LLM client now exposes an `llm_status` property:
- ClaudeClient: tracks _rate_limited_until on RateLimitError
- GroqClient / GeminiClient: reports circuit-breaker state
- OllamaClient: flags recent connection failures
get_state_summary() now includes llm_status, llm_calls_last_tick, and
llm_skipped so the WebSocket state carries everything the UI needs.
Header model pill gains a coloured dot:
green — calls are happening this tick
yellow — idle (no LLM calls needed)
orange/red — quota or rate limit hit
grey — LLM skipped (fast / 50x mode)
Tooltip shows provider, full model name, and a short status reason.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/engine/llm.py +25 -0
- src/soci/engine/simulation.py +3 -0
- web/index.html +13 -2
src/soci/engine/llm.py
CHANGED
|
@@ -138,6 +138,7 @@ class ClaudeClient:
|
|
| 138 |
self.max_retries = max_retries
|
| 139 |
self.usage = LLMUsage()
|
| 140 |
self.provider = PROVIDER_CLAUDE
|
|
|
|
| 141 |
|
| 142 |
async def complete(
|
| 143 |
self,
|
|
@@ -168,6 +169,7 @@ class ClaudeClient:
|
|
| 168 |
|
| 169 |
except anthropic.RateLimitError:
|
| 170 |
wait = 2 ** attempt
|
|
|
|
| 171 |
logger.warning(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
|
| 172 |
time.sleep(wait)
|
| 173 |
except anthropic.APIError as e:
|
|
@@ -175,8 +177,15 @@ class ClaudeClient:
|
|
| 175 |
if attempt == self.max_retries - 1:
|
| 176 |
raise
|
| 177 |
time.sleep(1)
|
|
|
|
| 178 |
return ""
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
async def complete_json(
|
| 181 |
self,
|
| 182 |
system: str,
|
|
@@ -223,6 +232,13 @@ class OllamaClient:
|
|
| 223 |
self.usage = LLMUsage()
|
| 224 |
self.provider = PROVIDER_OLLAMA
|
| 225 |
self._http = httpx.AsyncClient(timeout=180.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
async def complete(
|
| 228 |
self,
|
|
@@ -265,6 +281,7 @@ class OllamaClient:
|
|
| 265 |
return data.get("message", {}).get("content", "")
|
| 266 |
|
| 267 |
except httpx.ConnectError:
|
|
|
|
| 268 |
msg = (
|
| 269 |
f"Cannot connect to Ollama at {self.base_url}. "
|
| 270 |
"Make sure Ollama is running: 'ollama serve'"
|
|
@@ -587,6 +604,10 @@ class GroqClient:
|
|
| 587 |
}
|
| 588 |
return mapping.get(model, model)
|
| 589 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
|
| 591 |
# ============================================================
|
| 592 |
# Google Gemini Client (free tier via OpenAI-compatible endpoint)
|
|
@@ -651,6 +672,10 @@ class GeminiClient:
|
|
| 651 |
}
|
| 652 |
return mapping.get(model, model)
|
| 653 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
async def complete(
|
| 655 |
self,
|
| 656 |
system: str,
|
|
|
|
| 138 |
self.max_retries = max_retries
|
| 139 |
self.usage = LLMUsage()
|
| 140 |
self.provider = PROVIDER_CLAUDE
|
| 141 |
+
self._rate_limited_until: float = 0.0 # monotonic timestamp
|
| 142 |
|
| 143 |
async def complete(
|
| 144 |
self,
|
|
|
|
| 169 |
|
| 170 |
except anthropic.RateLimitError:
|
| 171 |
wait = 2 ** attempt
|
| 172 |
+
self._rate_limited_until = time.monotonic() + wait
|
| 173 |
logger.warning(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
|
| 174 |
time.sleep(wait)
|
| 175 |
except anthropic.APIError as e:
|
|
|
|
| 177 |
if attempt == self.max_retries - 1:
|
| 178 |
raise
|
| 179 |
time.sleep(1)
|
| 180 |
+
self._rate_limited_until = time.monotonic() + 60 # mark as limited after all retries failed
|
| 181 |
return ""
|
| 182 |
|
| 183 |
+
@property
|
| 184 |
+
def llm_status(self) -> str:
|
| 185 |
+
if time.monotonic() < self._rate_limited_until:
|
| 186 |
+
return "limited"
|
| 187 |
+
return "active"
|
| 188 |
+
|
| 189 |
async def complete_json(
|
| 190 |
self,
|
| 191 |
system: str,
|
|
|
|
| 232 |
self.usage = LLMUsage()
|
| 233 |
self.provider = PROVIDER_OLLAMA
|
| 234 |
self._http = httpx.AsyncClient(timeout=180.0)
|
| 235 |
+
self._last_error: float = 0.0 # monotonic timestamp of last connection failure
|
| 236 |
+
|
| 237 |
+
@property
|
| 238 |
+
def llm_status(self) -> str:
|
| 239 |
+
if time.monotonic() - self._last_error < 30:
|
| 240 |
+
return "limited" # recent connection error
|
| 241 |
+
return "active"
|
| 242 |
|
| 243 |
async def complete(
|
| 244 |
self,
|
|
|
|
| 281 |
return data.get("message", {}).get("content", "")
|
| 282 |
|
| 283 |
except httpx.ConnectError:
|
| 284 |
+
self._last_error = time.monotonic()
|
| 285 |
msg = (
|
| 286 |
f"Cannot connect to Ollama at {self.base_url}. "
|
| 287 |
"Make sure Ollama is running: 'ollama serve'"
|
|
|
|
| 604 |
}
|
| 605 |
return mapping.get(model, model)
|
| 606 |
|
| 607 |
+
@property
|
| 608 |
+
def llm_status(self) -> str:
|
| 609 |
+
return "limited" if self._is_quota_exhausted() else "active"
|
| 610 |
+
|
| 611 |
|
| 612 |
# ============================================================
|
| 613 |
# Google Gemini Client (free tier via OpenAI-compatible endpoint)
|
|
|
|
| 672 |
}
|
| 673 |
return mapping.get(model, model)
|
| 674 |
|
| 675 |
+
@property
|
| 676 |
+
def llm_status(self) -> str:
|
| 677 |
+
return "limited" if self._is_quota_exhausted() else "active"
|
| 678 |
+
|
| 679 |
async def complete(
|
| 680 |
self,
|
| 681 |
system: str,
|
src/soci/engine/simulation.py
CHANGED
|
@@ -838,6 +838,9 @@ class Simulation:
|
|
| 838 |
"active_conversations": len(self.active_conversations),
|
| 839 |
"llm_provider": getattr(self.llm, "provider", "unknown"),
|
| 840 |
"llm_model": getattr(self.llm, "default_model", "unknown"),
|
|
|
|
|
|
|
|
|
|
| 841 |
"llm_usage": self.llm.usage.summary(),
|
| 842 |
}
|
| 843 |
|
|
|
|
| 838 |
"active_conversations": len(self.active_conversations),
|
| 839 |
"llm_provider": getattr(self.llm, "provider", "unknown"),
|
| 840 |
"llm_model": getattr(self.llm, "default_model", "unknown"),
|
| 841 |
+
"llm_status": getattr(self.llm, "llm_status", "active"),
|
| 842 |
+
"llm_calls_last_tick": self._llm_calls_this_tick,
|
| 843 |
+
"llm_skipped": self._skip_llm_this_tick,
|
| 844 |
"llm_usage": self.llm.usage.summary(),
|
| 845 |
}
|
| 846 |
|
web/index.html
CHANGED
|
@@ -2871,9 +2871,20 @@ function processStateData(data) {
|
|
| 2871 |
.replace(/^gemini-/, ''); // "gemini-2.0-flash" β "2.0-flash"
|
| 2872 |
const providerIcon = { gemini: 'β¦', groq: 'β‘', claude: 'β', ollama: 'π¦' };
|
| 2873 |
const icon = providerIcon[data.llm_provider] || 'β‘';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2874 |
const el = document.getElementById('llm-model');
|
| 2875 |
-
el.
|
| 2876 |
-
el.title = `${data.llm_provider}: ${data.llm_model}`;
|
| 2877 |
}
|
| 2878 |
|
| 2879 |
agents = data.agents || {};
|
|
|
|
| 2871 |
.replace(/^gemini-/, ''); // "gemini-2.0-flash" β "2.0-flash"
|
| 2872 |
const providerIcon = { gemini: 'β¦', groq: 'β‘', claude: 'β', ollama: 'π¦' };
|
| 2873 |
const icon = providerIcon[data.llm_provider] || 'β‘';
|
| 2874 |
+
|
| 2875 |
+
// Status: limited > skipped > idle > active (calls happening)
|
| 2876 |
+
const isLimited = data.llm_status === 'limited';
|
| 2877 |
+
const isSkipped = data.llm_skipped === true;
|
| 2878 |
+
const hasCalls = (data.llm_calls_last_tick || 0) > 0;
|
| 2879 |
+
let dotColor, statusTip;
|
| 2880 |
+
if (isLimited) { dotColor = '#e94560'; statusTip = 'quota / rate limit hit'; }
|
| 2881 |
+
else if (isSkipped) { dotColor = '#666'; statusTip = 'LLM skipped (fast mode)'; }
|
| 2882 |
+
else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
|
| 2883 |
+
else { dotColor = '#f0c040'; statusTip = 'idle β no calls needed'; }
|
| 2884 |
+
|
| 2885 |
const el = document.getElementById('llm-model');
|
| 2886 |
+
el.innerHTML = `${icon} ${label} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
|
| 2887 |
+
el.title = `${data.llm_provider}: ${data.llm_model} β ${statusTip}`;
|
| 2888 |
}
|
| 2889 |
|
| 2890 |
agents = data.agents || {};
|