Show live LLM status in header (active/limited/skipped/idle)
Each LLM client now exposes an `llm_status` property:
- ClaudeClient: tracks _rate_limited_until on RateLimitError
- GroqClient / GeminiClient: reports circuit-breaker state
- OllamaClient: flags recent connection failures
get_state_summary() now includes llm_status, llm_calls_last_tick, and
llm_skipped so the WebSocket state carries everything the UI needs.
Header model pill gains a coloured dot:
green — calls are happening this tick
yellow — idle (no LLM calls needed)
orange/red — quota or rate limit hit
grey — LLM skipped (fast / 50x mode)
Tooltip shows provider, full model name, and a short status reason.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/engine/llm.py +25 -0
- src/soci/engine/simulation.py +3 -0
- web/index.html +13 -2
src/soci/engine/llm.py
CHANGED
|
@@ -138,6 +138,7 @@ class ClaudeClient:
|
|
| 138 |
self.max_retries = max_retries
|
| 139 |
self.usage = LLMUsage()
|
| 140 |
self.provider = PROVIDER_CLAUDE
|
|
|
|
| 141 |
|
| 142 |
async def complete(
|
| 143 |
self,
|
|
@@ -168,6 +169,7 @@ class ClaudeClient:
|
|
| 168 |
|
| 169 |
except anthropic.RateLimitError:
|
| 170 |
wait = 2 ** attempt
|
|
|
|
| 171 |
logger.warning(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
|
| 172 |
time.sleep(wait)
|
| 173 |
except anthropic.APIError as e:
|
|
@@ -175,8 +177,15 @@ class ClaudeClient:
|
|
| 175 |
if attempt == self.max_retries - 1:
|
| 176 |
raise
|
| 177 |
time.sleep(1)
|
|
|
|
| 178 |
return ""
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
async def complete_json(
|
| 181 |
self,
|
| 182 |
system: str,
|
|
@@ -223,6 +232,13 @@ class OllamaClient:
|
|
| 223 |
self.usage = LLMUsage()
|
| 224 |
self.provider = PROVIDER_OLLAMA
|
| 225 |
self._http = httpx.AsyncClient(timeout=180.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
async def complete(
|
| 228 |
self,
|
|
@@ -265,6 +281,7 @@ class OllamaClient:
|
|
| 265 |
return data.get("message", {}).get("content", "")
|
| 266 |
|
| 267 |
except httpx.ConnectError:
|
|
|
|
| 268 |
msg = (
|
| 269 |
f"Cannot connect to Ollama at {self.base_url}. "
|
| 270 |
"Make sure Ollama is running: 'ollama serve'"
|
|
@@ -587,6 +604,10 @@ class GroqClient:
|
|
| 587 |
}
|
| 588 |
return mapping.get(model, model)
|
| 589 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
|
| 591 |
# ============================================================
|
| 592 |
# Google Gemini Client (free tier via OpenAI-compatible endpoint)
|
|
@@ -651,6 +672,10 @@ class GeminiClient:
|
|
| 651 |
}
|
| 652 |
return mapping.get(model, model)
|
| 653 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
async def complete(
|
| 655 |
self,
|
| 656 |
system: str,
|
|
|
|
| 138 |
self.max_retries = max_retries
|
| 139 |
self.usage = LLMUsage()
|
| 140 |
self.provider = PROVIDER_CLAUDE
|
| 141 |
+
self._rate_limited_until: float = 0.0 # monotonic timestamp
|
| 142 |
|
| 143 |
async def complete(
|
| 144 |
self,
|
|
|
|
| 169 |
|
| 170 |
except anthropic.RateLimitError:
|
| 171 |
wait = 2 ** attempt
|
| 172 |
+
self._rate_limited_until = time.monotonic() + wait
|
| 173 |
logger.warning(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
|
| 174 |
time.sleep(wait)
|
| 175 |
except anthropic.APIError as e:
|
|
|
|
| 177 |
if attempt == self.max_retries - 1:
|
| 178 |
raise
|
| 179 |
time.sleep(1)
|
| 180 |
+
self._rate_limited_until = time.monotonic() + 60 # mark as limited after all retries failed
|
| 181 |
return ""
|
| 182 |
|
| 183 |
+
@property
|
| 184 |
+
def llm_status(self) -> str:
|
| 185 |
+
if time.monotonic() < self._rate_limited_until:
|
| 186 |
+
return "limited"
|
| 187 |
+
return "active"
|
| 188 |
+
|
| 189 |
async def complete_json(
|
| 190 |
self,
|
| 191 |
system: str,
|
|
|
|
| 232 |
self.usage = LLMUsage()
|
| 233 |
self.provider = PROVIDER_OLLAMA
|
| 234 |
self._http = httpx.AsyncClient(timeout=180.0)
|
| 235 |
+
self._last_error: float = 0.0 # monotonic timestamp of last connection failure
|
| 236 |
+
|
| 237 |
+
@property
|
| 238 |
+
def llm_status(self) -> str:
|
| 239 |
+
if time.monotonic() - self._last_error < 30:
|
| 240 |
+
return "limited" # recent connection error
|
| 241 |
+
return "active"
|
| 242 |
|
| 243 |
async def complete(
|
| 244 |
self,
|
|
|
|
| 281 |
return data.get("message", {}).get("content", "")
|
| 282 |
|
| 283 |
except httpx.ConnectError:
|
| 284 |
+
self._last_error = time.monotonic()
|
| 285 |
msg = (
|
| 286 |
f"Cannot connect to Ollama at {self.base_url}. "
|
| 287 |
"Make sure Ollama is running: 'ollama serve'"
|
|
|
|
| 604 |
}
|
| 605 |
return mapping.get(model, model)
|
| 606 |
|
| 607 |
+
@property
|
| 608 |
+
def llm_status(self) -> str:
|
| 609 |
+
return "limited" if self._is_quota_exhausted() else "active"
|
| 610 |
+
|
| 611 |
|
| 612 |
# ============================================================
|
| 613 |
# Google Gemini Client (free tier via OpenAI-compatible endpoint)
|
|
|
|
| 672 |
}
|
| 673 |
return mapping.get(model, model)
|
| 674 |
|
| 675 |
+
@property
|
| 676 |
+
def llm_status(self) -> str:
|
| 677 |
+
return "limited" if self._is_quota_exhausted() else "active"
|
| 678 |
+
|
| 679 |
async def complete(
|
| 680 |
self,
|
| 681 |
system: str,
|
src/soci/engine/simulation.py
CHANGED
|
@@ -838,6 +838,9 @@ class Simulation:
|
|
| 838 |
"active_conversations": len(self.active_conversations),
|
| 839 |
"llm_provider": getattr(self.llm, "provider", "unknown"),
|
| 840 |
"llm_model": getattr(self.llm, "default_model", "unknown"),
|
|
|
|
|
|
|
|
|
|
| 841 |
"llm_usage": self.llm.usage.summary(),
|
| 842 |
}
|
| 843 |
|
|
|
|
| 838 |
"active_conversations": len(self.active_conversations),
|
| 839 |
"llm_provider": getattr(self.llm, "provider", "unknown"),
|
| 840 |
"llm_model": getattr(self.llm, "default_model", "unknown"),
|
| 841 |
+
"llm_status": getattr(self.llm, "llm_status", "active"),
|
| 842 |
+
"llm_calls_last_tick": self._llm_calls_this_tick,
|
| 843 |
+
"llm_skipped": self._skip_llm_this_tick,
|
| 844 |
"llm_usage": self.llm.usage.summary(),
|
| 845 |
}
|
| 846 |
|
web/index.html
CHANGED
|
@@ -2871,9 +2871,20 @@ function processStateData(data) {
|
|
| 2871 |
.replace(/^gemini-/, ''); // "gemini-2.0-flash" β "2.0-flash"
|
| 2872 |
const providerIcon = { gemini: 'β¦', groq: 'β‘', claude: 'β', ollama: 'π¦' };
|
| 2873 |
const icon = providerIcon[data.llm_provider] || 'β‘';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2874 |
const el = document.getElementById('llm-model');
|
| 2875 |
-
el.
|
| 2876 |
-
el.title = `${data.llm_provider}: ${data.llm_model}`;
|
| 2877 |
}
|
| 2878 |
|
| 2879 |
agents = data.agents || {};
|
|
|
|
| 2871 |
.replace(/^gemini-/, ''); // "gemini-2.0-flash" β "2.0-flash"
|
| 2872 |
const providerIcon = { gemini: 'β¦', groq: 'β‘', claude: 'β', ollama: 'π¦' };
|
| 2873 |
const icon = providerIcon[data.llm_provider] || 'β‘';
|
| 2874 |
+
|
| 2875 |
+
// Status: limited > skipped > idle > active (calls happening)
|
| 2876 |
+
const isLimited = data.llm_status === 'limited';
|
| 2877 |
+
const isSkipped = data.llm_skipped === true;
|
| 2878 |
+
const hasCalls = (data.llm_calls_last_tick || 0) > 0;
|
| 2879 |
+
let dotColor, statusTip;
|
| 2880 |
+
if (isLimited) { dotColor = '#e94560'; statusTip = 'quota / rate limit hit'; }
|
| 2881 |
+
else if (isSkipped) { dotColor = '#666'; statusTip = 'LLM skipped (fast mode)'; }
|
| 2882 |
+
else if (hasCalls) { dotColor = '#4ecca3'; statusTip = `${data.llm_calls_last_tick} calls this tick`; }
|
| 2883 |
+
else { dotColor = '#f0c040'; statusTip = 'idle β no calls needed'; }
|
| 2884 |
+
|
| 2885 |
const el = document.getElementById('llm-model');
|
| 2886 |
+
el.innerHTML = `${icon} ${label} <span style="display:inline-block;width:7px;height:7px;border-radius:50%;background:${dotColor};vertical-align:middle;margin-left:2px"></span>`;
|
| 2887 |
+
el.title = `${data.llm_provider}: ${data.llm_model} β ${statusTip}`;
|
| 2888 |
}
|
| 2889 |
|
| 2890 |
agents = data.agents || {};
|