Persist LLM probability across restarts; fix Gemini model fallback
- Add `settings` table to SQLite DB for key-value config persistence
- On startup: load saved llm_call_probability from DB (env var still wins);
env var value is also written back so other workstations inherit it
- On slider change: persist new value to DB immediately
- GeminiClient: auto-fallback to gemini-1.5-flash when configured model
returns 400/404 "not found" on the serverless endpoint; tracks unavailable
models per-instance so future calls skip straight to fallback
- Add _GEMINI_FALLBACK_CHAIN and MODEL_GEMINI_FLASH_FALLBACK constants
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/api/routes.py +4 -1
- src/soci/api/server.py +14 -4
- src/soci/engine/llm.py +69 -14
- src/soci/persistence/database.py +23 -0
src/soci/api/routes.py
CHANGED
|
@@ -701,9 +701,12 @@ async def get_controls():
|
|
| 701 |
async def set_llm_probability(value: float = 1.0):
|
| 702 |
"""Set LLM call probability (0.0–1.0). Controls how often agents use LLM vs. routine behaviour.
|
| 703 |
At 0.45 with Gemini free tier: ~150 calls/h → ~10h daily runtime."""
|
| 704 |
-
from soci.api.server import set_llm_call_probability
|
| 705 |
set_llm_call_probability(value)
|
| 706 |
from soci.api.server import _llm_call_probability
|
|
|
|
|
|
|
|
|
|
| 707 |
return {"llm_call_probability": _llm_call_probability}
|
| 708 |
|
| 709 |
|
|
|
|
| 701 |
async def set_llm_probability(value: float = 1.0):
|
| 702 |
"""Set LLM call probability (0.0–1.0). Controls how often agents use LLM vs. routine behaviour.
|
| 703 |
At 0.45 with Gemini free tier: ~150 calls/h → ~10h daily runtime."""
|
| 704 |
+
from soci.api.server import set_llm_call_probability, get_database
|
| 705 |
set_llm_call_probability(value)
|
| 706 |
from soci.api.server import _llm_call_probability
|
| 707 |
+
# Persist so the value survives restarts and is shared across workstations via the DB
|
| 708 |
+
db = get_database()
|
| 709 |
+
await db.set_setting("llm_call_probability", str(_llm_call_probability))
|
| 710 |
return {"llm_call_probability": _llm_call_probability}
|
| 711 |
|
| 712 |
|
src/soci/api/server.py
CHANGED
|
@@ -332,15 +332,25 @@ async def lifespan(app: FastAPI):
|
|
| 332 |
PROVIDER_GROQ: 0.70,
|
| 333 |
PROVIDER_HF: 0.45,
|
| 334 |
}
|
| 335 |
-
|
| 336 |
-
os.environ.get("SOCI_LLM_PROB", str(_provider_default_prob.get(_llm_provider, 1.0)))
|
| 337 |
-
)
|
| 338 |
-
logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
|
| 339 |
|
| 340 |
db = Database()
|
| 341 |
await db.connect()
|
| 342 |
_database = db
|
| 343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
# Pull saved state from GitHub before trying to load locally
|
| 345 |
data_dir = Path(os.environ.get("SOCI_DATA_DIR", "data"))
|
| 346 |
await load_state_from_github(data_dir)
|
|
|
|
| 332 |
PROVIDER_GROQ: 0.70,
|
| 333 |
PROVIDER_HF: 0.45,
|
| 334 |
}
|
| 335 |
+
env_prob = os.environ.get("SOCI_LLM_PROB")
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
db = Database()
|
| 338 |
await db.connect()
|
| 339 |
_database = db
|
| 340 |
|
| 341 |
+
if env_prob is not None:
|
| 342 |
+
# Env var always wins; also save it so other workstations inherit it
|
| 343 |
+
_llm_call_probability = float(env_prob)
|
| 344 |
+
await db.set_setting("llm_call_probability", str(_llm_call_probability))
|
| 345 |
+
else:
|
| 346 |
+
# Prefer the last slider value saved in the DB, fall back to provider default
|
| 347 |
+
saved = await db.get_setting("llm_call_probability")
|
| 348 |
+
if saved is not None:
|
| 349 |
+
_llm_call_probability = float(saved)
|
| 350 |
+
else:
|
| 351 |
+
_llm_call_probability = _provider_default_prob.get(_llm_provider, 1.0)
|
| 352 |
+
logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
|
| 353 |
+
|
| 354 |
# Pull saved state from GitHub before trying to load locally
|
| 355 |
data_dir = Path(os.environ.get("SOCI_DATA_DIR", "data"))
|
| 356 |
await load_state_from_github(data_dir)
|
src/soci/engine/llm.py
CHANGED
|
@@ -39,8 +39,16 @@ MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
|
|
| 39 |
|
| 40 |
# Google Gemini model IDs (free tier via AI Studio)
|
| 41 |
MODEL_GEMINI_FLASH = "gemini-2.0-flash"
|
|
|
|
| 42 |
MODEL_GEMINI_PRO = "gemini-1.5-pro"
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# Hugging Face router model IDs (router.huggingface.co/v1 — auto-routes to best provider)
|
| 45 |
MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, great quality
|
| 46 |
MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
|
|
@@ -669,6 +677,9 @@ class GeminiClient:
|
|
| 669 |
self._last_request_time: float = 0.0
|
| 670 |
self._rate_lock = asyncio.Lock()
|
| 671 |
self._rate_limited_until: float = 0.0
|
|
|
|
|
|
|
|
|
|
| 672 |
# Daily usage tracking — resets at midnight Pacific (UTC-8/-7)
|
| 673 |
self._daily_limit: int = int(os.environ.get("GEMINI_DAILY_LIMIT", str(daily_limit)))
|
| 674 |
self._daily_requests: int = 0
|
|
@@ -731,7 +742,29 @@ class GeminiClient:
|
|
| 731 |
MODEL_HAIKU: self.default_model,
|
| 732 |
MODEL_GROQ_LLAMA_8B: MODEL_GEMINI_FLASH,
|
| 733 |
}
|
| 734 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 735 |
|
| 736 |
@property
|
| 737 |
def llm_status(self) -> str:
|
|
@@ -772,27 +805,38 @@ class GeminiClient:
|
|
| 772 |
self._track_daily_request()
|
| 773 |
return data["choices"][0]["message"]["content"]
|
| 774 |
except httpx.HTTPStatusError as e:
|
| 775 |
-
|
|
|
|
|
|
|
|
|
|
| 776 |
retry_after = e.response.headers.get("retry-after", "5")
|
| 777 |
try:
|
| 778 |
wait = float(retry_after)
|
| 779 |
except (ValueError, TypeError):
|
| 780 |
wait = 5.0
|
| 781 |
-
body_raw = e.response.text or ""
|
| 782 |
# Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
|
| 783 |
# so detect via message body and circuit-break until midnight Pacific.
|
| 784 |
if "quota" in body_raw.lower() or wait > 30:
|
| 785 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 786 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 787 |
-
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 788 |
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 789 |
return ""
|
| 790 |
-
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 791 |
logger.warning(f"Gemini 429: {body} — waiting {wait}s")
|
| 792 |
await asyncio.sleep(wait)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
else:
|
| 794 |
-
|
| 795 |
-
logger.error(f"Gemini HTTP error: {e.response.status_code} {body}")
|
| 796 |
if attempt == self.max_retries - 1:
|
| 797 |
return ""
|
| 798 |
await asyncio.sleep(1)
|
|
@@ -844,25 +888,36 @@ class GeminiClient:
|
|
| 844 |
text = data["choices"][0]["message"]["content"]
|
| 845 |
return _parse_json_response(text)
|
| 846 |
except httpx.HTTPStatusError as e:
|
| 847 |
-
|
|
|
|
|
|
|
|
|
|
| 848 |
retry_after = e.response.headers.get("retry-after", "5")
|
| 849 |
try:
|
| 850 |
wait = float(retry_after)
|
| 851 |
except (ValueError, TypeError):
|
| 852 |
wait = 5.0
|
| 853 |
-
body_raw = e.response.text or ""
|
| 854 |
if "quota" in body_raw.lower() or wait > 30:
|
| 855 |
-
circuit_wait = self._secs_until_pacific_midnight()
|
| 856 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 857 |
-
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 858 |
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
|
| 859 |
return {}
|
| 860 |
-
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 861 |
logger.warning(f"Gemini 429 (json): {body} β waiting {wait}s")
|
| 862 |
await asyncio.sleep(wait)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
else:
|
| 864 |
-
|
| 865 |
-
logger.error(f"Gemini JSON error: {e.response.status_code} {body}")
|
| 866 |
if attempt == self.max_retries - 1:
|
| 867 |
return {}
|
| 868 |
await asyncio.sleep(1)
|
|
|
|
| 39 |
|
| 40 |
# Google Gemini model IDs (free tier via AI Studio)
|
| 41 |
MODEL_GEMINI_FLASH = "gemini-2.0-flash"
|
| 42 |
+
MODEL_GEMINI_FLASH_FALLBACK = "gemini-1.5-flash" # fallback if 2.0-flash unavailable
|
| 43 |
MODEL_GEMINI_PRO = "gemini-1.5-pro"
|
| 44 |
|
| 45 |
+
# Models to try in order if a model is not available on the serverless endpoint
|
| 46 |
+
_GEMINI_FALLBACK_CHAIN: dict[str, str] = {
|
| 47 |
+
"gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
|
| 48 |
+
"gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
|
| 49 |
+
"gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
# Hugging Face router model IDs (router.huggingface.co/v1 — auto-routes to best provider)
|
| 53 |
MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, great quality
|
| 54 |
MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
|
|
|
|
| 677 |
self._last_request_time: float = 0.0
|
| 678 |
self._rate_lock = asyncio.Lock()
|
| 679 |
self._rate_limited_until: float = 0.0
|
| 680 |
+
# Automatic model fallback: if the configured model is unavailable on the endpoint,
|
| 681 |
+
# we silently downgrade to the next in the chain (e.g. 2.0-flash → 1.5-flash).
|
| 682 |
+
self._unavailable_models: set[str] = set()
|
| 683 |
# Daily usage tracking — resets at midnight Pacific (UTC-8/-7)
|
| 684 |
self._daily_limit: int = int(os.environ.get("GEMINI_DAILY_LIMIT", str(daily_limit)))
|
| 685 |
self._daily_requests: int = 0
|
|
|
|
| 742 |
MODEL_HAIKU: self.default_model,
|
| 743 |
MODEL_GROQ_LLAMA_8B: MODEL_GEMINI_FLASH,
|
| 744 |
}
|
| 745 |
+
mapped = mapping.get(model, model)
|
| 746 |
+
# If the mapped model is known unavailable, walk the fallback chain
|
| 747 |
+
while mapped in self._unavailable_models:
|
| 748 |
+
fallback = _GEMINI_FALLBACK_CHAIN.get(mapped)
|
| 749 |
+
if fallback is None or fallback == mapped:
|
| 750 |
+
break
|
| 751 |
+
mapped = fallback
|
| 752 |
+
return mapped
|
| 753 |
+
|
| 754 |
+
def _handle_model_not_found(self, model: str) -> Optional[str]:
|
| 755 |
+
"""Mark model unavailable and return the fallback model ID, or None if no fallback."""
|
| 756 |
+
self._unavailable_models.add(model)
|
| 757 |
+
# Update default_model so future calls skip straight to the fallback
|
| 758 |
+
if self.default_model == model:
|
| 759 |
+
fallback = _GEMINI_FALLBACK_CHAIN.get(model)
|
| 760 |
+
if fallback:
|
| 761 |
+
self.default_model = fallback
|
| 762 |
+
logger.warning(
|
| 763 |
+
f"Gemini model '{model}' not available on this endpoint — "
|
| 764 |
+
f"switching to '{fallback}' for all future calls"
|
| 765 |
+
)
|
| 766 |
+
return fallback
|
| 767 |
+
return None
|
| 768 |
|
| 769 |
@property
|
| 770 |
def llm_status(self) -> str:
|
|
|
|
| 805 |
self._track_daily_request()
|
| 806 |
return data["choices"][0]["message"]["content"]
|
| 807 |
except httpx.HTTPStatusError as e:
|
| 808 |
+
status = e.response.status_code
|
| 809 |
+
body_raw = e.response.text or ""
|
| 810 |
+
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 811 |
+
if status == 429:
|
| 812 |
retry_after = e.response.headers.get("retry-after", "5")
|
| 813 |
try:
|
| 814 |
wait = float(retry_after)
|
| 815 |
except (ValueError, TypeError):
|
| 816 |
wait = 5.0
|
|
|
|
| 817 |
# Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
|
| 818 |
# so detect via message body and circuit-break until midnight Pacific.
|
| 819 |
if "quota" in body_raw.lower() or wait > 30:
|
| 820 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 821 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
|
|
|
| 822 |
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 823 |
return ""
|
|
|
|
| 824 |
logger.warning(f"Gemini 429: {body} — waiting {wait}s")
|
| 825 |
await asyncio.sleep(wait)
|
| 826 |
+
elif status in (400, 404) and any(
|
| 827 |
+
kw in body_raw.lower()
|
| 828 |
+
for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable")
|
| 829 |
+
):
|
| 830 |
+
# Model not available on this endpoint — try fallback
|
| 831 |
+
fallback = self._handle_model_not_found(model)
|
| 832 |
+
if fallback:
|
| 833 |
+
model = fallback
|
| 834 |
+
payload["model"] = model
|
| 835 |
+
continue # retry immediately with fallback model
|
| 836 |
+
logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
|
| 837 |
+
return ""
|
| 838 |
else:
|
| 839 |
+
logger.error(f"Gemini HTTP error: {status} {body}")
|
|
|
|
| 840 |
if attempt == self.max_retries - 1:
|
| 841 |
return ""
|
| 842 |
await asyncio.sleep(1)
|
|
|
|
| 888 |
text = data["choices"][0]["message"]["content"]
|
| 889 |
return _parse_json_response(text)
|
| 890 |
except httpx.HTTPStatusError as e:
|
| 891 |
+
status = e.response.status_code
|
| 892 |
+
body_raw = e.response.text or ""
|
| 893 |
+
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 894 |
+
if status == 429:
|
| 895 |
retry_after = e.response.headers.get("retry-after", "5")
|
| 896 |
try:
|
| 897 |
wait = float(retry_after)
|
| 898 |
except (ValueError, TypeError):
|
| 899 |
wait = 5.0
|
|
|
|
| 900 |
if "quota" in body_raw.lower() or wait > 30:
|
| 901 |
+
circuit_wait = self._secs_until_pacific_midnight()
|
| 902 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
|
|
|
| 903 |
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
|
| 904 |
return {}
|
|
|
|
| 905 |
logger.warning(f"Gemini 429 (json): {body} — waiting {wait}s")
|
| 906 |
await asyncio.sleep(wait)
|
| 907 |
+
elif status in (400, 404) and any(
|
| 908 |
+
kw in body_raw.lower()
|
| 909 |
+
for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable")
|
| 910 |
+
):
|
| 911 |
+
# Model not available on this endpoint — try fallback
|
| 912 |
+
fallback = self._handle_model_not_found(model)
|
| 913 |
+
if fallback:
|
| 914 |
+
model = fallback
|
| 915 |
+
payload["model"] = model
|
| 916 |
+
continue # retry immediately with fallback model
|
| 917 |
+
logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
|
| 918 |
+
return {}
|
| 919 |
else:
|
| 920 |
+
logger.error(f"Gemini JSON error: {status} {body}")
|
|
|
|
| 921 |
if attempt == self.max_retries - 1:
|
| 922 |
return {}
|
| 923 |
await asyncio.sleep(1)
|
src/soci/persistence/database.py
CHANGED
|
@@ -68,6 +68,11 @@ CREATE TABLE IF NOT EXISTS users (
|
|
| 68 |
agent_id TEXT,
|
| 69 |
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
| 70 |
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
"""
|
| 72 |
|
| 73 |
|
|
@@ -252,3 +257,21 @@ class Database:
|
|
| 252 |
assert self._db is not None
|
| 253 |
await self._db.execute("UPDATE users SET token = NULL WHERE token = ?", (token,))
|
| 254 |
await self._db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
agent_id TEXT,
|
| 69 |
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
| 70 |
);
|
| 71 |
+
|
| 72 |
+
CREATE TABLE IF NOT EXISTS settings (
|
| 73 |
+
key TEXT PRIMARY KEY,
|
| 74 |
+
value TEXT NOT NULL
|
| 75 |
+
);
|
| 76 |
"""
|
| 77 |
|
| 78 |
|
|
|
|
| 257 |
assert self._db is not None
|
| 258 |
await self._db.execute("UPDATE users SET token = NULL WHERE token = ?", (token,))
|
| 259 |
await self._db.commit()
|
| 260 |
+
|
| 261 |
+
# ── Settings / persistent config ─────────────────────────────────────────
|
| 262 |
+
|
| 263 |
+
async def get_setting(self, key: str, default: Optional[str] = None) -> Optional[str]:
|
| 264 |
+
"""Read a persisted setting by key."""
|
| 265 |
+
assert self._db is not None
|
| 266 |
+
cursor = await self._db.execute("SELECT value FROM settings WHERE key = ?", (key,))
|
| 267 |
+
row = await cursor.fetchone()
|
| 268 |
+
return row[0] if row else default
|
| 269 |
+
|
| 270 |
+
async def set_setting(self, key: str, value: str) -> None:
|
| 271 |
+
"""Upsert a persisted setting."""
|
| 272 |
+
assert self._db is not None
|
| 273 |
+
await self._db.execute(
|
| 274 |
+
"INSERT INTO settings (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value",
|
| 275 |
+
(key, value),
|
| 276 |
+
)
|
| 277 |
+
await self._db.commit()
|