RayMelius Claude Sonnet 4.6 committed on
Commit
0e4c818
·
1 Parent(s): da342a7

Persist LLM probability across restarts; fix Gemini model fallback

Browse files

- Add `settings` table to SQLite DB for key-value config persistence
- On startup: load saved llm_call_probability from DB (env var still wins);
env var value is also written back so other workstations inherit it
- On slider change: persist new value to DB immediately
- GeminiClient: auto-fallback to gemini-1.5-flash when configured model
returns 400/404 "not found" on the serverless endpoint; tracks unavailable
models per-instance so future calls skip straight to fallback
- Add _GEMINI_FALLBACK_CHAIN and MODEL_GEMINI_FLASH_FALLBACK constants

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

src/soci/api/routes.py CHANGED
@@ -701,9 +701,12 @@ async def get_controls():
701
  async def set_llm_probability(value: float = 1.0):
702
  """Set LLM call probability (0.0–1.0). Controls how often agents use LLM vs. routine behaviour.
703
  At 0.45 with Gemini free tier: ~150 calls/h → ~10h daily runtime."""
704
- from soci.api.server import set_llm_call_probability
705
  set_llm_call_probability(value)
706
  from soci.api.server import _llm_call_probability
 
 
 
707
  return {"llm_call_probability": _llm_call_probability}
708
 
709
 
 
701
  async def set_llm_probability(value: float = 1.0):
702
  """Set LLM call probability (0.0–1.0). Controls how often agents use LLM vs. routine behaviour.
703
  At 0.45 with Gemini free tier: ~150 calls/h → ~10h daily runtime."""
704
+ from soci.api.server import set_llm_call_probability, get_database
705
  set_llm_call_probability(value)
706
  from soci.api.server import _llm_call_probability
707
+ # Persist so the value survives restarts and is shared across workstations via the DB
708
+ db = get_database()
709
+ await db.set_setting("llm_call_probability", str(_llm_call_probability))
710
  return {"llm_call_probability": _llm_call_probability}
711
 
712
 
src/soci/api/server.py CHANGED
@@ -332,15 +332,25 @@ async def lifespan(app: FastAPI):
332
  PROVIDER_GROQ: 0.70,
333
  PROVIDER_HF: 0.45,
334
  }
335
- _llm_call_probability = float(
336
- os.environ.get("SOCI_LLM_PROB", str(_provider_default_prob.get(_llm_provider, 1.0)))
337
- )
338
- logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
339
 
340
  db = Database()
341
  await db.connect()
342
  _database = db
343
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  # Pull saved state from GitHub before trying to load locally
345
  data_dir = Path(os.environ.get("SOCI_DATA_DIR", "data"))
346
  await load_state_from_github(data_dir)
 
332
  PROVIDER_GROQ: 0.70,
333
  PROVIDER_HF: 0.45,
334
  }
335
+ env_prob = os.environ.get("SOCI_LLM_PROB")
 
 
 
336
 
337
  db = Database()
338
  await db.connect()
339
  _database = db
340
 
341
+ if env_prob is not None:
342
+ # Env var always wins; also save it so other workstations inherit it
343
+ _llm_call_probability = float(env_prob)
344
+ await db.set_setting("llm_call_probability", str(_llm_call_probability))
345
+ else:
346
+ # Prefer the last slider value saved in the DB, fall back to provider default
347
+ saved = await db.get_setting("llm_call_probability")
348
+ if saved is not None:
349
+ _llm_call_probability = float(saved)
350
+ else:
351
+ _llm_call_probability = _provider_default_prob.get(_llm_provider, 1.0)
352
+ logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
353
+
354
  # Pull saved state from GitHub before trying to load locally
355
  data_dir = Path(os.environ.get("SOCI_DATA_DIR", "data"))
356
  await load_state_from_github(data_dir)
src/soci/engine/llm.py CHANGED
@@ -39,8 +39,16 @@ MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
39
 
40
  # Google Gemini model IDs (free tier via AI Studio)
41
  MODEL_GEMINI_FLASH = "gemini-2.0-flash"
 
42
  MODEL_GEMINI_PRO = "gemini-1.5-pro"
43
 
 
 
 
 
 
 
 
44
  # Hugging Face router model IDs (router.huggingface.co/v1 — auto-routes to best provider)
45
  MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, great quality
46
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
@@ -669,6 +677,9 @@ class GeminiClient:
669
  self._last_request_time: float = 0.0
670
  self._rate_lock = asyncio.Lock()
671
  self._rate_limited_until: float = 0.0
 
 
 
672
  # Daily usage tracking — resets at midnight Pacific (UTC-8/-7)
673
  self._daily_limit: int = int(os.environ.get("GEMINI_DAILY_LIMIT", str(daily_limit)))
674
  self._daily_requests: int = 0
@@ -731,7 +742,29 @@ class GeminiClient:
731
  MODEL_HAIKU: self.default_model,
732
  MODEL_GROQ_LLAMA_8B: MODEL_GEMINI_FLASH,
733
  }
734
- return mapping.get(model, model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
 
736
  @property
737
  def llm_status(self) -> str:
@@ -772,27 +805,38 @@ class GeminiClient:
772
  self._track_daily_request()
773
  return data["choices"][0]["message"]["content"]
774
  except httpx.HTTPStatusError as e:
775
- if e.response.status_code == 429:
 
 
 
776
  retry_after = e.response.headers.get("retry-after", "5")
777
  try:
778
  wait = float(retry_after)
779
  except (ValueError, TypeError):
780
  wait = 5.0
781
- body_raw = e.response.text or ""
782
  # Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
783
  # so detect via message body and circuit-break until midnight Pacific.
784
  if "quota" in body_raw.lower() or wait > 30:
785
  circuit_wait = self._secs_until_pacific_midnight()
786
  self._rate_limited_until = time.monotonic() + circuit_wait
787
- body = body_raw[:200].replace("{", "(").replace("}", ")")
788
  logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
789
  return ""
790
- body = body_raw[:200].replace("{", "(").replace("}", ")")
791
  logger.warning(f"Gemini 429: {body} — waiting {wait}s")
792
  await asyncio.sleep(wait)
 
 
 
 
 
 
 
 
 
 
 
 
793
  else:
794
- body = e.response.text[:200].replace("{", "(").replace("}", ")")
795
- logger.error(f"Gemini HTTP error: {e.response.status_code} {body}")
796
  if attempt == self.max_retries - 1:
797
  return ""
798
  await asyncio.sleep(1)
@@ -844,25 +888,36 @@ class GeminiClient:
844
  text = data["choices"][0]["message"]["content"]
845
  return _parse_json_response(text)
846
  except httpx.HTTPStatusError as e:
847
- if e.response.status_code == 429:
 
 
 
848
  retry_after = e.response.headers.get("retry-after", "5")
849
  try:
850
  wait = float(retry_after)
851
  except (ValueError, TypeError):
852
  wait = 5.0
853
- body_raw = e.response.text or ""
854
  if "quota" in body_raw.lower() or wait > 30:
855
- circuit_wait = max(wait, 28800) # 8 hours
856
  self._rate_limited_until = time.monotonic() + circuit_wait
857
- body = body_raw[:200].replace("{", "(").replace("}", ")")
858
  logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
859
  return {}
860
- body = body_raw[:200].replace("{", "(").replace("}", ")")
861
  logger.warning(f"Gemini 429 (json): {body} — waiting {wait}s")
862
  await asyncio.sleep(wait)
 
 
 
 
 
 
 
 
 
 
 
 
863
  else:
864
- body = e.response.text[:200].replace("{", "(").replace("}", ")")
865
- logger.error(f"Gemini JSON error: {e.response.status_code} {body}")
866
  if attempt == self.max_retries - 1:
867
  return {}
868
  await asyncio.sleep(1)
 
39
 
40
  # Google Gemini model IDs (free tier via AI Studio)
41
  MODEL_GEMINI_FLASH = "gemini-2.0-flash"
42
+ MODEL_GEMINI_FLASH_FALLBACK = "gemini-1.5-flash" # fallback if 2.0-flash unavailable
43
  MODEL_GEMINI_PRO = "gemini-1.5-pro"
44
 
45
+ # Models to try in order if a model is not available on the serverless endpoint
46
+ _GEMINI_FALLBACK_CHAIN: dict[str, str] = {
47
+ "gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
48
+ "gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
49
+ "gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
50
+ }
51
+
52
  # Hugging Face router model IDs (router.huggingface.co/v1 — auto-routes to best provider)
53
  MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, great quality
54
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
 
677
  self._last_request_time: float = 0.0
678
  self._rate_lock = asyncio.Lock()
679
  self._rate_limited_until: float = 0.0
680
+ # Automatic model fallback: if the configured model is unavailable on the endpoint,
681
+ # we silently downgrade to the next in the chain (e.g. 2.0-flash → 1.5-flash).
682
+ self._unavailable_models: set[str] = set()
683
  # Daily usage tracking — resets at midnight Pacific (UTC-8/-7)
684
  self._daily_limit: int = int(os.environ.get("GEMINI_DAILY_LIMIT", str(daily_limit)))
685
  self._daily_requests: int = 0
 
742
  MODEL_HAIKU: self.default_model,
743
  MODEL_GROQ_LLAMA_8B: MODEL_GEMINI_FLASH,
744
  }
745
+ mapped = mapping.get(model, model)
746
+ # If the mapped model is known unavailable, walk the fallback chain
747
+ while mapped in self._unavailable_models:
748
+ fallback = _GEMINI_FALLBACK_CHAIN.get(mapped)
749
+ if fallback is None or fallback == mapped:
750
+ break
751
+ mapped = fallback
752
+ return mapped
753
+
754
+ def _handle_model_not_found(self, model: str) -> Optional[str]:
755
+ """Mark model unavailable and return the fallback model ID, or None if no fallback."""
756
+ self._unavailable_models.add(model)
757
+ # Update default_model so future calls skip straight to the fallback
758
+ if self.default_model == model:
759
+ fallback = _GEMINI_FALLBACK_CHAIN.get(model)
760
+ if fallback:
761
+ self.default_model = fallback
762
+ logger.warning(
763
+ f"Gemini model '{model}' not available on this endpoint — "
764
+ f"switching to '{fallback}' for all future calls"
765
+ )
766
+ return fallback
767
+ return None
768
 
769
  @property
770
  def llm_status(self) -> str:
 
805
  self._track_daily_request()
806
  return data["choices"][0]["message"]["content"]
807
  except httpx.HTTPStatusError as e:
808
+ status = e.response.status_code
809
+ body_raw = e.response.text or ""
810
+ body = body_raw[:200].replace("{", "(").replace("}", ")")
811
+ if status == 429:
812
  retry_after = e.response.headers.get("retry-after", "5")
813
  try:
814
  wait = float(retry_after)
815
  except (ValueError, TypeError):
816
  wait = 5.0
 
817
  # Daily quota exhausted β€” Gemini sends retry-after:5 even for daily limits,
818
  # so detect via message body and circuit-break until midnight Pacific.
819
  if "quota" in body_raw.lower() or wait > 30:
820
  circuit_wait = self._secs_until_pacific_midnight()
821
  self._rate_limited_until = time.monotonic() + circuit_wait
 
822
  logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
823
  return ""
 
824
  logger.warning(f"Gemini 429: {body} — waiting {wait}s")
825
  await asyncio.sleep(wait)
826
+ elif status in (400, 404) and any(
827
+ kw in body_raw.lower()
828
+ for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable")
829
+ ):
830
+ # Model not available on this endpoint — try fallback
831
+ fallback = self._handle_model_not_found(model)
832
+ if fallback:
833
+ model = fallback
834
+ payload["model"] = model
835
+ continue # retry immediately with fallback model
836
+ logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
837
+ return ""
838
  else:
839
+ logger.error(f"Gemini HTTP error: {status} {body}")
 
840
  if attempt == self.max_retries - 1:
841
  return ""
842
  await asyncio.sleep(1)
 
888
  text = data["choices"][0]["message"]["content"]
889
  return _parse_json_response(text)
890
  except httpx.HTTPStatusError as e:
891
+ status = e.response.status_code
892
+ body_raw = e.response.text or ""
893
+ body = body_raw[:200].replace("{", "(").replace("}", ")")
894
+ if status == 429:
895
  retry_after = e.response.headers.get("retry-after", "5")
896
  try:
897
  wait = float(retry_after)
898
  except (ValueError, TypeError):
899
  wait = 5.0
 
900
  if "quota" in body_raw.lower() or wait > 30:
901
+ circuit_wait = self._secs_until_pacific_midnight()
902
  self._rate_limited_until = time.monotonic() + circuit_wait
 
903
  logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
904
  return {}
 
905
  logger.warning(f"Gemini 429 (json): {body} — waiting {wait}s")
906
  await asyncio.sleep(wait)
907
+ elif status in (400, 404) and any(
908
+ kw in body_raw.lower()
909
+ for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable")
910
+ ):
911
+ # Model not available on this endpoint — try fallback
912
+ fallback = self._handle_model_not_found(model)
913
+ if fallback:
914
+ model = fallback
915
+ payload["model"] = model
916
+ continue # retry immediately with fallback model
917
+ logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
918
+ return {}
919
  else:
920
+ logger.error(f"Gemini JSON error: {status} {body}")
 
921
  if attempt == self.max_retries - 1:
922
  return {}
923
  await asyncio.sleep(1)
src/soci/persistence/database.py CHANGED
@@ -68,6 +68,11 @@ CREATE TABLE IF NOT EXISTS users (
68
  agent_id TEXT,
69
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
70
  );
 
 
 
 
 
71
  """
72
 
73
 
@@ -252,3 +257,21 @@ class Database:
252
  assert self._db is not None
253
  await self._db.execute("UPDATE users SET token = NULL WHERE token = ?", (token,))
254
  await self._db.commit()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  agent_id TEXT,
69
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
70
  );
71
+
72
+ CREATE TABLE IF NOT EXISTS settings (
73
+ key TEXT PRIMARY KEY,
74
+ value TEXT NOT NULL
75
+ );
76
  """
77
 
78
 
 
257
  assert self._db is not None
258
  await self._db.execute("UPDATE users SET token = NULL WHERE token = ?", (token,))
259
  await self._db.commit()
260
+
261
+ # ── Settings / persistent config ─────────────────────────────────────────
262
+
263
+ async def get_setting(self, key: str, default: Optional[str] = None) -> Optional[str]:
264
+ """Read a persisted setting by key."""
265
+ assert self._db is not None
266
+ cursor = await self._db.execute("SELECT value FROM settings WHERE key = ?", (key,))
267
+ row = await cursor.fetchone()
268
+ return row[0] if row else default
269
+
270
+ async def set_setting(self, key: str, value: str) -> None:
271
+ """Upsert a persisted setting."""
272
+ assert self._db is not None
273
+ await self._db.execute(
274
+ "INSERT INTO settings (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value",
275
+ (key, value),
276
+ )
277
+ await self._db.commit()