Default LLM probability to 10% for cloud providers, ignore stale DB value
Browse files
The DB-saved slider value (often 1.0 from local Ollama sessions) was
overriding the provider default on HF deploys, causing Gemini to burn
through its daily quota in minutes. Startup now always uses the provider
default (10% for cloud, 100% for Ollama) unless the SOCI_LLM_PROB env var
is set, in which case the env var wins. Users can still adjust the value
via the UI slider during the session.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- src/soci/api/server.py +10 -12
src/soci/api/server.py
CHANGED
|
@@ -358,15 +358,16 @@ async def lifespan(app: FastAPI):
|
|
| 358 |
if hasattr(llm, "_rate_limited_until"):
|
| 359 |
llm._rate_limited_until = 0.0
|
| 360 |
|
| 361 |
-
# Default LLM call probability
|
| 362 |
-
#
|
| 363 |
-
#
|
|
|
|
| 364 |
_provider_default_prob = {
|
| 365 |
PROVIDER_GEMINI: 0.10,
|
| 366 |
PROVIDER_GROQ: 0.10,
|
| 367 |
PROVIDER_HF: 0.10,
|
| 368 |
PROVIDER_CLAUDE: 0.10,
|
| 369 |
-
PROVIDER_OLLAMA:
|
| 370 |
}
|
| 371 |
env_prob = os.environ.get("SOCI_LLM_PROB")
|
| 372 |
|
|
@@ -375,16 +376,13 @@ async def lifespan(app: FastAPI):
|
|
| 375 |
_database = db
|
| 376 |
|
| 377 |
if env_prob is not None:
|
| 378 |
-
# Env var always wins
|
| 379 |
_llm_call_probability = float(env_prob)
|
| 380 |
-
await db.set_setting("llm_call_probability", str(_llm_call_probability))
|
| 381 |
else:
|
| 382 |
-
#
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
else:
|
| 387 |
-
_llm_call_probability = _provider_default_prob.get(_llm_provider, 0.10)
|
| 388 |
logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
|
| 389 |
|
| 390 |
# Pull saved state from GitHub before trying to load locally
|
|
|
|
| 358 |
if hasattr(llm, "_rate_limited_until"):
|
| 359 |
llm._rate_limited_until = 0.0
|
| 360 |
|
| 361 |
+
# Default LLM call probability per provider.
|
| 362 |
+
# Cloud providers default to 0.10 (10%) to conserve daily quotas.
|
| 363 |
+
# Ollama is local so it defaults to 1.0 (100%).
|
| 364 |
+
# Override via SOCI_LLM_PROB env var or the UI slider.
|
| 365 |
_provider_default_prob = {
|
| 366 |
PROVIDER_GEMINI: 0.10,
|
| 367 |
PROVIDER_GROQ: 0.10,
|
| 368 |
PROVIDER_HF: 0.10,
|
| 369 |
PROVIDER_CLAUDE: 0.10,
|
| 370 |
+
PROVIDER_OLLAMA: 1.0,
|
| 371 |
}
|
| 372 |
env_prob = os.environ.get("SOCI_LLM_PROB")
|
| 373 |
|
|
|
|
| 376 |
_database = db
|
| 377 |
|
| 378 |
if env_prob is not None:
|
| 379 |
+
# Env var always wins
|
| 380 |
_llm_call_probability = float(env_prob)
|
|
|
|
| 381 |
else:
|
| 382 |
+
# Always start with provider default — the DB-saved slider value from a
|
| 383 |
+
# previous session may have been tuned for a different provider or context.
|
| 384 |
+
# Users can adjust via the UI slider during the session.
|
| 385 |
+
_llm_call_probability = _provider_default_prob.get(_llm_provider, 0.10)
|
|
|
|
|
|
|
| 386 |
logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
|
| 387 |
|
| 388 |
# Pull saved state from GitHub before trying to load locally
|