RayMelius Claude Opus 4.6 committed on
Commit
b036f25
·
1 Parent(s): 766b49e

Default LLM probability to 10% for cloud providers, ignore stale DB value

Browse files

The DB-saved slider value (often 1.0 from local Ollama sessions) was
overriding the provider default on HF deploys, causing Gemini to burn
through its daily quota in minutes. Now always starts with the provider
default (10% for cloud, 100% for Ollama) — users can still adjust via
the UI slider during the session.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/soci/api/server.py +10 -12
src/soci/api/server.py CHANGED
@@ -358,15 +358,16 @@ async def lifespan(app: FastAPI):
358
  if hasattr(llm, "_rate_limited_until"):
359
  llm._rate_limited_until = 0.0
360
 
361
- # Default LLM call probability 0.10 for all providers to conserve daily quotas.
362
- # At 0.10: ~15 calls/h with Gemini (5 RPM) stays well within 1500 RPD limit.
363
- # Raise via slider in the UI or SOCI_LLM_PROB env var (0.0–1.0).
 
364
  _provider_default_prob = {
365
  PROVIDER_GEMINI: 0.10,
366
  PROVIDER_GROQ: 0.10,
367
  PROVIDER_HF: 0.10,
368
  PROVIDER_CLAUDE: 0.10,
369
- PROVIDER_OLLAMA: 0.10,
370
  }
371
  env_prob = os.environ.get("SOCI_LLM_PROB")
372
 
@@ -375,16 +376,13 @@ async def lifespan(app: FastAPI):
375
  _database = db
376
 
377
  if env_prob is not None:
378
- # Env var always wins; also save it so other workstations inherit it
379
  _llm_call_probability = float(env_prob)
380
- await db.set_setting("llm_call_probability", str(_llm_call_probability))
381
  else:
382
- # Prefer the last slider value saved in the DB, fall back to provider default
383
- saved = await db.get_setting("llm_call_probability")
384
- if saved is not None:
385
- _llm_call_probability = float(saved)
386
- else:
387
- _llm_call_probability = _provider_default_prob.get(_llm_provider, 0.10)
388
  logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
389
 
390
  # Pull saved state from GitHub before trying to load locally
 
358
  if hasattr(llm, "_rate_limited_until"):
359
  llm._rate_limited_until = 0.0
360
 
361
+ # Default LLM call probability per provider.
362
+ # Cloud providers default to 0.10 (10%) to conserve daily quotas.
363
+ # Ollama is local so it defaults to 1.0 (100%).
364
+ # Override via SOCI_LLM_PROB env var or the UI slider.
365
  _provider_default_prob = {
366
  PROVIDER_GEMINI: 0.10,
367
  PROVIDER_GROQ: 0.10,
368
  PROVIDER_HF: 0.10,
369
  PROVIDER_CLAUDE: 0.10,
370
+ PROVIDER_OLLAMA: 1.0,
371
  }
372
  env_prob = os.environ.get("SOCI_LLM_PROB")
373
 
 
376
  _database = db
377
 
378
  if env_prob is not None:
379
+ # Env var always wins
380
  _llm_call_probability = float(env_prob)
 
381
  else:
382
+ # Always start with provider default — the DB-saved slider value from a
383
+ # previous session may have been tuned for a different provider or context.
384
+ # Users can adjust via the UI slider during the session.
385
+ _llm_call_probability = _provider_default_prob.get(_llm_provider, 0.10)
 
 
386
  logger.info(f"LLM call probability: {_llm_call_probability:.0%}")
387
 
388
  # Pull saved state from GitHub before trying to load locally