RayMelius Claude Sonnet 4.6 committed on
Commit 7db8fcd · Parent(s): 45734ba

Fix Gemini default: use gemini-2.0-flash-lite (confirmed free-tier working)


Live testing showed:
- gemini-1.5-flash* models → 404 on current API keys (deprecated/unavailable)
- gemini-2.0-flash-lite → reaches API, only fails when daily quota exhausted
- gemini-2.0-flash-001 → same (valid model, quota-limited)

Update default to gemini-2.0-flash-lite, fallback chain to 2.x models only,
and fix provider label in UI. Switch live Space to Groq (working now).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
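For reference, the live test described above can be reproduced with a short probe. This is a sketch, not part of the commit: it assumes the openai Python client (v1.x) pointed at Google's OpenAI-compatible Gemini endpoint and a GEMINI_API_KEY in the environment; unavailable model IDs surface as 404 (NotFoundError), while valid but quota-exhausted models surface as 429 (RateLimitError).

# Sketch of the live test: probe Gemini model IDs via the OpenAI-compatible endpoint.
# Assumes the `openai` package (>=1.0) and GEMINI_API_KEY; not part of this commit.
import os
from openai import OpenAI, NotFoundError, RateLimitError

client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"],
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)

candidates = [
    "gemini-1.5-flash",       # observed: 404 (deprecated/unavailable)
    "gemini-2.0-flash-lite",  # observed: reaches API, fails only when daily quota is exhausted
    "gemini-2.0-flash-001",   # observed: same (valid model, quota-limited)
]

for model in candidates:
    try:
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "ping"}],
            max_tokens=1,
        )
        print(f"{model}: reachable")
    except NotFoundError:
        print(f"{model}: 404, not available with this key")
    except RateLimitError:
        print(f"{model}: reachable, but daily quota exhausted (429)")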

Files changed (2)
  1. src/soci/api/routes.py +1 -1
  2. src/soci/engine/llm.py +9 -9
src/soci/api/routes.py CHANGED
@@ -279,7 +279,7 @@ async def get_llm_providers():
     if os.environ.get("GROQ_API_KEY"):
         providers.append({"id": "groq", "label": "Groq Llama 8B", "icon": "⚡", "model": ""})
     if os.environ.get("GEMINI_API_KEY"):
-        providers.append({"id": "gemini", "label": "Gemini 1.5 Flash", "icon": "✦", "model": ""})
+        providers.append({"id": "gemini", "label": "Gemini 2.0 Flash Lite", "icon": "✦", "model": ""})
     has_hf = bool(
         os.environ.get("HF_TOKEN")
         or os.environ.get("HUGGINGFACE_TOKEN")
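For orientation, with both GROQ_API_KEY and GEMINI_API_KEY set, the entries built in this hunk now come out as below. Only the list items are shown; the response envelope and the route path for get_llm_providers() are outside this hunk.

# Provider entries appended by get_llm_providers() when both keys are present (sketch).
providers = [
    {"id": "groq", "label": "Groq Llama 8B", "icon": "⚡", "model": ""},
    {"id": "gemini", "label": "Gemini 2.0 Flash Lite", "icon": "✦", "model": ""},
]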
src/soci/engine/llm.py CHANGED
@@ -38,19 +38,19 @@ MODEL_GROQ_LLAMA_70B = "llama-3.3-70b-versatile"
 MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
 
 # Google Gemini model IDs (free tier via AI Studio)
-# gemini-1.5-flash is the reliable default on the OpenAI-compatible endpoint.
-# gemini-2.0-flash can be enabled via GEMINI_MODEL env var if your key supports it.
-MODEL_GEMINI_FLASH = "gemini-1.5-flash"
-MODEL_GEMINI_FLASH_FALLBACK = "gemini-1.5-flash"  # final fallback
-MODEL_GEMINI_FLASH_V2 = "gemini-2.0-flash"  # opt-in via GEMINI_MODEL env var
+# gemini-2.0-flash-lite is the reliable free-tier default on the OpenAI-compatible endpoint.
+# gemini-1.5-flash* models return 404 on current API keys; do not use them.
+MODEL_GEMINI_FLASH = "gemini-2.0-flash-lite"  # free tier, confirmed working
+MODEL_GEMINI_FLASH_FALLBACK = "gemini-2.0-flash-001"  # versioned fallback
 MODEL_GEMINI_PRO = "gemini-1.5-pro"
 
-# Models to try in order if a model is not available on the serverless endpoint
+# Fallback chain: tried in order when a model returns a not-available error
 _GEMINI_FALLBACK_CHAIN: dict[str, str] = {
-    "gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
-    "gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
-    "gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
+    "gemini-2.0-flash": "gemini-2.0-flash-lite",
+    "gemini-2.0-flash-exp": "gemini-2.0-flash-lite",
+    "gemini-2.0-flash-001": "gemini-2.0-flash-lite",
     "gemini-2.0-flash-lite": MODEL_GEMINI_FLASH_FALLBACK,
+    # 1.5-flash models return 404 on current API keys — skip the entire 1.5 family
 }
 
 # Keywords in any Gemini error body that indicate the model is unavailable on this endpoint
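The hunk above only defines the chain; the retry logic that walks it lives elsewhere in src/soci/engine/llm.py and is not shown in this diff. As a rough sketch of how such a chain is typically consumed (call_gemini and _is_model_unavailable_error below are hypothetical names, not the module's actual API):

# Hypothetical sketch of walking _GEMINI_FALLBACK_CHAIN; the real retry code in
# src/soci/engine/llm.py is not part of this diff.
def complete_with_fallback(model: str, prompt: str, max_hops: int = 3) -> str:
    tried: set[str] = set()
    while True:
        try:
            return call_gemini(model, prompt)  # hypothetical single-request helper
        except Exception as exc:
            # Fall back only on "model not available" errors (matched by the keyword
            # check mentioned above); quota and auth failures are re-raised as-is.
            if not _is_model_unavailable_error(exc):  # hypothetical predicate
                raise
            tried.add(model)
            fallback = _GEMINI_FALLBACK_CHAIN.get(model)
            if fallback is None or fallback in tried or len(tried) >= max_hops:
                raise
            model = fallback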