Fix Gemini default: use gemini-2.0-flash-lite (confirmed free-tier working)
Live testing showed:
- gemini-1.5-flash* models → 404 on current API keys (deprecated/unavailable)
- gemini-2.0-flash-lite → reaches API, only fails when daily quota exhausted
- gemini-2.0-flash-001 → same (valid model, quota-limited)
Update the default to gemini-2.0-flash-lite, restrict the fallback chain to 2.x models only,
and fix the provider label in the UI. Switch the live Space to Groq (working now).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
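
For reproducibility, the live-testing result above can be checked with a small probe against Google's OpenAI-compatible endpoint (the same endpoint the llm.py comments refer to). This is a minimal sketch, not part of the commit: the model list, the one-token prompt, and the error handling are illustrative, and it assumes GEMINI_API_KEY is set and the openai Python package is installed.

# Probe sketch: ask each candidate model for a single token and report whether the
# call succeeds, 404s (model not available on this endpoint), or hits a quota error.
# The base_url is Google's OpenAI-compatible endpoint; the model list is illustrative.
import os

from openai import OpenAI, APIStatusError

client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"],
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)

for model in ("gemini-1.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash-001"):
    try:
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "ping"}],
            max_tokens=1,
        )
        print(f"{model}: OK")
    except APIStatusError as exc:
        # 404 -> model unavailable on this key/endpoint; 429 -> daily quota exhausted
        print(f"{model}: HTTP {exc.status_code}")

A 404 here corresponds to the "model unavailable" case the fallback chain handles; a 429 corresponds to the quota-exhausted case mentioned in the message above.
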
- src/soci/api/routes.py +1 -1
- src/soci/engine/llm.py +9 -9
src/soci/api/routes.py
CHANGED

@@ -279,7 +279,7 @@ async def get_llm_providers():
     if os.environ.get("GROQ_API_KEY"):
         providers.append({"id": "groq", "label": "Groq Llama 8B", "icon": "⚡", "model": ""})
     if os.environ.get("GEMINI_API_KEY"):
-        providers.append({"id": "gemini", "label": "Gemini
+        providers.append({"id": "gemini", "label": "Gemini 2.0 Flash Lite", "icon": "✦", "model": ""})
     has_hf = bool(
         os.environ.get("HF_TOKEN")
         or os.environ.get("HUGGINGFACE_TOKEN")
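
A small regression check for the label change could look like the sketch below. It is hypothetical: it assumes get_llm_providers() returns the providers list directly (this diff only shows the list being built, not the return value) and that pytest with pytest-asyncio is available.

# Hypothetical regression test for the provider label fix. Assumes get_llm_providers()
# returns the `providers` list shown in the diff; adjust if the route wraps it.
import pytest

from soci.api.routes import get_llm_providers


@pytest.mark.asyncio
async def test_gemini_provider_label(monkeypatch):
    monkeypatch.setenv("GEMINI_API_KEY", "dummy-key")
    providers = await get_llm_providers()
    gemini = next(p for p in providers if p.get("id") == "gemini")
    assert gemini["label"] == "Gemini 2.0 Flash Lite"
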
src/soci/engine/llm.py
CHANGED

@@ -38,19 +38,19 @@ MODEL_GROQ_LLAMA_70B = "llama-3.3-70b-versatile"
 MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"

 # Google Gemini model IDs (free tier via AI Studio)
-# gemini-
-# gemini-
-MODEL_GEMINI_FLASH = "gemini-
-MODEL_GEMINI_FLASH_FALLBACK = "gemini-
-MODEL_GEMINI_FLASH_V2 = "gemini-2.0-flash"  # opt-in via GEMINI_MODEL env var
+# gemini-2.0-flash-lite is the reliable free-tier default on the OpenAI-compatible endpoint.
+# gemini-1.5-flash* models return 404 on current API keys — do not use them.
+MODEL_GEMINI_FLASH = "gemini-2.0-flash-lite"  # free tier, confirmed working
+MODEL_GEMINI_FLASH_FALLBACK = "gemini-2.0-flash-001"  # versioned fallback
 MODEL_GEMINI_PRO = "gemini-1.5-pro"

-#
+# Fallback chain: tried in order when a model returns a not-available error
 _GEMINI_FALLBACK_CHAIN: dict[str, str] = {
-    "gemini-2.0-flash":
-    "gemini-2.0-flash-exp":
-    "gemini-2.0-flash-001":
+    "gemini-2.0-flash": "gemini-2.0-flash-lite",
+    "gemini-2.0-flash-exp": "gemini-2.0-flash-lite",
+    "gemini-2.0-flash-001": "gemini-2.0-flash-lite",
     "gemini-2.0-flash-lite": MODEL_GEMINI_FLASH_FALLBACK,
+    # 1.5-flash models return 404 on current API keys — skip the entire 1.5 family
 }

 # Keywords in any Gemini error body that indicate the model is unavailable on this endpoint
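
For context on how the remapped chain is meant to be consumed (the retry helper itself is outside this hunk), here is a hypothetical sketch that walks _GEMINI_FALLBACK_CHAIN using the values from the diff. call_model and is_unavailable stand in for the real request and error-classification helpers, which this diff does not show.

from typing import Callable

# Hypothetical sketch only: call_model and is_unavailable are stand-ins for helpers
# not shown in this diff. The chain values are copied from the hunk above.
_GEMINI_FALLBACK_CHAIN: dict[str, str] = {
    "gemini-2.0-flash": "gemini-2.0-flash-lite",
    "gemini-2.0-flash-exp": "gemini-2.0-flash-lite",
    "gemini-2.0-flash-001": "gemini-2.0-flash-lite",
    "gemini-2.0-flash-lite": "gemini-2.0-flash-001",  # MODEL_GEMINI_FLASH_FALLBACK
}


def complete_with_fallback(
    model: str,
    prompt: str,
    call_model: Callable[[str, str], str],
    is_unavailable: Callable[[Exception], bool],
) -> str:
    tried: set[str] = set()
    current: str | None = model
    # The tried-set guard matters: -lite and -001 point at each other in the chain,
    # so a naive walk would loop forever once both models are failing.
    while current is not None and current not in tried:
        tried.add(current)
        try:
            return call_model(current, prompt)
        except Exception as exc:
            if not is_unavailable(exc):
                raise  # quota or transport errors are not fixed by switching models
            current = _GEMINI_FALLBACK_CHAIN.get(current)
    raise RuntimeError(f"no available Gemini model for {model!r}")

Note that the last two entries of the chain reference each other (lite falls back to 001 and 001 falls back to lite), so any consumer needs a visited-set or similar guard to stop retrying once every 2.x model has been tried.
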