Fix Gemini model: default to gemini-1.5-flash, robust fallback detection
- Change default from gemini-2.0-flash to gemini-1.5-flash (reliably
available on the OpenAI-compatible endpoint; 2.0 opt-in via GEMINI_MODEL)
- Model-unavailable detection now triggers on ANY non-429 status code
where the body contains an unavailability keyword (not just 400/404),
covering 403/422/etc. that Gemini may return for unsupported models
- Extract keywords into _GEMINI_MODEL_UNAVAILABLE_KWS constant
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/engine/llm.py +16 -12
src/soci/engine/llm.py
CHANGED
|
@@ -38,8 +38,11 @@ MODEL_GROQ_LLAMA_70B = "llama-3.3-70b-versatile"
|
|
| 38 |
MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
|
| 39 |
|
| 40 |
# Google Gemini model IDs (free tier via AI Studio)
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
MODEL_GEMINI_PRO = "gemini-1.5-pro"
|
| 44 |
|
| 45 |
# Models to try in order if a model is not available on the serverless endpoint
|
|
@@ -47,8 +50,15 @@ _GEMINI_FALLBACK_CHAIN: dict[str, str] = {
|
|
| 47 |
"gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
|
| 48 |
"gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
|
| 49 |
"gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
|
|
|
|
| 50 |
}
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# Hugging Face router model IDs (router.huggingface.co/v1 β auto-routes to best provider)
|
| 53 |
MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default β auto-routed, great quality
|
| 54 |
MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
|
|
@@ -823,11 +833,8 @@ class GeminiClient:
|
|
| 823 |
return ""
|
| 824 |
logger.warning(f"Gemini 429: {body} β waiting {wait}s")
|
| 825 |
await asyncio.sleep(wait)
|
| 826 |
-
elif
|
| 827 |
-
|
| 828 |
-
for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable", "serverless")
|
| 829 |
-
):
|
| 830 |
-
# Model not available on this endpoint β try fallback
|
| 831 |
fallback = self._handle_model_not_found(model)
|
| 832 |
if fallback:
|
| 833 |
model = fallback
|
|
@@ -904,11 +911,8 @@ class GeminiClient:
|
|
| 904 |
return {}
|
| 905 |
logger.warning(f"Gemini 429 (json): {body} β waiting {wait}s")
|
| 906 |
await asyncio.sleep(wait)
|
| 907 |
-
elif
|
| 908 |
-
|
| 909 |
-
for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable", "serverless")
|
| 910 |
-
):
|
| 911 |
-
# Model not available on this endpoint β try fallback
|
| 912 |
fallback = self._handle_model_not_found(model)
|
| 913 |
if fallback:
|
| 914 |
model = fallback
|
|
|
|
| 38 |
MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
|
| 39 |
|
| 40 |
# Google Gemini model IDs (free tier via AI Studio)
|
| 41 |
+
# gemini-1.5-flash is the reliable default on the OpenAI-compatible endpoint.
|
| 42 |
+
# gemini-2.0-flash can be enabled via GEMINI_MODEL env var if your key supports it.
|
| 43 |
+
MODEL_GEMINI_FLASH = "gemini-1.5-flash"
|
| 44 |
+
MODEL_GEMINI_FLASH_FALLBACK = "gemini-1.5-flash" # final fallback
|
| 45 |
+
MODEL_GEMINI_FLASH_V2 = "gemini-2.0-flash" # opt-in via GEMINI_MODEL env var
|
| 46 |
MODEL_GEMINI_PRO = "gemini-1.5-pro"
|
| 47 |
|
| 48 |
# Models to try in order if a model is not available on the serverless endpoint
|
|
|
|
| 50 |
"gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
|
| 51 |
"gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
|
| 52 |
"gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
|
| 53 |
+
"gemini-2.0-flash-lite": MODEL_GEMINI_FLASH_FALLBACK,
|
| 54 |
}
|
| 55 |
|
| 56 |
+
# Keywords in any Gemini error body that indicate the model is unavailable on this endpoint
|
| 57 |
+
_GEMINI_MODEL_UNAVAILABLE_KWS = (
|
| 58 |
+
"not found", "not supported", "invalid argument",
|
| 59 |
+
"does not exist", "unavailable", "serverless",
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
# Hugging Face router model IDs (router.huggingface.co/v1 β auto-routes to best provider)
|
| 63 |
MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default β auto-routed, great quality
|
| 64 |
MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
|
|
|
|
| 833 |
return ""
|
| 834 |
logger.warning(f"Gemini 429: {body} β waiting {wait}s")
|
| 835 |
await asyncio.sleep(wait)
|
| 836 |
+
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 837 |
+
# Model not available on this endpoint (any status code) β try fallback
|
|
|
|
|
|
|
|
|
|
| 838 |
fallback = self._handle_model_not_found(model)
|
| 839 |
if fallback:
|
| 840 |
model = fallback
|
|
|
|
| 911 |
return {}
|
| 912 |
logger.warning(f"Gemini 429 (json): {body} β waiting {wait}s")
|
| 913 |
await asyncio.sleep(wait)
|
| 914 |
+
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 915 |
+
# Model not available on this endpoint (any status code) β try fallback
|
|
|
|
|
|
|
|
|
|
| 916 |
fallback = self._handle_model_not_found(model)
|
| 917 |
if fallback:
|
| 918 |
model = fallback
|