RayMelius Claude Sonnet 4.6 committed on
Commit
ecac5f6
·
1 Parent(s): 3b571c9

Fix Gemini model: default to gemini-1.5-flash, robust fallback detection

Browse files

- Change default from gemini-2.0-flash to gemini-1.5-flash (reliably
available on the OpenAI-compatible endpoint; 2.0 opt-in via GEMINI_MODEL)
- Model-unavailable detection now triggers on ANY non-429 status code
where the body contains an unavailability keyword (not just 400/404),
covering 403/422/etc. that Gemini may return for unsupported models
- Extract keywords into _GEMINI_MODEL_UNAVAILABLE_KWS constant

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/soci/engine/llm.py +16 -12
src/soci/engine/llm.py CHANGED
@@ -38,8 +38,11 @@ MODEL_GROQ_LLAMA_70B = "llama-3.3-70b-versatile"
38
  MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
39
 
40
  # Google Gemini model IDs (free tier via AI Studio)
41
- MODEL_GEMINI_FLASH = "gemini-2.0-flash"
42
- MODEL_GEMINI_FLASH_FALLBACK = "gemini-1.5-flash" # fallback if 2.0-flash unavailable
 
 
 
43
  MODEL_GEMINI_PRO = "gemini-1.5-pro"
44
 
45
  # Models to try in order if a model is not available on the serverless endpoint
@@ -47,8 +50,15 @@ _GEMINI_FALLBACK_CHAIN: dict[str, str] = {
47
  "gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
48
  "gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
49
  "gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
 
50
  }
51
 
 
 
 
 
 
 
52
  # Hugging Face router model IDs (router.huggingface.co/v1 — auto-routes to best provider)
53
  MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, great quality
54
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
@@ -823,11 +833,8 @@ class GeminiClient:
823
  return ""
824
  logger.warning(f"Gemini 429: {body} — waiting {wait}s")
825
  await asyncio.sleep(wait)
826
- elif status in (400, 404) and any(
827
- kw in body_raw.lower()
828
- for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable", "serverless")
829
- ):
830
- # Model not available on this endpoint — try fallback
831
  fallback = self._handle_model_not_found(model)
832
  if fallback:
833
  model = fallback
@@ -904,11 +911,8 @@ class GeminiClient:
904
  return {}
905
  logger.warning(f"Gemini 429 (json): {body} — waiting {wait}s")
906
  await asyncio.sleep(wait)
907
- elif status in (400, 404) and any(
908
- kw in body_raw.lower()
909
- for kw in ("not found", "not supported", "invalid argument", "does not exist", "unavailable", "serverless")
910
- ):
911
- # Model not available on this endpoint — try fallback
912
  fallback = self._handle_model_not_found(model)
913
  if fallback:
914
  model = fallback
 
38
  MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
39
 
40
  # Google Gemini model IDs (free tier via AI Studio)
41
+ # gemini-1.5-flash is the reliable default on the OpenAI-compatible endpoint.
42
+ # gemini-2.0-flash can be enabled via GEMINI_MODEL env var if your key supports it.
43
+ MODEL_GEMINI_FLASH = "gemini-1.5-flash"
44
+ MODEL_GEMINI_FLASH_FALLBACK = "gemini-1.5-flash" # final fallback
45
+ MODEL_GEMINI_FLASH_V2 = "gemini-2.0-flash" # opt-in via GEMINI_MODEL env var
46
  MODEL_GEMINI_PRO = "gemini-1.5-pro"
47
 
48
  # Models to try in order if a model is not available on the serverless endpoint
 
50
  "gemini-2.0-flash": MODEL_GEMINI_FLASH_FALLBACK,
51
  "gemini-2.0-flash-exp": MODEL_GEMINI_FLASH_FALLBACK,
52
  "gemini-2.0-flash-001": MODEL_GEMINI_FLASH_FALLBACK,
53
+ "gemini-2.0-flash-lite": MODEL_GEMINI_FLASH_FALLBACK,
54
  }
55
 
56
+ # Keywords in any Gemini error body that indicate the model is unavailable on this endpoint
57
+ _GEMINI_MODEL_UNAVAILABLE_KWS = (
58
+ "not found", "not supported", "invalid argument",
59
+ "does not exist", "unavailable", "serverless",
60
+ )
61
+
62
  # Hugging Face router model IDs (router.huggingface.co/v1 — auto-routes to best provider)
63
  MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, great quality
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
 
833
  return ""
834
  logger.warning(f"Gemini 429: {body} — waiting {wait}s")
835
  await asyncio.sleep(wait)
836
+ elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
837
+ # Model not available on this endpoint (any status code) — try fallback
 
 
 
838
  fallback = self._handle_model_not_found(model)
839
  if fallback:
840
  model = fallback
 
911
  return {}
912
  logger.warning(f"Gemini 429 (json): {body} — waiting {wait}s")
913
  await asyncio.sleep(wait)
914
+ elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
915
+ # Model not available on this endpoint (any status code) — try fallback
 
 
 
916
  fallback = self._handle_model_not_found(model)
917
  if fallback:
918
  model = fallback