Spaces:
Paused
Paused
Feat: Fallback to Google gemini-3-flash-preview on primary LLM proxy error
Browse files — tinytroupe/openai_utils.py (+30 −20)
tinytroupe/openai_utils.py
CHANGED
|
@@ -261,27 +261,37 @@ class OpenAIClient:
|
|
| 261 |
# To make the log cleaner, we remove the messages from the logged parameters
|
| 262 |
logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"}
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
if "stream" in chat_api_params:
|
| 268 |
-
del chat_api_params["stream"]
|
| 269 |
-
|
| 270 |
-
logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.")
|
| 271 |
-
# complete message
|
| 272 |
-
logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}")
|
| 273 |
-
|
| 274 |
-
result_message = self.client.beta.chat.completions.parse(
|
| 275 |
-
**chat_api_params
|
| 276 |
-
)
|
| 277 |
-
|
| 278 |
-
return result_message
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
def _is_reasoning_model(self, model):
|
| 287 |
return "o1" in model or "o3" in model
|
|
|
|
| 261 |
# To make the log cleaner, we remove the messages from the logged parameters
|
| 262 |
logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"}
|
| 263 |
|
| 264 |
+
# --- GOOGLE FALLBACK INJECTION ---
|
| 265 |
+
import os
|
| 266 |
+
from openai import OpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
+
try:
|
| 269 |
+
if "response_format" in chat_api_params:
|
| 270 |
+
if "stream" in chat_api_params: del chat_api_params["stream"]
|
| 271 |
+
result_message = self.client.beta.chat.completions.parse(**chat_api_params)
|
| 272 |
+
return result_message
|
| 273 |
+
else:
|
| 274 |
+
return self.client.chat.completions.create(**chat_api_params)
|
| 275 |
+
|
| 276 |
+
except Exception as e:
|
| 277 |
+
logger.warning(f"Primary model call failed ({e}). Falling back to Google gemini-3-flash-preview...")
|
| 278 |
+
|
| 279 |
+
google_client = OpenAI(
|
| 280 |
+
api_key=os.environ.get("GOOGLE_API_KEY", "missing_google_key"),
|
| 281 |
+
base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 282 |
+
)
|
| 283 |
+
chat_api_params["model"] = "gemini-3-flash-preview"
|
| 284 |
+
|
| 285 |
+
# Remove parameters Google OpenAI compat endpoint might not support
|
| 286 |
+
if "response_format" in chat_api_params:
|
| 287 |
+
del chat_api_params["response_format"]
|
| 288 |
+
if "reasoning_effort" in chat_api_params:
|
| 289 |
+
del chat_api_params["reasoning_effort"]
|
| 290 |
+
if "max_completion_tokens" in chat_api_params:
|
| 291 |
+
chat_api_params["max_tokens"] = chat_api_params.pop("max_completion_tokens")
|
| 292 |
+
|
| 293 |
+
return google_client.chat.completions.create(**chat_api_params)
|
| 294 |
+
# ----------------------------------
|
| 295 |
|
| 296 |
def _is_reasoning_model(self, model):
|
| 297 |
return "o1" in model or "o3" in model
|