vertex-pay

Paused

App Files Files Community

bibibi12345 committed on May 16, 2025

Commit

2a81a94

1 Parent(s): 9fde8ed

smart pay prefix

Browse files

Files changed (2) hide show

app/routes/chat_api.py +7 -5
app/routes/models_api.py +26 -14

app/routes/chat_api.py CHANGED Viewed

@@ -62,11 +62,8 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
         # Determine base_model_name by stripping known suffixes
         # This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
         if is_openai_direct_model:
-            temp_base_name = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
-            if temp_base_name.startswith(PAY_PREFIX):
-                base_model_name = temp_base_name[len(PAY_PREFIX):]
-            else:
-                base_model_name = temp_base_name
         elif is_auto_model: base_model_name = request.model[:-len("-auto")]
         elif is_grounded_search: base_model_name = request.model[:-len("-search")]
         elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
@@ -74,6 +71,11 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
         elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
         elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
         # Specific model variant checks (if any remain exclusive and not covered dynamically)
         if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
             return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))

         # Determine base_model_name by stripping known suffixes
         # This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
         if is_openai_direct_model:
+            # The general PAY_PREFIX stripper later will handle if this result starts with [PAY]
+            base_model_name = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
         elif is_auto_model: base_model_name = request.model[:-len("-auto")]
         elif is_grounded_search: base_model_name = request.model[:-len("-search")]
         elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
         elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
         elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
+        # After all suffix stripping, if PAY_PREFIX is still at the start of base_model_name, remove it.
+        # This handles cases like "[PAY]model-id-search" correctly.
+        if base_model_name.startswith(PAY_PREFIX):
+            base_model_name = base_model_name[len(PAY_PREFIX):]
         # Specific model variant checks (if any remain exclusive and not covered dynamically)
         if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
             return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))

app/routes/models_api.py CHANGED Viewed

@@ -54,33 +54,45 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
     current_time = int(time.time())
     # Add base models and their variations
-    for model_id in sorted(list(all_model_ids)):
         dynamic_models_data.append({
-            "id": model_id, "object": "model", "created": current_time, "owned_by": "google",
-            "permission": [], "root": model_id, "parent": None
         })
         # Conditionally add common variations (standard suffixes)
-        if not model_id.startswith("gemini-2.0"):
             standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
             for suffix in standard_suffixes:
-                suffixed_id = f"{model_id}{suffix}"
-                # Check if this suffixed ID is already in all_model_ids (fetched from remote) or already added to dynamic_models_data
-                if suffixed_id not in all_model_ids and not any(m['id'] == suffixed_id for m in dynamic_models_data):
                     dynamic_models_data.append({
-                        "id": suffixed_id, "object": "model", "created": current_time, "owned_by": "google",
-                        "permission": [], "root": model_id, "parent": None
                     })
         # Apply special suffixes for models starting with "gemini-2.5-flash"
-        if model_id.startswith("gemini-2.5-flash"):
             special_flash_suffixes = ["-nothinking", "-max"]
             for special_suffix in special_flash_suffixes:
-                suffixed_id = f"{model_id}{special_suffix}"
-                if suffixed_id not in all_model_ids and not any(m['id'] == suffixed_id for m in dynamic_models_data):
                     dynamic_models_data.append({
-                        "id": suffixed_id, "object": "model", "created": current_time, "owned_by": "google",
-                        "permission": [], "root": model_id, "parent": None
                     })
         # Ensure uniqueness again after adding suffixes

     current_time = int(time.time())
     # Add base models and their variations
+    for original_model_id in sorted(list(all_model_ids)):
+        current_display_prefix = ""
+        if has_sa_creds and not has_express_key and EXPERIMENTAL_MARKER not in original_model_id:
+            current_display_prefix = PAY_PREFIX
+        base_display_id = f"{current_display_prefix}{original_model_id}"
         dynamic_models_data.append({
+            "id": base_display_id, "object": "model", "created": current_time, "owned_by": "google",
+            "permission": [], "root": original_model_id, "parent": None
         })
         # Conditionally add common variations (standard suffixes)
+        if not original_model_id.startswith("gemini-2.0"): # Suffix rules based on original_model_id
             standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
             for suffix in standard_suffixes:
+                # Suffix is applied to the original model ID part
+                suffixed_model_part = f"{original_model_id}{suffix}"
+                # Then the whole thing is prefixed
+                final_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
+                # Check if this suffixed ID is already in all_model_ids (unlikely with prefix) or already added
+                if final_suffixed_display_id not in all_model_ids and not any(m['id'] == final_suffixed_display_id for m in dynamic_models_data):
                     dynamic_models_data.append({
+                        "id": final_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
+                        "permission": [], "root": original_model_id, "parent": None
                     })
         # Apply special suffixes for models starting with "gemini-2.5-flash"
+        if original_model_id.startswith("gemini-2.5-flash"): # Suffix rules based on original_model_id
             special_flash_suffixes = ["-nothinking", "-max"]
             for special_suffix in special_flash_suffixes:
+                suffixed_model_part = f"{original_model_id}{special_suffix}"
+                final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
+                if final_special_suffixed_display_id not in all_model_ids and not any(m['id'] == final_special_suffixed_display_id for m in dynamic_models_data):
                     dynamic_models_data.append({
+                        "id": final_special_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
+                        "permission": [], "root": original_model_id, "parent": None
                     })
         # Ensure uniqueness again after adding suffixes