Spaces:
Paused
feat(providers): ✨ inject NVIDIA 'thinking' flag for DeepSeek v3.1+ models
Add automatic injection of a chat-template "thinking" toggle for NVIDIA NIM requests when DeepSeek v3.1+ variants are used and a reasoning budget is provided.
- Implement NvidiaProvider.handle_thinking_parameter(payload, model):
- Recognizes DeepSeek v3.1+ model names (v3.1, v3.1-terminus, v3.2)
- Checks payload's reasoning_effort for "low", "medium", or "high"
- Ensures extra_body.chat_template_kwargs exists and sets thinking = True
- Logs when the flag is enabled
- Call the new handler from RotatingClient for provider == "nvidia_nim" at the same payload-prep points used for gemini
This change ensures outgoing NVIDIA NIM payloads include the required internal flag so DeepSeek models can enable their optimized chat-template behavior based on the requested reasoning budget.
|
@@ -490,6 +490,8 @@ class RotatingClient:
         if provider == "gemini" and provider_instance:
             provider_instance.handle_thinking_parameter(litellm_kwargs, model)
+        if provider == "nvidia_nim" and provider_instance:
+            provider_instance.handle_thinking_parameter(litellm_kwargs, model)

         if "gemma-3" in model and "messages" in litellm_kwargs:
             litellm_kwargs["messages"] = [{"role": "user", "content": m["content"]} if m.get("role") == "system" else m for m in litellm_kwargs["messages"]]

@@ -757,6 +759,8 @@ class RotatingClient:
         if provider == "gemini" and provider_instance:
             provider_instance.handle_thinking_parameter(litellm_kwargs, model)
+        if provider == "nvidia_nim" and provider_instance:
+            provider_instance.handle_thinking_parameter(litellm_kwargs, model)

         if "gemma-3" in model and "messages" in litellm_kwargs:
             litellm_kwargs["messages"] = [{"role": "user", "content": m["content"]} if m.get("role") == "system" else m for m in litellm_kwargs["messages"]]
|
@@ -1,6 +1,6 @@
 import httpx
 import logging
-from typing import List
+from typing import List, Dict, Any
 import litellm
 from .provider_interface import ProviderInterface

@@ -29,3 +29,27 @@ class NvidiaProvider(ProviderInterface):
         except httpx.RequestError as e:
             lib_logger.error(f"Failed to fetch NVIDIA models: {e}")
             return []

New method added to NvidiaProvider:
| 33 |
+
def handle_thinking_parameter(self, payload: Dict[str, Any], model: str):
|
| 34 |
+
"""
|
| 35 |
+
Adds the 'thinking' parameter for specific DeepSeek models on the NVIDIA provider,
|
| 36 |
+
only if reasoning_effort is set to low, medium, or high.
|
| 37 |
+
"""
|
| 38 |
+
deepseek_models = [
|
| 39 |
+
"deepseek-ai/deepseek-v3.1",
|
| 40 |
+
"deepseek-ai/deepseek-v3.1-terminus",
|
| 41 |
+
"deepseek-ai/deepseek-v3.2"
|
| 42 |
+
]
|
| 43 |
+
|
| 44 |
+
# The model name in the payload is prefixed with 'nvidia_nim/'
|
| 45 |
+
model_name = model.split('/', 1)[1] if '/' in model else model
|
| 46 |
+
reasoning_effort = payload.get("reasoning_effort")
|
| 47 |
+
|
| 48 |
+
if model_name in deepseek_models and reasoning_effort in ["low", "medium", "high"]:
|
| 49 |
+
if "extra_body" not in payload:
|
| 50 |
+
payload["extra_body"] = {}
|
| 51 |
+
if "chat_template_kwargs" not in payload["extra_body"]:
|
| 52 |
+
payload["extra_body"]["chat_template_kwargs"] = {}
|
| 53 |
+
|
| 54 |
+
payload["extra_body"]["chat_template_kwargs"]["thinking"] = True
|
| 55 |
+
lib_logger.info(f"Enabled 'thinking' parameter for model: {model_name} due to reasoning_effort: '{reasoning_effort}'")
|