Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Oct 30, 2025

Commit

2ccd2a1

1 Parent(s): 1980665

feat(client): ✨ add header fallback for custom_reasoning_budget

Allow RotatingClient to source the custom reasoning budget flag from an HTTP header when the caller does not provide the flag in the request body.

- Prefer the explicit kwarg if present; fall back to request.headers only when the kwarg is missing
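- Safely inspect request.headers (only when a request object exposing `headers` is available) and interpret a case-insensitive `true` in the `custom_reasoning_budget` header as enabled; any other value present in the header is treated as disabled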
- Apply the header-based fallback in both the provider delegation path and the main execution path
- Preserve existing behavior when neither the kwarg nor the header is supplied

Files changed (1) hide show

src/rotator_library/client.py +21 -0

src/rotator_library/client.py CHANGED Viewed

@@ -448,6 +448,18 @@ class RotatingClient:
                 if provider_plugin and provider_plugin.has_custom_logic():
                     lib_logger.debug(f"Provider '{provider}' has custom logic. Delegating call.")
                     litellm_kwargs["credential_identifier"] = current_cred
                     # The plugin handles the entire call, including retries on 401, etc.
                     # The main retry loop here is for key rotation on other errors.
@@ -631,8 +643,17 @@ class RotatingClient:
                     litellm_kwargs = self.all_providers.get_provider_kwargs(**kwargs.copy())
                     if "reasoning_effort" in kwargs:
                         litellm_kwargs["reasoning_effort"] = kwargs["reasoning_effort"]
                     if "custom_reasoning_budget" in kwargs:
                         litellm_kwargs["custom_reasoning_budget"] = kwargs["custom_reasoning_budget"]
                     # [NEW] Merge provider-specific params
                     if provider in self.litellm_provider_params:

                 if provider_plugin and provider_plugin.has_custom_logic():
                     lib_logger.debug(f"Provider '{provider}' has custom logic. Delegating call.")
                     litellm_kwargs["credential_identifier"] = current_cred
+                    # Check body first for custom_reasoning_budget
+                    if "custom_reasoning_budget" in kwargs:
+                        litellm_kwargs["custom_reasoning_budget"] = kwargs["custom_reasoning_budget"]
+                    else:
+                        custom_budget_header = None
+                        if request and hasattr(request, 'headers'):
+                            custom_budget_header = request.headers.get("custom_reasoning_budget")
+                        if custom_budget_header is not None:
+                            is_budget_enabled = custom_budget_header.lower() == 'true'
+                            litellm_kwargs["custom_reasoning_budget"] = is_budget_enabled
                     # The plugin handles the entire call, including retries on 401, etc.
                     # The main retry loop here is for key rotation on other errors.
                     litellm_kwargs = self.all_providers.get_provider_kwargs(**kwargs.copy())
                     if "reasoning_effort" in kwargs:
                         litellm_kwargs["reasoning_effort"] = kwargs["reasoning_effort"]
+                    # Check body first for custom_reasoning_budget
                     if "custom_reasoning_budget" in kwargs:
                         litellm_kwargs["custom_reasoning_budget"] = kwargs["custom_reasoning_budget"]
+                    else:
+                        custom_budget_header = None
+                        if request and hasattr(request, 'headers'):
+                            custom_budget_header = request.headers.get("custom_reasoning_budget")
+                        if custom_budget_header is not None:
+                            is_budget_enabled = custom_budget_header.lower() == 'true'
+                            litellm_kwargs["custom_reasoning_budget"] = is_budget_enabled
                     # [NEW] Merge provider-specific params
                     if provider in self.litellm_provider_params: