"""Request builder for NVIDIA NIM provider."""
from collections.abc import Callable
from copy import deepcopy
from typing import Any
from loguru import logger
from config.nim import NimSettings
from core.anthropic import (
ReasoningReplayMode,
build_base_request_body,
set_if_not_none,
)
from core.anthropic.conversion import OpenAIConversionError
from providers.exceptions import InvalidRequestError
# Model-name prefixes known to support NIM chat_template_kwargs for
# thinking/reasoning. build_request_body matches the lowercased model name
# against these with str.startswith; models that match no prefix get no
# injected thinking kwargs, to avoid BadRequestError.
_THINKING_CAPABLE_MODEL_PREFIXES: tuple[str, ...] = (
    "qwen/qwen3-",
    "mistralai/mistral-nemotron",
)
# JSON Schema keywords whose value is sanitized as a single node by
# _sanitize_nim_schema_node; the key is dropped when that value is a boolean.
_SCHEMA_VALUE_KEYS = frozenset(
    {
        "additionalProperties",
        "additionalItems",
        "unevaluatedProperties",
        "unevaluatedItems",
        "items",
        "contains",
        "propertyNames",
        "if",
        "then",
        "else",
        "not",
    }
)
# JSON Schema keywords whose value is a list of subschemas; each element is
# sanitized individually and the key is dropped if no element survives.
_SCHEMA_LIST_KEYS = frozenset({"allOf", "anyOf", "oneOf", "prefixItems"})
# JSON Schema keywords whose value maps arbitrary names to subschemas; each
# mapped value is sanitized and the (possibly empty) mapping is kept.
_SCHEMA_MAP_KEYS = frozenset(
    {"properties", "patternProperties", "$defs", "definitions", "dependentSchemas"}
)
def _clone_strip_extra_body(
body: dict[str, Any],
strip: Callable[[dict[str, Any]], bool],
) -> dict[str, Any] | None:
"""Deep-clone ``body`` and remove fields via ``strip`` on ``extra_body`` only.
Returns ``None`` when there is no ``extra_body`` dict or ``strip`` reports no change.
"""
cloned_body = deepcopy(body)
extra_body = cloned_body.get("extra_body")
if not isinstance(extra_body, dict):
return None
if not strip(extra_body):
return None
if not extra_body:
cloned_body.pop("extra_body", None)
return cloned_body
def _strip_reasoning_budget_fields(extra_body: dict[str, Any]) -> bool:
removed = extra_body.pop("reasoning_budget", None) is not None
chat_template_kwargs = extra_body.get("chat_template_kwargs")
if (
isinstance(chat_template_kwargs, dict)
and chat_template_kwargs.pop("reasoning_budget", None) is not None
):
removed = True
return removed
def _strip_chat_template_field(extra_body: dict[str, Any]) -> bool:
return extra_body.pop("chat_template", None) is not None
def _strip_message_reasoning_content(body: dict[str, Any]) -> bool:
removed = False
messages = body.get("messages")
if not isinstance(messages, list):
return False
for message in messages:
if (
isinstance(message, dict)
and message.pop("reasoning_content", None) is not None
):
removed = True
return removed
def _sanitize_nim_schema_node(value: Any) -> tuple[bool, Any]:
"""Remove boolean JSON Schema subschemas that hosted NIM rejects."""
if isinstance(value, bool):
return False, None
if isinstance(value, dict):
sanitized: dict[str, Any] = {}
for key, item in value.items():
if key in _SCHEMA_VALUE_KEYS:
keep, sanitized_item = _sanitize_nim_schema_node(item)
if keep:
sanitized[key] = sanitized_item
elif key in _SCHEMA_LIST_KEYS and isinstance(item, list):
sanitized_items: list[Any] = []
for schema_item in item:
keep, sanitized_item = _sanitize_nim_schema_node(schema_item)
if keep:
sanitized_items.append(sanitized_item)
if sanitized_items:
sanitized[key] = sanitized_items
elif key in _SCHEMA_MAP_KEYS and isinstance(item, dict):
sanitized_map: dict[str, Any] = {}
for map_key, schema_item in item.items():
keep, sanitized_item = _sanitize_nim_schema_node(schema_item)
if keep:
sanitized_map[map_key] = sanitized_item
sanitized[key] = sanitized_map
else:
sanitized[key] = item
return True, sanitized
if isinstance(value, list):
sanitized_items = []
for item in value:
keep, sanitized_item = _sanitize_nim_schema_node(item)
if keep:
sanitized_items.append(sanitized_item)
return True, sanitized_items
return True, value
def _sanitize_nim_tool_schemas(body: dict[str, Any]) -> None:
"""Sanitize only tool parameter schemas, preserving tool calls/history."""
tools = body.get("tools")
if not isinstance(tools, list):
return
sanitized_tools: list[Any] = []
for tool in tools:
if not isinstance(tool, dict):
sanitized_tools.append(tool)
continue
sanitized_tool = dict(tool)
function = tool.get("function")
if isinstance(function, dict):
sanitized_function = dict(function)
parameters = function.get("parameters")
if isinstance(parameters, dict):
_, sanitized_parameters = _sanitize_nim_schema_node(parameters)
sanitized_function["parameters"] = sanitized_parameters
sanitized_tool["function"] = sanitized_function
sanitized_tools.append(sanitized_tool)
body["tools"] = sanitized_tools
def _set_extra(
extra_body: dict[str, Any], key: str, value: Any, ignore_value: Any = None
) -> None:
if key in extra_body:
return
if value is None:
return
if ignore_value is not None and value == ignore_value:
return
extra_body[key] = value
def clone_body_without_reasoning_budget(body: dict[str, Any]) -> dict[str, Any] | None:
    """Return a deep copy of ``body`` with ``reasoning_budget`` fields stripped.

    Only ``extra_body`` is touched. Returns ``None`` when there is no
    ``extra_body`` dict or nothing was removed.
    """
    stripped_clone = _clone_strip_extra_body(
        body, strip=_strip_reasoning_budget_fields
    )
    return stripped_clone
def clone_body_without_chat_template(body: dict[str, Any]) -> dict[str, Any] | None:
    """Return a deep copy of ``body`` with ``extra_body.chat_template`` stripped.

    Returns ``None`` when there is no ``extra_body`` dict or nothing was
    removed.
    """
    stripped_clone = _clone_strip_extra_body(body, strip=_strip_chat_template_field)
    return stripped_clone
def clone_body_without_reasoning_content(body: dict[str, Any]) -> dict[str, Any] | None:
    """Return a deep copy of ``body`` with message ``reasoning_content`` removed.

    The original ``body`` is left untouched. Returns ``None`` when no message
    carried a non-``None`` ``reasoning_content``.
    """
    candidate = deepcopy(body)
    changed = _strip_message_reasoning_content(candidate)
    return candidate if changed else None
def build_request_body(
    request_data: Any, nim: NimSettings, *, thinking_enabled: bool
) -> dict:
    """Build OpenAI-format request body from Anthropic request.

    The base body comes from ``build_base_request_body``. NIM settings are
    then layered on top: tool parameter schemas are sanitized, ``max_tokens``
    is capped against ``nim.max_tokens``, NIM sampling defaults fill fields the
    request left unset, and non-standard parameters are collected under
    ``extra_body``. Entries already present in the request's own
    ``extra_body`` take precedence over NIM settings.

    Args:
        request_data: Incoming request. Read via ``getattr`` for ``model``,
            ``messages``, ``max_tokens``, ``top_k`` and ``extra_body``.
        nim: NIM provider settings supplying defaults and caps.
        thinking_enabled: Whether reasoning replay is requested and, for
            models matching ``_THINKING_CAPABLE_MODEL_PREFIXES``, whether
            thinking ``chat_template_kwargs`` are injected.

    Returns:
        The request body dict. ``extra_body`` is only included when non-empty.

    Raises:
        InvalidRequestError: If the base conversion raises
            ``OpenAIConversionError``.
    """
    logger.debug(
        "NIM_REQUEST: conversion start model={} msgs={}",
        getattr(request_data, "model", "?"),
        len(getattr(request_data, "messages", [])),
    )

    try:
        body = build_base_request_body(
            request_data,
            reasoning_replay=ReasoningReplayMode.REASONING_CONTENT
            if thinking_enabled
            else ReasoningReplayMode.DISABLED,
        )
    except OpenAIConversionError as exc:
        raise InvalidRequestError(str(exc)) from exc

    _sanitize_nim_tool_schemas(body)

    # NIM-specific max_tokens: cap against nim.max_tokens
    max_tokens = body.get("max_tokens") or getattr(request_data, "max_tokens", None)
    if max_tokens is None:
        max_tokens = nim.max_tokens
    elif nim.max_tokens:
        max_tokens = min(max_tokens, nim.max_tokens)
    set_if_not_none(body, "max_tokens", max_tokens)

    # NIM-specific temperature/top_p: fall back to NIM defaults if request didn't set
    if body.get("temperature") is None and nim.temperature is not None:
        body["temperature"] = nim.temperature
    if body.get("top_p") is None and nim.top_p is not None:
        body["top_p"] = nim.top_p

    # NIM-specific stop sequences fallback
    if "stop" not in body and nim.stop:
        body["stop"] = nim.stop

    if nim.presence_penalty != 0.0:
        body["presence_penalty"] = nim.presence_penalty
    if nim.frequency_penalty != 0.0:
        body["frequency_penalty"] = nim.frequency_penalty
    if nim.seed is not None:
        body["seed"] = nim.seed
    body["parallel_tool_calls"] = nim.parallel_tool_calls

    # Handle non-standard parameters via extra_body
    extra_body: dict[str, Any] = {}
    request_extra = getattr(request_data, "extra_body", None)
    if request_extra:
        extra_body.update(request_extra)

    # Only inject chat_template_kwargs for models that support it.
    # Other models (Mistral-Large, Dracarys, GLM4, StepFun, Seed-OSS)
    # reject these params with BadRequestError.
    model_name = str(body.get("model", "")).lower()
    model_supports_thinking = any(
        model_name.startswith(prefix) for prefix in _THINKING_CAPABLE_MODEL_PREFIXES
    )
    if thinking_enabled and model_supports_thinking:
        chat_template_kwargs = extra_body.setdefault(
            "chat_template_kwargs", {"thinking": True, "enable_thinking": True}
        )
        # Skip a null budget, in line with set_if_not_none / _set_extra.
        if isinstance(chat_template_kwargs, dict) and max_tokens is not None:
            # The update() above is shallow, so this dict may still be the
            # caller's own object. Copy before writing so the injected budget
            # never leaks back into the incoming request.
            chat_template_kwargs = dict(chat_template_kwargs)
            chat_template_kwargs.setdefault("reasoning_budget", max_tokens)
            extra_body["chat_template_kwargs"] = chat_template_kwargs

    req_top_k = getattr(request_data, "top_k", None)
    top_k = req_top_k if req_top_k is not None else nim.top_k
    _set_extra(extra_body, "top_k", top_k, ignore_value=-1)
    _set_extra(extra_body, "min_p", nim.min_p, ignore_value=0.0)
    _set_extra(
        extra_body, "repetition_penalty", nim.repetition_penalty, ignore_value=1.0
    )
    _set_extra(extra_body, "min_tokens", nim.min_tokens, ignore_value=0)
    _set_extra(extra_body, "chat_template", nim.chat_template)
    _set_extra(extra_body, "request_id", nim.request_id)
    _set_extra(extra_body, "ignore_eos", nim.ignore_eos)

    if extra_body:
        body["extra_body"] = extra_body

    logger.debug(
        "NIM_REQUEST: conversion done model={} msgs={} tools={}",
        body.get("model"),
        len(body.get("messages", [])),
        len(body.get("tools", [])),
    )
    return body
|