"""Request builder for NVIDIA NIM provider.""" from collections.abc import Callable from copy import deepcopy from typing import Any from loguru import logger from config.nim import NimSettings from core.anthropic import ( ReasoningReplayMode, build_base_request_body, set_if_not_none, ) from core.anthropic.conversion import OpenAIConversionError from providers.exceptions import InvalidRequestError # Models known to support NIM chat_template_kwargs for thinking/reasoning. # Models NOT in this set will get thinking disabled to avoid BadRequestError. _THINKING_CAPABLE_MODEL_PREFIXES: tuple[str, ...] = ( "qwen/qwen3-", "mistralai/mistral-nemotron", ) _SCHEMA_VALUE_KEYS = frozenset( { "additionalProperties", "additionalItems", "unevaluatedProperties", "unevaluatedItems", "items", "contains", "propertyNames", "if", "then", "else", "not", } ) _SCHEMA_LIST_KEYS = frozenset({"allOf", "anyOf", "oneOf", "prefixItems"}) _SCHEMA_MAP_KEYS = frozenset( {"properties", "patternProperties", "$defs", "definitions", "dependentSchemas"} ) def _clone_strip_extra_body( body: dict[str, Any], strip: Callable[[dict[str, Any]], bool], ) -> dict[str, Any] | None: """Deep-clone ``body`` and remove fields via ``strip`` on ``extra_body`` only. Returns ``None`` when there is no ``extra_body`` dict or ``strip`` reports no change. """ cloned_body = deepcopy(body) extra_body = cloned_body.get("extra_body") if not isinstance(extra_body, dict): return None if not strip(extra_body): return None if not extra_body: cloned_body.pop("extra_body", None) return cloned_body def _strip_reasoning_budget_fields(extra_body: dict[str, Any]) -> bool: removed = extra_body.pop("reasoning_budget", None) is not None chat_template_kwargs = extra_body.get("chat_template_kwargs") if ( isinstance(chat_template_kwargs, dict) and chat_template_kwargs.pop("reasoning_budget", None) is not None ): removed = True return removed def _strip_chat_template_field(extra_body: dict[str, Any]) -> bool: return extra_body.pop("chat_template", None) is not None def _strip_message_reasoning_content(body: dict[str, Any]) -> bool: removed = False messages = body.get("messages") if not isinstance(messages, list): return False for message in messages: if ( isinstance(message, dict) and message.pop("reasoning_content", None) is not None ): removed = True return removed def _sanitize_nim_schema_node(value: Any) -> tuple[bool, Any]: """Remove boolean JSON Schema subschemas that hosted NIM rejects.""" if isinstance(value, bool): return False, None if isinstance(value, dict): sanitized: dict[str, Any] = {} for key, item in value.items(): if key in _SCHEMA_VALUE_KEYS: keep, sanitized_item = _sanitize_nim_schema_node(item) if keep: sanitized[key] = sanitized_item elif key in _SCHEMA_LIST_KEYS and isinstance(item, list): sanitized_items: list[Any] = [] for schema_item in item: keep, sanitized_item = _sanitize_nim_schema_node(schema_item) if keep: sanitized_items.append(sanitized_item) if sanitized_items: sanitized[key] = sanitized_items elif key in _SCHEMA_MAP_KEYS and isinstance(item, dict): sanitized_map: dict[str, Any] = {} for map_key, schema_item in item.items(): keep, sanitized_item = _sanitize_nim_schema_node(schema_item) if keep: sanitized_map[map_key] = sanitized_item sanitized[key] = sanitized_map else: sanitized[key] = item return True, sanitized if isinstance(value, list): sanitized_items = [] for item in value: keep, sanitized_item = _sanitize_nim_schema_node(item) if keep: sanitized_items.append(sanitized_item) return True, sanitized_items return True, value def _sanitize_nim_tool_schemas(body: dict[str, Any]) -> None: """Sanitize only tool parameter schemas, preserving tool calls/history.""" tools = body.get("tools") if not isinstance(tools, list): return sanitized_tools: list[Any] = [] for tool in tools: if not isinstance(tool, dict): sanitized_tools.append(tool) continue sanitized_tool = dict(tool) function = tool.get("function") if isinstance(function, dict): sanitized_function = dict(function) parameters = function.get("parameters") if isinstance(parameters, dict): _, sanitized_parameters = _sanitize_nim_schema_node(parameters) sanitized_function["parameters"] = sanitized_parameters sanitized_tool["function"] = sanitized_function sanitized_tools.append(sanitized_tool) body["tools"] = sanitized_tools def _set_extra( extra_body: dict[str, Any], key: str, value: Any, ignore_value: Any = None ) -> None: if key in extra_body: return if value is None: return if ignore_value is not None and value == ignore_value: return extra_body[key] = value def clone_body_without_reasoning_budget(body: dict[str, Any]) -> dict[str, Any] | None: """Clone a request body and strip only reasoning_budget fields.""" return _clone_strip_extra_body(body, _strip_reasoning_budget_fields) def clone_body_without_chat_template(body: dict[str, Any]) -> dict[str, Any] | None: """Clone a request body and strip only chat_template.""" return _clone_strip_extra_body(body, _strip_chat_template_field) def clone_body_without_reasoning_content(body: dict[str, Any]) -> dict[str, Any] | None: """Clone a request body and strip assistant message ``reasoning_content`` fields.""" cloned_body = deepcopy(body) if not _strip_message_reasoning_content(cloned_body): return None return cloned_body def build_request_body( request_data: Any, nim: NimSettings, *, thinking_enabled: bool ) -> dict: """Build OpenAI-format request body from Anthropic request.""" logger.debug( "NIM_REQUEST: conversion start model={} msgs={}", getattr(request_data, "model", "?"), len(getattr(request_data, "messages", [])), ) try: body = build_base_request_body( request_data, reasoning_replay=ReasoningReplayMode.REASONING_CONTENT if thinking_enabled else ReasoningReplayMode.DISABLED, ) except OpenAIConversionError as exc: raise InvalidRequestError(str(exc)) from exc _sanitize_nim_tool_schemas(body) # NIM-specific max_tokens: cap against nim.max_tokens max_tokens = body.get("max_tokens") or getattr(request_data, "max_tokens", None) if max_tokens is None: max_tokens = nim.max_tokens elif nim.max_tokens: max_tokens = min(max_tokens, nim.max_tokens) set_if_not_none(body, "max_tokens", max_tokens) # NIM-specific temperature/top_p: fall back to NIM defaults if request didn't set if body.get("temperature") is None and nim.temperature is not None: body["temperature"] = nim.temperature if body.get("top_p") is None and nim.top_p is not None: body["top_p"] = nim.top_p # NIM-specific stop sequences fallback if "stop" not in body and nim.stop: body["stop"] = nim.stop if nim.presence_penalty != 0.0: body["presence_penalty"] = nim.presence_penalty if nim.frequency_penalty != 0.0: body["frequency_penalty"] = nim.frequency_penalty if nim.seed is not None: body["seed"] = nim.seed body["parallel_tool_calls"] = nim.parallel_tool_calls # Handle non-standard parameters via extra_body extra_body: dict[str, Any] = {} request_extra = getattr(request_data, "extra_body", None) if request_extra: extra_body.update(request_extra) # Only inject chat_template_kwargs for models that support it. # Other models (Mistral-Large, Dracarys, GLM4, StepFun, Seed-OSS) # reject these params with BadRequestError. model_name = str(body.get("model", "")).lower() model_supports_thinking = any( model_name.startswith(prefix) for prefix in _THINKING_CAPABLE_MODEL_PREFIXES ) if thinking_enabled and model_supports_thinking: chat_template_kwargs = extra_body.setdefault( "chat_template_kwargs", {"thinking": True, "enable_thinking": True} ) if isinstance(chat_template_kwargs, dict): chat_template_kwargs.setdefault("reasoning_budget", max_tokens) req_top_k = getattr(request_data, "top_k", None) top_k = req_top_k if req_top_k is not None else nim.top_k _set_extra(extra_body, "top_k", top_k, ignore_value=-1) _set_extra(extra_body, "min_p", nim.min_p, ignore_value=0.0) _set_extra( extra_body, "repetition_penalty", nim.repetition_penalty, ignore_value=1.0 ) _set_extra(extra_body, "min_tokens", nim.min_tokens, ignore_value=0) _set_extra(extra_body, "chat_template", nim.chat_template) _set_extra(extra_body, "request_id", nim.request_id) _set_extra(extra_body, "ignore_eos", nim.ignore_eos) if extra_body: body["extra_body"] = extra_body logger.debug( "NIM_REQUEST: conversion done model={} msgs={} tools={}", body.get("model"), len(body.get("messages", [])), len(body.get("tools", [])), ) return body