"""Runtime config planning and retry/fallback selection.

Owns:
- Resolution of ConfiguredModelSettings → ModelConfig.
- Per-attempt planning (AttemptPlan) including primary/fallback selection and
  reasoning-effort/thinking-budget resolution.
- Per-call effective config construction (applying caller kwarg overrides onto
  the selected ModelConfig).
- Retry attempt tracking via a ContextVar, plus the temperature-bump heuristic.
"""

from __future__ import annotations

import logging
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Any

from src.config import (
    ConfiguredModelSettings,
    ModelConfig,
    ModelTransport,
    resolve_model_config,
    settings,
)

from .registry import backend_for_provider, client_for_model_config
from .types import ProviderClient, ReasoningEffortType

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# ContextVar tracking the current retry attempt for provider switching.
# Defaults to 0 when nothing has set it. `effective_temperature` reads it and
# treats values greater than 1 as retries. Nothing visible in this module
# writes it — presumably the retry wrapper sets it per attempt (verify against
# the caller).
current_attempt: ContextVar[int] = ContextVar("current_attempt", default=0)


def update_current_langfuse_observation(
    provider: ModelTransport,
    model: str,
    *,
    name: str | None = None,
) -> None:
    """Tag the current Langfuse span with LLM routing details (best effort).

    Does nothing when ``settings.LANGFUSE_PUBLIC_KEY`` is falsy. Otherwise the
    span receives a ``metadata`` payload holding the configured namespace, the
    provider and the model; when ``name`` is given it is passed along as the
    span name as well.

    Any exception raised along the way — including a failed ``langfuse``
    import — is logged at debug level and swallowed, so telemetry problems
    never propagate to the caller.
    """
    if settings.LANGFUSE_PUBLIC_KEY:
        try:
            # Imported lazily inside the try so an import failure is handled
            # like any other telemetry error.
            from langfuse import get_client

            routing_metadata = {
                "namespace": settings.NAMESPACE,
                "provider": provider,
                "model": model,
            }
            span_fields: dict[str, Any] = {"metadata": routing_metadata}
            if name is not None:
                span_fields["name"] = name

            langfuse_client = get_client()
            langfuse_client.update_current_span(**span_fields)
        except Exception as exc:  # pragma: no cover - best-effort telemetry
            logger.debug("Failed to update Langfuse span metadata: %s", exc)


@dataclass(frozen=True)
class AttemptPlan:
    """Per-attempt plan produced by `plan_attempt`.

    Replaces the old loose tuple-of-six (`ProviderSelection`) with a single
    dataclass. Carries everything the executor / tool loop needs to make one
    backend call without re-resolving configuration mid-call.

    Instances are immutable (`frozen=True`). The field order below is also the
    positional constructor order.
    """

    # Transport of the config selected for this attempt.
    provider: ModelTransport
    # Model name taken from the selected config.
    model: str
    # Client obtained from `client_for_model_config` for the selected config.
    client: ProviderClient
    # Thinking budget for this attempt: the caller's value while on the primary
    # config, the fallback config's value otherwise. None when unset.
    thinking_budget_tokens: int | None
    # Reasoning effort for this attempt, chosen by the same primary/fallback
    # rule as `thinking_budget_tokens`.
    reasoning_effort: ReasoningEffortType
    # The ModelConfig selected for this attempt (primary, or the config built
    # from the fallback).
    selected_config: ModelConfig


def resolve_runtime_model_config(
    model_config: ModelConfig | ConfiguredModelSettings,
) -> ModelConfig:
    """Normalize the input to a runtime ModelConfig.

    A value that is already a ModelConfig is returned as-is (same object);
    anything else is handed to `resolve_model_config` and its result returned.
    """
    already_resolved = isinstance(model_config, ModelConfig)
    if not already_resolved:
        return resolve_model_config(model_config)
    return model_config


def select_model_config_for_attempt(
    model_config: ModelConfig,
    *,
    attempt: int,
    retry_attempts: int,
) -> ModelConfig:
    """Choose which config a given attempt should run with.

    Every attempt uses the primary ``model_config`` (returned unchanged, same
    object) except the one where ``attempt == retry_attempts``. On that final
    attempt, if a fallback is configured, a new ModelConfig is built from the
    fallback's fields with its own ``fallback`` set to None. Without a
    fallback the primary config is returned on the final attempt too.

    NOTE(review): if attempts are numbered from 1 and ``retry_attempts`` is 1,
    the first attempt is also the final one, so a configured fallback is used
    immediately and the primary is never tried — confirm this is intended.
    """
    is_final_attempt = attempt == retry_attempts
    if not is_final_attempt:
        return model_config

    backup = model_config.fallback
    if backup is None:
        return model_config

    # Promote the fallback to a standalone config. `fallback=None` keeps the
    # result from chaining to yet another fallback.
    return ModelConfig(
        model=backup.model,
        transport=backup.transport,
        fallback=None,
        api_key=backup.api_key,
        base_url=backup.base_url,
        temperature=backup.temperature,
        top_p=backup.top_p,
        top_k=backup.top_k,
        frequency_penalty=backup.frequency_penalty,
        presence_penalty=backup.presence_penalty,
        seed=backup.seed,
        thinking_effort=backup.thinking_effort,
        thinking_budget_tokens=backup.thinking_budget_tokens,
        provider_params=backup.provider_params,
        max_output_tokens=backup.max_output_tokens,
        stop_sequences=backup.stop_sequences,
        cache_policy=backup.cache_policy,
    )


def plan_attempt(
    *,
    runtime_model_config: ModelConfig,
    attempt: int,
    retry_attempts: int,
    call_thinking_budget_tokens: int | None,
    call_reasoning_effort: ReasoningEffortType,
) -> AttemptPlan:
    """Assemble the AttemptPlan for the given `attempt`.

    The config is picked by `select_model_config_for_attempt`, and a client is
    obtained for it via `client_for_model_config`. While the primary config is
    in use the caller-supplied reasoning params apply; once the fallback has
    been selected, the fallback config's own thinking budget / effort are used
    instead, so cross-transport fallbacks use provider-appropriate params.

    A warning is logged when the final attempt switches to a configured
    fallback.
    """
    chosen = select_model_config_for_attempt(
        runtime_model_config,
        attempt=attempt,
        retry_attempts=retry_attempts,
    )
    provider = chosen.transport
    provider_client = client_for_model_config(provider, chosen)

    # Identity (not equality) check: the selector returns the very same object
    # when the primary config stays in effect.
    if chosen is runtime_model_config:
        budget = call_thinking_budget_tokens
        effort: ReasoningEffortType = call_reasoning_effort
    else:
        budget = chosen.thinking_budget_tokens
        effort = chosen.thinking_effort

    switching_to_backup = (
        attempt == retry_attempts and runtime_model_config.fallback is not None
    )
    if switching_to_backup:
        logger.warning(
            f"Final retry attempt {attempt}/{retry_attempts}: switching from "
            + f"{runtime_model_config.transport}/{runtime_model_config.model} to "
            + f"backup {provider}/{chosen.model}"
        )

    return AttemptPlan(
        provider=provider,
        model=chosen.model,
        client=provider_client,
        thinking_budget_tokens=budget,
        reasoning_effort=effort,
        selected_config=chosen,
    )


def effective_config_for_call(
    *,
    selected_config: ModelConfig | None,
    provider: ModelTransport,
    model: str,
    temperature: float | None,
    stop_seqs: list[str] | None,
    thinking_budget_tokens: int | None,
    reasoning_effort: ReasoningEffortType,
) -> ModelConfig:
    """Produce the ModelConfig handed to the executor / request_builder.

    With a ``selected_config``: return a copy of it in which every per-call
    kwarg that is not None (temperature, stop_seqs, thinking_budget_tokens,
    reasoning_effort) replaces the configured value. ``provider`` and ``model``
    are not consulted on this path. ``max_output_tokens`` is always reset to
    None so the per-call max_tokens kwarg is authoritative — matching
    historical honcho_llm_call_inner behavior.

    Without a ``selected_config`` (test-only callers passing provider+model
    directly): synthesize a minimal ModelConfig from the given arguments.
    """
    if selected_config is not None:
        # Field name on ModelConfig -> per-call value. Only non-None values
        # count as overrides, so falsy values such as 0.0 or [] still apply.
        per_call_values = {
            "temperature": temperature,
            "stop_sequences": stop_seqs,
            "thinking_budget_tokens": thinking_budget_tokens,
            "thinking_effort": reasoning_effort,
        }
        updates: dict[str, Any] = {"max_output_tokens": None}
        updates.update(
            {
                field: value
                for field, value in per_call_values.items()
                if value is not None
            }
        )
        return selected_config.model_copy(update=updates)

    return ModelConfig(
        model=model,
        transport=provider,
        temperature=temperature,
        stop_sequences=stop_seqs,
        thinking_budget_tokens=thinking_budget_tokens,
        thinking_effort=reasoning_effort,
    )


def effective_temperature(temperature: float | None) -> float | None:
    """Return the temperature to use, nudging 0.0 up to 0.2 on retries.

    The bump applies only when ``temperature`` equals 0.0 and the
    ``current_attempt`` context variable is greater than 1. Every other input,
    including None, is returned unchanged.
    """
    if temperature != 0.0:
        return temperature
    if current_attempt.get() <= 1:
        return temperature
    logger.debug("Bumping temperature from 0.0 to 0.2 on retry")
    return 0.2


def resolve_backend_for_plan(plan: AttemptPlan) -> Any:
    """Convenience wrapper: turn a plan into its backend.

    Hands the plan's provider and client to `backend_for_provider` and returns
    whatever that call produces.
    """
    provider, client = plan.provider, plan.client
    return backend_for_provider(provider, client)


# Public API of this module: the names exported by a star-import.
# Kept in case-insensitive alphabetical order.
__all__ = [
    "AttemptPlan",
    "current_attempt",
    "effective_config_for_call",
    "effective_temperature",
    "plan_attempt",
    "resolve_backend_for_plan",
    "resolve_runtime_model_config",
    "select_model_config_for_attempt",
    "update_current_langfuse_observation",
]