"""LLM backends for the GPU Goblin agent loop.

Two backends ship today, both Qwen, both speaking OpenAI-shape tool calls:

  * ``QwenHFBackend``    — Qwen via Hugging Face Inference Providers.
                           HF auto-routes to Together / Fireworks-AI /
                           Nebius / etc. Needs ``HF_TOKEN``. The default
                           and the path the public HF Space uses.

  * ``QwenVLLMBackend``  — Qwen self-hosted on YOUR MI300X via vLLM,
                           OpenAI-compatible at ``http://host:8000/v1``.
                           "All AMD silicon" path. Stand it up with
                           the lablab tutorial recipe, then point Goblin
                           at it with ``GOBLIN_AGENT_BACKEND=qwen-vllm``.

Pick one with the env var ``GOBLIN_AGENT_BACKEND``:

    export GOBLIN_AGENT_BACKEND=qwen-hf       # default — uses HF_TOKEN
    export GOBLIN_AGENT_BACKEND=qwen-vllm     # uses GOBLIN_QWEN_VLLM_URL

Backend-specific knobs:

    # qwen-hf
    HF_TOKEN                       # required
    GOBLIN_QWEN_MODEL              # default Qwen/Qwen2.5-7B-Instruct
    GOBLIN_QWEN_PROVIDER           # default auto

    # qwen-vllm
    GOBLIN_QWEN_VLLM_URL           # default http://localhost:8000/v1
    GOBLIN_QWEN_VLLM_MODEL         # default Qwen/Qwen2.5-7B-Instruct
    GOBLIN_QWEN_VLLM_KEY           # optional auth header (vLLM ignores it)
"""

from __future__ import annotations

import os

from agent.backends.base import AgentTurn, Backend, ToolCall

# Public API of this module: the three types re-exported from
# ``agent.backends.base`` plus the two functions defined below.
__all__ = [
    "AgentTurn",
    "Backend",
    "ToolCall",
    "make_backend",
    "active_backend_name",
]


# Accepted spellings of ``GOBLIN_AGENT_BACKEND`` for each backend. Values are
# compared after stripping surrounding whitespace and lower-casing.
_VLLM_ALIASES = {"qwen-vllm", "qwen_vllm", "vllm", "local", "qwen-local"}
_HF_ALIASES = {"qwen-hf", "qwen_hf", "qwen", "hf"}


def active_backend_name() -> str:
    """Return the canonical id of the backend selected by the environment.

    Reads ``GOBLIN_AGENT_BACKEND``, strips surrounding whitespace, lower-cases
    it, and maps any known alias to its canonical id.

    An unset, empty or whitespace-only value selects ``"qwen-hf"`` silently.
    Any other unrecognised value also falls back to ``"qwen-hf"`` (the safe
    default), but a warning is logged so that a typo such as ``qwen-vlm`` does
    not silently route to a different backend than the one intended.

    Returns:
        Either ``"qwen-vllm"`` or ``"qwen-hf"``.
    """
    raw = (os.environ.get("GOBLIN_AGENT_BACKEND") or "").strip().lower()
    if raw in _VLLM_ALIASES:
        return "qwen-vllm"
    if raw and raw not in _HF_ALIASES:
        # Keep the documented fallback, but make the misconfiguration visible.
        # Imported here because only this rarely-taken branch needs it.
        import logging

        logging.getLogger(__name__).warning(
            "Unrecognised GOBLIN_AGENT_BACKEND=%r; falling back to 'qwen-hf'. "
            "Valid values: %s",
            raw,
            ", ".join(sorted(_VLLM_ALIASES | _HF_ALIASES)),
        )
    return "qwen-hf"


def make_backend(system_prompt: str, **kwargs) -> Backend:
    """Build the agent backend that ``GOBLIN_AGENT_BACKEND`` selects.

    Callers may pass any of ``model``, ``provider``, ``base_url``,
    ``api_key`` and ``max_tokens`` without knowing which backend is active:
    each backend receives only the options it accepts, and everything else
    in ``kwargs`` is ignored. Options that are not supplied are forwarded as
    ``None``, except ``max_tokens``, which defaults to ``2048``.

    The backend module is imported only once it has been selected, so
    neither the openai SDK nor huggingface_hub is loaded unless the
    corresponding backend is actually in use.

    Args:
        system_prompt: System prompt handed to the backend constructor.
        **kwargs: Optional backend constructor options (see above).

    Returns:
        A ``QwenVLLMBackend`` when the active backend is ``"qwen-vllm"``,
        otherwise a ``QwenHFBackend``.
    """
    # Options understood by both backends.
    shared = {
        "system_prompt": system_prompt,
        "model": kwargs.get("model"),
        "max_tokens": kwargs.get("max_tokens", 2048),
    }

    if active_backend_name() != "qwen-vllm":
        from agent.backends.qwen_hf import QwenHFBackend

        return QwenHFBackend(provider=kwargs.get("provider"), **shared)

    from agent.backends.qwen_vllm import QwenVLLMBackend

    return QwenVLLMBackend(
        base_url=kwargs.get("base_url"),
        api_key=kwargs.get("api_key"),
        **shared,
    )