"""Guard the ``vllm serve`` argv that ``build_command`` emits.

The serving layer turns one ``ModelConfig`` into the argv launched inside the
container, so these tests pin the mapping from config fields to vLLM flags: the
always-present identity flags, the data-driven toggles (parsers, eager, prefix
caching), and the ``extra_vllm_args`` escape hatch.

``modal/service.py`` does ``import modal`` and ``from catalogue import …``, so we
load it exactly the way ``modal deploy`` does: with ``modal/`` on ``sys.path`` (the
folder's contents become importable under their bare names; ``import modal`` still
binds the installed SDK, not the folder).
"""

from __future__ import annotations

import importlib
import json
import sys
from pathlib import Path

import pytest

_MODAL_DIR = Path(__file__).resolve().parents[1] / "modal"


@pytest.fixture(scope="module")
def service():
    """The serving module, importable with ``modal/`` on the path (as at deploy time)."""
    if str(_MODAL_DIR) not in sys.path:
        sys.path.insert(0, str(_MODAL_DIR))
    return importlib.import_module("service")


def _make(service, **kwargs):
    """A minimal valid ModelConfig with overridable fields."""
    return service.ModelConfig(name="acme/Tiny-1B", endpoint_name="tiny-1b", **kwargs)


def _flag_value(cmd: list[str], flag: str) -> str:
    """The argument that follows ``flag`` in the argv."""
    return cmd[cmd.index(flag) + 1]


# ── always-present identity flags ──────────────────────────────────────────────


def test_serves_the_model_with_identity_flags(service):
    cmd = service.build_command(_make(service))
    assert cmd[:3] == ["vllm", "serve", "acme/Tiny-1B"]
    # served-model-name defaults to the repo name (clients pass the repo id).
    assert _flag_value(cmd, "--served-model-name") == "acme/Tiny-1B"
    assert _flag_value(cmd, "--port") == str(service.VLLM_PORT)
    assert _flag_value(cmd, "--tensor-parallel-size") == "1"


def test_served_model_name_alias(service):
    cmd = service.build_command(_make(service, served_model_name="acme/Tiny"))
    assert _flag_value(cmd, "--served-model-name") == "acme/Tiny"
    # but vLLM still loads the real repo (positional arg)
    assert cmd[2] == "acme/Tiny-1B"


# ── data-driven toggles ────────────────────────────────────────────────────────


def test_prefix_caching_on_by_default_off_when_disabled(service):
    assert "--enable-prefix-caching" in service.build_command(_make(service))
    off = service.build_command(_make(service, enable_prefix_caching=False))
    assert "--no-enable-prefix-caching" in off
    assert "--enable-prefix-caching" not in off


def test_optional_inference_flags_emitted(service):
    cmd = service.build_command(
        _make(
            service,
            max_model_len=8192,
            trust_remote_code=True,
            enforce_eager=True,
            gpu_memory_utilization=0.9,
        )
    )
    assert _flag_value(cmd, "--max-model-len") == "8192"
    assert "--trust-remote-code" in cmd
    assert "--enforce-eager" in cmd
    assert _flag_value(cmd, "--gpu-memory-utilization") == "0.9"


def test_async_scheduling_default_on_off_when_disabled(service):
    assert "--async-scheduling" in service.build_command(_make(service))
    assert "--async-scheduling" not in service.build_command(_make(service, async_scheduling=False))


def test_parser_flags(service):
    cmd = service.build_command(
        _make(service, reasoning_parser="qwen3", tool_call_parser="hermes", enable_auto_tool_choice=True)
    )
    assert _flag_value(cmd, "--reasoning-parser") == "qwen3"
    assert _flag_value(cmd, "--tool-call-parser") == "hermes"
    assert "--enable-auto-tool-choice" in cmd
    # None parsers emit nothing.
    bare = service.build_command(_make(service))
    assert "--reasoning-parser" not in bare
    assert "--tool-call-parser" not in bare


def test_mm_limits_serialized_as_json(service):
    cmd = service.build_command(_make(service, mm_limits={"image": 0, "audio": 0}))
    assert json.loads(_flag_value(cmd, "--limit-mm-per-prompt")) == {"image": 0, "audio": 0}


def test_log_requests_default_on(service):
    assert "--enable-log-requests" in service.build_command(_make(service))
    assert "--enable-log-requests" not in service.build_command(_make(service, log_requests=False))


# ── escape hatch ────────────────────────────────────────────────────────────────


def test_extra_vllm_args_appended_verbatim(service):
    cmd = service.build_command(_make(service, extra_vllm_args=("--quantization", "fp8")))
    assert cmd[-2:] == ["--quantization", "fp8"]


# ── deploy script wiring ───────────────────────────────────────────────────────


def test_deploy_script_propagates_knob_envs():
    sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "scripts"))
    deploy_modal = importlib.import_module("deploy_modal")
    from argparse import Namespace

    env = deploy_modal._env_for(Namespace(keep_warm=True, auth=True))
    assert env["MODAL_LLM_KEEP_WARM"] == "1"
    assert env["MODAL_LLM_REQUIRE_AUTH"] == "1"

    # Both off → neither env var is set (so endpoints stay public + scale-to-zero).
    env_off = deploy_modal._env_for(Namespace(keep_warm=False, auth=False))
    assert "MODAL_LLM_KEEP_WARM" not in env_off
    assert "MODAL_LLM_REQUIRE_AUTH" not in env_off