Spaces:

build-small-hackathon
/

multi-agent-lab

Running on Zero

multi-agent-lab / tests /test_modal_build_command.py

agharsallah

Refactor modal service and logging setup

5d4ef87 17 days ago

5.78 kB

	"""Guard the ``vllm serve`` argv that ``build_command`` emits.

	The serving layer turns one ``ModelConfig`` into the argv launched inside the
	container, so these tests pin the mapping from config fields to vLLM flags: the
	always-present identity flags, the data-driven toggles (parsers, eager, prefix
	caching), and the ``extra_vllm_args`` escape hatch.

	``modal/service.py`` does ``import modal`` and ``from catalogue import …``, so we
	load it exactly the way ``modal deploy`` does: with ``modal/`` on ``sys.path`` (the
	folder's contents become importable under their bare names; ``import modal`` still
	binds the installed SDK, not the folder).
	"""

	from __future__ import annotations

	import importlib
	import json
	import sys
	from pathlib import Path

	import pytest

	# The ``modal`` directory located two levels above this file (i.e. a sibling of
	# the directory that contains this test module). Prepended to ``sys.path`` by the
	# ``service`` fixture so the serving module can be imported by its bare name.
	_MODAL_DIR = Path(__file__).resolve().parents[1] / "modal"


	@pytest.fixture(scope="module")
	def service():
	    """Import and return the ``service`` module with ``modal/`` on ``sys.path``.

	    The directory is prepended only when it is not already listed; the entry is
	    left in place afterwards.
	    """
	    modal_path = str(_MODAL_DIR)
	    if modal_path not in sys.path:
	        sys.path.insert(0, modal_path)
	    return importlib.import_module("service")


	def _make(service, **kwargs):
	    """Build a ``ModelConfig`` with a fixed name and endpoint plus any overrides.

	    ``kwargs`` are forwarded unchanged to ``service.ModelConfig``.
	    """
	    config_cls = service.ModelConfig
	    return config_cls(name="acme/Tiny-1B", endpoint_name="tiny-1b", **kwargs)


	def _flag_value(cmd: list[str], flag: str) -> str:
	    """Return the argv element immediately after the first occurrence of ``flag``."""
	    position = cmd.index(flag)
	    return cmd[position + 1]


	# ── always-present identity flags ──────────────────────────────────────────────


	def test_serves_the_model_with_identity_flags(service):
	    """A default config yields the ``vllm serve <repo>`` prefix and identity flags."""
	    argv = service.build_command(_make(service))
	    leading = argv[:3]
	    assert leading == ["vllm", "serve", "acme/Tiny-1B"]
	    # With no alias configured, the served name is expected to equal the repo id
	    # (clients pass the repo id).
	    served_name = _flag_value(argv, "--served-model-name")
	    assert served_name == "acme/Tiny-1B"
	    port = _flag_value(argv, "--port")
	    assert port == str(service.VLLM_PORT)
	    tensor_parallel = _flag_value(argv, "--tensor-parallel-size")
	    assert tensor_parallel == "1"


	def test_served_model_name_alias(service):
	    """``served_model_name`` changes the advertised name, not the positional repo."""
	    config = _make(service, served_model_name="acme/Tiny")
	    argv = service.build_command(config)
	    assert _flag_value(argv, "--served-model-name") == "acme/Tiny"
	    # The positional model argument must still be the real repo id.
	    positional_model = argv[2]
	    assert positional_model == "acme/Tiny-1B"


	# ── data-driven toggles ────────────────────────────────────────────────────────


	def test_prefix_caching_on_by_default_off_when_disabled(service):
	    """Prefix caching is enabled by default; disabling emits the ``--no-`` form."""
	    default_argv = service.build_command(_make(service))
	    assert "--enable-prefix-caching" in default_argv

	    disabled_config = _make(service, enable_prefix_caching=False)
	    disabled_argv = service.build_command(disabled_config)
	    assert "--no-enable-prefix-caching" in disabled_argv
	    assert "--enable-prefix-caching" not in disabled_argv


	def test_optional_inference_flags_emitted(service):
	    """Explicitly set inference options each show up as their vLLM flag."""
	    overrides = {
	        "max_model_len": 8192,
	        "trust_remote_code": True,
	        "enforce_eager": True,
	        "gpu_memory_utilization": 0.9,
	    }
	    argv = service.build_command(_make(service, **overrides))

	    assert _flag_value(argv, "--max-model-len") == "8192"
	    # Boolean options are bare switches with no following value.
	    for switch in ("--trust-remote-code", "--enforce-eager"):
	        assert switch in argv
	    assert _flag_value(argv, "--gpu-memory-utilization") == "0.9"


	def test_async_scheduling_default_on_off_when_disabled(service):
	    """``--async-scheduling`` is present by default and absent when turned off."""
	    default_argv = service.build_command(_make(service))
	    assert "--async-scheduling" in default_argv

	    disabled_argv = service.build_command(_make(service, async_scheduling=False))
	    assert "--async-scheduling" not in disabled_argv


	def test_parser_flags(service):
	    """Configured parsers are emitted as flags; unset parsers emit nothing."""
	    parser_options = {
	        "reasoning_parser": "qwen3",
	        "tool_call_parser": "hermes",
	        "enable_auto_tool_choice": True,
	    }
	    argv = service.build_command(_make(service, **parser_options))
	    assert _flag_value(argv, "--reasoning-parser") == "qwen3"
	    assert _flag_value(argv, "--tool-call-parser") == "hermes"
	    assert "--enable-auto-tool-choice" in argv

	    # A config that sets no parser options must not mention either parser flag.
	    default_argv = service.build_command(_make(service))
	    for parser_flag in ("--reasoning-parser", "--tool-call-parser"):
	        assert parser_flag not in default_argv


	def test_mm_limits_serialized_as_json(service):
	    """``mm_limits`` round-trips through the JSON value of ``--limit-mm-per-prompt``."""
	    config = _make(service, mm_limits={"image": 0, "audio": 0})
	    argv = service.build_command(config)
	    decoded = json.loads(_flag_value(argv, "--limit-mm-per-prompt"))
	    assert decoded == {"image": 0, "audio": 0}


	def test_log_requests_default_on(service):
	    """``--enable-log-requests`` is present by default and absent when turned off."""
	    default_argv = service.build_command(_make(service))
	    assert "--enable-log-requests" in default_argv

	    quiet_argv = service.build_command(_make(service, log_requests=False))
	    assert "--enable-log-requests" not in quiet_argv


	# ── escape hatch ────────────────────────────────────────────────────────────────


	def test_extra_vllm_args_appended_verbatim(service):
	    """``extra_vllm_args`` end up, unchanged and in order, at the tail of the argv."""
	    config = _make(service, extra_vllm_args=("--quantization", "fp8"))
	    argv = service.build_command(config)
	    tail = argv[-2:]
	    assert tail == ["--quantization", "fp8"]


	# ── deploy script wiring ───────────────────────────────────────────────────────


	def test_deploy_script_propagates_knob_envs():
	    """Check that ``deploy_modal._env_for`` maps the CLI knobs onto env vars.

	    With ``keep_warm`` and ``auth`` both true, ``MODAL_LLM_KEEP_WARM`` and
	    ``MODAL_LLM_REQUIRE_AUTH`` must both be ``"1"``; with both false, neither key
	    may be present in the returned mapping.
	    """
	    from argparse import Namespace

	    scripts_dir = str(Path(__file__).resolve().parents[1] / "scripts")
	    # Guard the insert the same way the ``service`` fixture does, so repeated runs
	    # in one process do not stack duplicate entries on ``sys.path``.
	    if scripts_dir not in sys.path:
	        sys.path.insert(0, scripts_dir)
	    deploy_modal = importlib.import_module("deploy_modal")

	    env = deploy_modal._env_for(Namespace(keep_warm=True, auth=True))
	    assert env["MODAL_LLM_KEEP_WARM"] == "1"
	    assert env["MODAL_LLM_REQUIRE_AUTH"] == "1"

	    # Both off → neither env var is set (so endpoints stay public + scale-to-zero).
	    env_off = deploy_modal._env_for(Namespace(keep_warm=False, auth=False))
	    assert "MODAL_LLM_KEEP_WARM" not in env_off
	    assert "MODAL_LLM_REQUIRE_AUTH" not in env_off