Spaces:

lablab-ai-amd-developer-hackathon
/

gpu-goblin

Sleeping

App Files Files Community

gpu-goblin / tests /test_misnested_args.py

bharathtelu

Deploy auto-tune UI + scripts (work-from-91d0cf0)

a9aa4ae verified about 2 months ago

Raw

History Blame Contribute Delete

3.72 kB

	"""Tests for the defensive misnested-arg extraction in benchmark + profile_run.

	Live AMD-GPU lesson: Qwen2.5-7B (and probably others) occasionally JSON-nests
	``steps`` / ``cache`` inside the ``config`` dict instead of at the top level
	alongside it. WorkloadConfig strict-validates extras, so without this defense
	the call errors out and a tool slot is wasted. The well-tuned scenario run
	on 2026-05-07 burned two of the eight available slots on this exact mistake;
	fixing it costs nothing and saves the audit.
	"""

	from __future__ import annotations

	import shutil

	from agent.tools import call


	def _baseline_config() -> dict:
	return {
	"model_name": "Qwen/Qwen2.5-7B-Instruct",
	"batch_size": 4,
	"precision": "fp16",
	"attention_impl": "eager",
	"dataloader_workers": 0,
	}


	class TestBenchmarkMisnestedArgs:
	    """``benchmark`` calls where runtime args are nested inside ``config``."""

	    def setup_method(self) -> None:
	        # Wipe the cache directory before every test; otherwise a cache hit
	        # could hide the behavior being exercised.
	        shutil.rmtree("bench_cache", ignore_errors=True)

	    def test_steps_nested_in_config_is_extracted(self) -> None:
	        """``steps`` placed inside ``config`` is honored as the step count.

	        Previously ``WorkloadConfig`` validation failed with
	        'Extra inputs are not permitted [steps]'. With the defensive
	        extraction the call succeeds and reports the nested value.
	        """
	        nested = dict(_baseline_config(), steps=25)
	        outcome = call("benchmark", config=nested)
	        assert outcome.ok, outcome.error
	        assert outcome.result["steps"] == 25

	    def test_cache_nested_in_config_is_extracted(self) -> None:
	        """A ``cache`` flag nested inside ``config`` must not fail the call."""
	        nested = dict(_baseline_config(), cache=False)
	        outcome = call("benchmark", config=nested)
	        assert outcome.ok, outcome.error

	    def test_force_rerun_nested_in_config_is_extracted(self) -> None:
	        """A ``force_rerun`` flag nested inside ``config`` must not fail the call."""
	        nested = dict(_baseline_config(), force_rerun=True)
	        outcome = call("benchmark", config=nested)
	        assert outcome.ok, outcome.error

	    def test_explicit_top_level_wins_over_nested(self) -> None:
	        """An explicit top-level ``steps`` beats the one nested in ``config``.

	        The defensive extraction exists to rescue accidental nesting; it
	        must not let a nested value silently override what the caller
	        passed explicitly.
	        """
	        nested = dict(_baseline_config(), steps=25)
	        outcome = call("benchmark", config=nested, steps=37)
	        assert outcome.ok, outcome.error
	        assert outcome.result["steps"] == 37

	    def test_all_three_nested_at_once(self) -> None:
	        """All three runtime args nested in ``config`` at the same time.

	        This mirrors the failure seen in the live run, where the model put
	        ``steps``, ``cache`` and ``force_rerun`` inside ``config``.
	        """
	        nested = _baseline_config()
	        nested.update(steps=30, cache=False, force_rerun=True)
	        outcome = call("benchmark", config=nested)
	        assert outcome.ok, outcome.error
	        assert outcome.result["steps"] == 30


	class TestProfileRunMisnestedArgs:
	    """``profile_run`` calls where ``steps`` is nested inside ``config``."""

	    def test_steps_nested_in_config_is_extracted(self) -> None:
	        """``steps`` placed inside ``config`` is honored as the step count."""
	        nested = dict(_baseline_config(), steps=7)
	        outcome = call("profile_run", config=nested)
	        assert outcome.ok, outcome.error
	        assert outcome.result["steps"] == 7

	    def test_explicit_top_level_wins(self) -> None:
	        """An explicit top-level ``steps`` beats the one nested in ``config``."""
	        nested = dict(_baseline_config(), steps=7)
	        outcome = call("profile_run", config=nested, steps=15)
	        assert outcome.ok, outcome.error
	        assert outcome.result["steps"] == 15

	    def test_clean_config_unaffected(self) -> None:
	        """Sanity check: with nothing misnested, the default step count is reported."""
	        outcome = call("profile_run", config=_baseline_config())
	        assert outcome.ok, outcome.error
	        assert outcome.result["steps"] == 10  # default