# Source: gpu-goblin/tests/test_misnested_args.py
# Uploaded by bharathtelu: "Deploy auto-tune UI + scripts (work-from-91d0cf0)"
# Commit a9aa4ae (verified), 3.72 kB
"""Tests for the defensive misnested-arg extraction in benchmark + profile_run.
Live AMD-GPU lesson: Qwen2.5-7B (and probably others) occasionally JSON-nests
``steps`` / ``cache`` *inside* the ``config`` dict instead of at the top level
alongside it. WorkloadConfig strict-validates extras, so without this defense
the call errors out and a tool slot is wasted. The well-tuned scenario run
on 2026-05-07 burned two of the eight available slots on this exact mistake;
fixing it costs nothing and saves the audit.
"""
from __future__ import annotations
import shutil
from agent.tools import call
def _baseline_config() -> dict:
return {
"model_name": "Qwen/Qwen2.5-7B-Instruct",
"batch_size": 4,
"precision": "fp16",
"attention_impl": "eager",
"dataloader_workers": 0,
}
class TestBenchmarkMisnestedArgs:
    """``benchmark`` tolerates runtime args that were nested inside ``config``.

    Every test goes through the ``call("benchmark", ...)`` entry point and
    checks that the call succeeds when ``steps`` / ``cache`` / ``force_rerun``
    arrive inside the ``config`` dict rather than as top-level arguments.
    """

    def setup_method(self) -> None:
        """Remove the ``bench_cache`` directory before each test."""
        # Each test starts with an empty cache so cache-hit doesn't mask the
        # behavior under test. The path is relative to the current working
        # directory; a missing directory is not an error.
        shutil.rmtree("bench_cache", ignore_errors=True)

    def test_steps_nested_in_config_is_extracted(self) -> None:
        """Old behavior: ``WorkloadConfig`` validation explodes with
        'Extra inputs are not permitted [steps]'. New behavior: defensive
        extraction pulls ``steps`` back to the top-level arg, call succeeds.
        """
        cfg = {**_baseline_config(), "steps": 25}
        result = call("benchmark", config=cfg)
        assert result.ok, result.error
        assert result.result["steps"] == 25

    def test_cache_nested_in_config_is_extracted(self) -> None:
        """A ``cache`` flag nested in ``config`` must not fail the call.

        Only success is asserted; the result payload is not inspected.
        """
        cfg = {**_baseline_config(), "cache": False}
        result = call("benchmark", config=cfg)
        assert result.ok, result.error

    def test_force_rerun_nested_in_config_is_extracted(self) -> None:
        """A ``force_rerun`` flag nested in ``config`` must not fail the call.

        Only success is asserted; the result payload is not inspected.
        """
        cfg = {**_baseline_config(), "force_rerun": True}
        result = call("benchmark", config=cfg)
        assert result.ok, result.error

    def test_explicit_top_level_wins_over_nested(self) -> None:
        """If caller passes BOTH (config has steps + top-level steps), the
        explicit non-default top-level wins. Defensive code is for the
        accident case, not for letting nesting silently override.
        """
        cfg = {**_baseline_config(), "steps": 25}
        result = call("benchmark", config=cfg, steps=37)
        assert result.ok, result.error
        assert result.result["steps"] == 37

    def test_all_three_nested_at_once(self) -> None:
        """The exact failure mode from the live run: model nested three
        runtime args inside config. All three should get pulled out.
        """
        cfg = {
            **_baseline_config(),
            "steps": 30,
            "cache": False,
            "force_rerun": True,
        }
        result = call("benchmark", config=cfg)
        assert result.ok, result.error
        # ``steps`` is the only one of the three checked in the result.
        assert result.result["steps"] == 30
class TestProfileRunMisnestedArgs:
    """``profile_run`` tolerates ``steps`` nested inside ``config``.

    Mirrors the ``benchmark`` tests for the ``call("profile_run", ...)``
    entry point: a nested ``steps`` is honored, an explicit top-level
    ``steps`` takes precedence, and a clean config behaves as before.
    """

    def test_steps_nested_in_config_is_extracted(self) -> None:
        """``steps`` placed inside ``config`` is reported back in the result."""
        cfg = {**_baseline_config(), "steps": 7}
        result = call("profile_run", config=cfg)
        assert result.ok, result.error
        assert result.result["steps"] == 7

    def test_explicit_top_level_wins(self) -> None:
        """An explicit top-level ``steps`` beats the value nested in ``config``."""
        cfg = {**_baseline_config(), "steps": 7}
        result = call("profile_run", config=cfg, steps=15)
        assert result.ok, result.error
        assert result.result["steps"] == 15

    def test_clean_config_unaffected(self) -> None:
        """Sanity: when nothing is misnested, behavior is unchanged."""
        result = call("profile_run", config=_baseline_config())
        assert result.ok, result.error
        assert result.result["steps"] == 10  # default