"""Run a baseline evaluation against the MutationGym environment.

For each episode the script resets the environment with a deterministic seed,
submits a canned pytest file for the task reported by the observation (see
``TEST_TEMPLATES``), and records the fields of the step outcome.  A per-episode
result list plus an aggregate summary is written to a JSON file, and the
summary is echoed to stdout.
"""

from __future__ import annotations

import argparse
import json
from pathlib import Path
from typing import Any, Dict

from mutationgym_env.models import MutationGymAction
from mutationgym_env.server.mutationgym_environment import MutationGymEnvironment

# Canned pytest sources keyed by task id.  Each value is submitted verbatim as
# the ``tests_py`` payload of the action.  Doubled backslashes (``\\t``,
# ``\\n``) are deliberate here: they keep the escape sequence intact in the
# generated test file so it is interpreted there, not in this module.
TEST_TEMPLATES: Dict[str, str] = {
    "task_001": """
from solution import clamp

def test_clamp_bounds():
    assert clamp(-1, 0, 10) == 0
    assert clamp(0, 0, 10) == 0
    assert clamp(5, 0, 10) == 5
    assert clamp(10, 0, 10) == 10
    assert clamp(11, 0, 10) == 10
""",
    "task_002": """
from solution import fizzbuzz

def test_fizzbuzz():
    assert fizzbuzz(3) == "Fizz"
    assert fizzbuzz(5) == "Buzz"
    assert fizzbuzz(15) == "FizzBuzz"
    assert fizzbuzz(7) == "7"
""",
    "task_003": """
from solution import safe_divide

def test_safe_divide():
    assert safe_divide(10, 2) == 5
    assert safe_divide(5, -2) == -2.5
    assert safe_divide(1, 0) is None
""",
    "task_004": """
from solution import normalize_whitespace

def test_normalize_whitespace():
    assert normalize_whitespace(" a\\t b ") == "a b"
    assert normalize_whitespace("a\\n\\n b") == "a b"
    assert normalize_whitespace("a b c") == "a b c"
""",
    "task_005": """
from solution import unique_preserve_order

def test_unique_preserve_order():
    assert unique_preserve_order(["a", "b", "a", "c", "b"]) == ["a", "b", "c"]
    assert unique_preserve_order([]) == []
    assert unique_preserve_order(["x", "x", "x"]) == ["x"]
""",
    "task_006": """
from solution import median

def test_median():
    assert median([3, 1, 2]) == 2
    assert median([4, 1, 2, 3]) == 2.5
    assert median([10]) == 10
""",
    "task_007": """
from solution import rotate_right

def test_rotate_right():
    assert rotate_right([1, 2, 3], 1) == [3, 1, 2]
    assert rotate_right([1, 2, 3], 0) == [1, 2, 3]
    assert rotate_right([1, 2, 3], 4) == [3, 1, 2]
    assert rotate_right([1, 2, 3], -1) == [2, 3, 1]
    assert rotate_right([], 3) == []
""",
    "task_008": """
from solution import count_vowels

def test_count_vowels():
    assert count_vowels("AEIOU") == 5
    assert count_vowels("sky") == 0
    assert count_vowels("banana") == 3
""",
}


def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the baseline run.

    Returns:
        A namespace with ``seed`` (int, default 42), ``episodes`` (int,
        default 8) and ``output`` (``Path``, default
        ``outputs/eval_baseline.json``).
    """
    parser = argparse.ArgumentParser(description="Run baseline MutationGym eval.")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--episodes", type=int, default=8)
    parser.add_argument("--output", type=Path, default=Path("outputs/eval_baseline.json"))
    return parser.parse_args()


def _fallback_tests(task_id: str) -> str:
    """Return a trivial smoke-test file for a task without a canned template.

    The string uses real newlines (``\n``).  Unlike the triple-quoted
    templates, this is the top level of the generated file, so an escaped
    ``\\n`` would emit a literal backslash-n and yield one unparsable line.
    """
    return (
        f"from solution import {task_id}\n\n"
        "def test_smoke():\n"
        "    assert True\n"
    )


def _summarize(results: list[Dict[str, Any]], episodes: int) -> Dict[str, Any]:
    """Aggregate per-episode results into the summary record.

    Args:
        results: One dict per executed episode, as built in ``main``.
        episodes: The requested episode count, reported back unchanged.

    Returns:
        A dict with ``episodes``, ``avg_kill_rate`` (total killed divided by
        total mutants) and ``reference_pass_rate`` (fraction of executed
        episodes whose ``passed_reference`` was truthy).  Both rates are
        ``0.0`` when their denominator is zero.
    """
    killed_total = sum(entry["killed"] for entry in results)
    mutant_total = sum(entry["total_mutants"] for entry in results)
    passed_reference_total = sum(1 for entry in results if entry["passed_reference"])
    return {
        "episodes": episodes,
        "avg_kill_rate": (killed_total / mutant_total) if mutant_total else 0.0,
        # Guard the denominator the same way as avg_kill_rate: with
        # ``--episodes 0`` no episode runs and a bare division would raise
        # ZeroDivisionError before any output is written.
        "reference_pass_rate": (passed_reference_total / len(results)) if results else 0.0,
    }


def main() -> None:
    """Run the configured number of episodes and write the JSON report.

    Episode ``i`` is reset with seed ``args.seed + i``.  The report
    (``{"summary": ..., "results": [...]}``) is written to ``args.output``
    (parent directories are created as needed) and the summary is printed.
    """
    args = parse_args()
    env = MutationGymEnvironment()

    results: list[Dict[str, Any]] = []
    for episode in range(args.episodes):
        obs = env.reset(seed=args.seed + episode)
        task_id = obs.task_id

        tests = TEST_TEMPLATES.get(task_id)
        if tests is None:
            tests = _fallback_tests(task_id)

        outcome = env.step(MutationGymAction(tests_py=tests, finalize=True))
        results.append(
            {
                "task_id": task_id,
                "reward": outcome.reward,
                "killed": outcome.killed,
                "total_mutants": outcome.total_mutants,
                "passed_reference": outcome.passed_reference,
                "runtime_ms": outcome.runtime_ms,
                "error": outcome.error,
            }
        )

    summary = _summarize(results, args.episodes)
    payload = {"summary": summary, "results": results}

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print(json.dumps(summary, indent=2))


if __name__ == "__main__":
    main()