Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import argparse | |
| import json | |
| from pathlib import Path | |
| from typing import Dict | |
| from mutationgym_env.models import MutationGymAction | |
| from mutationgym_env.server.mutationgym_environment import MutationGymEnvironment | |
# Hand-written pytest modules, keyed by task id. Each value is the complete
# source of a test file that imports the function under test from ``solution``.
# Backslashes inside the templates are doubled on purpose: sequences such as
# ``\\t`` and ``\\n`` must reach the generated test file as escape sequences
# rather than being expanded into literal whitespace here.
TEST_TEMPLATES: Dict[str, str] = {
    "task_001": """
from solution import clamp
def test_clamp_bounds():
    assert clamp(-1, 0, 10) == 0
    assert clamp(0, 0, 10) == 0
    assert clamp(5, 0, 10) == 5
    assert clamp(10, 0, 10) == 10
    assert clamp(11, 0, 10) == 10
""",
    "task_002": """
from solution import fizzbuzz
def test_fizzbuzz():
    assert fizzbuzz(3) == "Fizz"
    assert fizzbuzz(5) == "Buzz"
    assert fizzbuzz(15) == "FizzBuzz"
    assert fizzbuzz(7) == "7"
""",
    "task_003": """
from solution import safe_divide
def test_safe_divide():
    assert safe_divide(10, 2) == 5
    assert safe_divide(5, -2) == -2.5
    assert safe_divide(1, 0) is None
""",
    "task_004": """
from solution import normalize_whitespace
def test_normalize_whitespace():
    assert normalize_whitespace(" a\\t b ") == "a b"
    assert normalize_whitespace("a\\n\\n b") == "a b"
    assert normalize_whitespace("a b c") == "a b c"
""",
    "task_005": """
from solution import unique_preserve_order
def test_unique_preserve_order():
    assert unique_preserve_order(["a", "b", "a", "c", "b"]) == ["a", "b", "c"]
    assert unique_preserve_order([]) == []
    assert unique_preserve_order(["x", "x", "x"]) == ["x"]
""",
    "task_006": """
from solution import median
def test_median():
    assert median([3, 1, 2]) == 2
    assert median([4, 1, 2, 3]) == 2.5
    assert median([10]) == 10
""",
    "task_007": """
from solution import rotate_right
def test_rotate_right():
    assert rotate_right([1, 2, 3], 1) == [3, 1, 2]
    assert rotate_right([1, 2, 3], 0) == [1, 2, 3]
    assert rotate_right([1, 2, 3], 4) == [3, 1, 2]
    assert rotate_right([1, 2, 3], -1) == [2, 3, 1]
    assert rotate_right([], 3) == []
""",
    "task_008": """
from solution import count_vowels
def test_count_vowels():
    assert count_vowels("AEIOU") == 5
    assert count_vowels("sky") == 0
    assert count_vowels("banana") == 3
""",
}
| def parse_args() -> argparse.Namespace: | |
| parser = argparse.ArgumentParser(description="Run baseline MutationGym eval.") | |
| parser.add_argument("--seed", type=int, default=42) | |
| parser.add_argument("--episodes", type=int, default=8) | |
| parser.add_argument("--output", type=Path, default=Path("outputs/eval_baseline.json")) | |
| return parser.parse_args() | |
def main() -> None:
    """Run the baseline evaluation and write a JSON report.

    For each episode the environment is reset with ``seed + episode``, the
    test module for the returned task id is submitted in a single finalizing
    step, and the fields of the step outcome are recorded. The per-episode
    results plus an aggregate summary are written to the ``--output`` path
    (parent directories are created as needed), and the summary is printed
    to stdout.
    """
    args = parse_args()
    env = MutationGymEnvironment()

    results = []
    killed_total = 0
    mutant_total = 0
    passed_reference_total = 0

    for episode in range(args.episodes):
        obs = env.reset(seed=args.seed + episode)
        task_id = obs.task_id

        tests = TEST_TEMPLATES.get(task_id)
        if tests is None:
            # No hand-written template for this task: submit a trivial smoke
            # test. Real newlines are required here; an escaped backslash-n
            # would collapse the module into one syntactically invalid line.
            # NOTE(review): this imports a name equal to the task id from
            # ``solution`` -- confirm that such a name exists for tasks
            # without a template.
            tests = (
                f"from solution import {task_id}\n\n"
                "def test_smoke():\n"
                "    assert True\n"
            )

        outcome = env.step(MutationGymAction(tests_py=tests, finalize=True))
        results.append(
            {
                "task_id": task_id,
                "reward": outcome.reward,
                "killed": outcome.killed,
                "total_mutants": outcome.total_mutants,
                "passed_reference": outcome.passed_reference,
                "runtime_ms": outcome.runtime_ms,
                "error": outcome.error,
            }
        )
        killed_total += outcome.killed
        mutant_total += outcome.total_mutants
        passed_reference_total += 1 if outcome.passed_reference else 0

    # Divide by the number of episodes actually run so that ``--episodes 0``
    # (or a negative value) yields 0.0 instead of raising ZeroDivisionError.
    completed = len(results)
    summary = {
        "episodes": args.episodes,
        "avg_kill_rate": (killed_total / mutant_total) if mutant_total else 0.0,
        "reference_pass_rate": (
            (passed_reference_total / completed) if completed else 0.0
        ),
    }
    payload = {"summary": summary, "results": results}

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print(json.dumps(summary, indent=2))
# Run the evaluation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()