from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Dict
from mutationgym_env.models import MutationGymAction
from mutationgym_env.server.mutationgym_environment import MutationGymEnvironment
# Baseline test suites, keyed by task id.
#
# Each value is the complete source text of a test module: it imports one
# function from ``solution`` and defines a single ``test_*`` function made of
# plain ``assert`` statements. ``main`` looks up the template for the task id
# returned by the environment and submits it unchanged as ``tests_py``.
#
# NOTE(review): the strings are submitted verbatim, so the whitespace inside
# them is significant (indentation of the assert lines, and the spacing in the
# task_004 inputs). Confirm it is intact before editing any template.
TEST_TEMPLATES: Dict[str, str] = {
# task_001: clamp() below, at, inside, at and above the [0, 10] bounds.
"task_001": """
from solution import clamp
def test_clamp_bounds():
assert clamp(-1, 0, 10) == 0
assert clamp(0, 0, 10) == 0
assert clamp(5, 0, 10) == 5
assert clamp(10, 0, 10) == 10
assert clamp(11, 0, 10) == 10
""",
# task_002: fizzbuzz() for multiples of 3, 5, both, and neither.
"task_002": """
from solution import fizzbuzz
def test_fizzbuzz():
assert fizzbuzz(3) == "Fizz"
assert fizzbuzz(5) == "Buzz"
assert fizzbuzz(15) == "FizzBuzz"
assert fizzbuzz(7) == "7"
""",
# task_003: safe_divide() including a negative divisor and division by zero.
"task_003": """
from solution import safe_divide
def test_safe_divide():
assert safe_divide(10, 2) == 5
assert safe_divide(5, -2) == -2.5
assert safe_divide(1, 0) is None
""",
# task_004: the backslashes are doubled because this is a non-raw string; the
# template text therefore contains the two-character escapes \t and \n, which
# become a tab and newlines only when the test module itself is parsed.
"task_004": """
from solution import normalize_whitespace
def test_normalize_whitespace():
assert normalize_whitespace(" a\\t b ") == "a b"
assert normalize_whitespace("a\\n\\n b") == "a b"
assert normalize_whitespace("a b c") == "a b c"
""",
# task_005: unique_preserve_order() with duplicates, empty input, all-equal input.
"task_005": """
from solution import unique_preserve_order
def test_unique_preserve_order():
assert unique_preserve_order(["a", "b", "a", "c", "b"]) == ["a", "b", "c"]
assert unique_preserve_order([]) == []
assert unique_preserve_order(["x", "x", "x"]) == ["x"]
""",
# task_006: median() for odd length, even length, and a single element.
"task_006": """
from solution import median
def test_median():
assert median([3, 1, 2]) == 2
assert median([4, 1, 2, 3]) == 2.5
assert median([10]) == 10
""",
# task_007: rotate_right() by 1, 0, more than the length, a negative amount,
# and on an empty list.
"task_007": """
from solution import rotate_right
def test_rotate_right():
assert rotate_right([1, 2, 3], 1) == [3, 1, 2]
assert rotate_right([1, 2, 3], 0) == [1, 2, 3]
assert rotate_right([1, 2, 3], 4) == [3, 1, 2]
assert rotate_right([1, 2, 3], -1) == [2, 3, 1]
assert rotate_right([], 3) == []
""",
# task_008: count_vowels() with upper-case vowels, no vowels, and mixed text.
"task_008": """
from solution import count_vowels
def test_count_vowels():
assert count_vowels("AEIOU") == 5
assert count_vowels("sky") == 0
assert count_vowels("banana") == 3
""",
}
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the baseline evaluation script.

    Returns:
        A namespace with ``seed`` (int, default 42), ``episodes`` (int,
        default 8) and ``output`` (Path, default
        ``outputs/eval_baseline.json``).
    """
    cli = argparse.ArgumentParser(description="Run baseline MutationGym eval.")
    # (flag, converter, default) for every supported option.
    option_specs = (
        ("--seed", int, 42),
        ("--episodes", int, 8),
        ("--output", Path, Path("outputs/eval_baseline.json")),
    )
    for flag, converter, fallback in option_specs:
        cli.add_argument(flag, type=converter, default=fallback)
    return cli.parse_args()
def main() -> None:
    """Run the baseline evaluation and write a JSON report.

    For every episode the environment is reset with ``seed + episode``, the
    test template registered for the returned task id is submitted as one
    finalizing action, and the reported outcome is recorded. Afterwards the
    aggregate summary is printed to stdout and the full payload (summary plus
    per-episode results) is written to the ``--output`` path, creating parent
    directories as needed.
    """
    args = parse_args()
    env = MutationGymEnvironment()

    results = []
    killed_total = 0
    mutant_total = 0
    passed_reference_total = 0

    for episode in range(args.episodes):
        # A distinct seed per episode, derived from the base seed.
        obs = env.reset(seed=args.seed + episode)
        task_id = obs.task_id

        # Fallback for task ids without a template: a trivial smoke test.
        # The escapes are real newlines so the submitted source is a
        # multi-line module like the templates, not one line containing
        # literal backslash-n sequences.
        # NOTE(review): this imports a name equal to the task id from
        # `solution`; confirm such a symbol exists for unknown tasks.
        tests = TEST_TEMPLATES.get(
            task_id,
            f"from solution import {task_id}\n\n"
            "def test_smoke():\n"
            " assert True\n",
        )

        outcome = env.step(MutationGymAction(tests_py=tests, finalize=True))
        results.append(
            {
                "task_id": task_id,
                "reward": outcome.reward,
                "killed": outcome.killed,
                "total_mutants": outcome.total_mutants,
                "passed_reference": outcome.passed_reference,
                "runtime_ms": outcome.runtime_ms,
                "error": outcome.error,
            }
        )
        killed_total += outcome.killed
        mutant_total += outcome.total_mutants
        if outcome.passed_reference:
            passed_reference_total += 1

    # Both rates fall back to 0.0 when their denominator is zero, so running
    # with `--episodes 0` produces an empty report instead of crashing.
    episodes_run = len(results)
    summary = {
        "episodes": args.episodes,
        "avg_kill_rate": (killed_total / mutant_total) if mutant_total else 0.0,
        "reference_pass_rate": (
            (passed_reference_total / episodes_run) if episodes_run else 0.0
        ),
    }
    payload = {"summary": summary, "results": results}

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print(json.dumps(summary, indent=2))
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|