from verifier.sandbox import run_code
from verifier.metrics import compute_pass_rate


def _evaluate_case(code, stdin, expected):
    """Run *code* on one test case and build its result record."""
    ok, output = run_code(code, stdin)
    actual = output.strip()
    wanted = expected.strip()
    return {
        "input": stdin.strip(),
        "expected": wanted,
        "output": actual,
        # Short-circuits on ``ok``: a falsy ``ok`` is stored as-is.
        "passed": ok and actual == wanted,
        # On failure the raw (unstripped) output is kept as the error text.
        "error": None if ok else output,
    }


def verify(code: str, test_cases):
    """Check *code* against every test case and score the outcome.

    Each item of *test_cases* is unpacked as a ``(stdin, expected)`` pair.
    The code is executed through ``run_code`` with that stdin, and a case
    passes when the run reports success and its output equals the expected
    text once both are stripped of surrounding whitespace.

    Args:
        code: Source text handed to ``run_code``.
        test_cases: Iterable of ``(stdin, expected)`` pairs.

    Returns:
        A ``(reward, info)`` tuple. ``reward`` and the base contents of
        ``info`` come from ``compute_pass_rate``; ``info`` additionally
        holds the per-case records under the ``"results"`` key.
    """
    results = [
        _evaluate_case(code, stdin, expected)
        for stdin, expected in test_cases
    ]
    reward, metrics = compute_pass_rate(results)
    info = {**metrics, "results": results}
    return reward, info