| import numpy as np | |
| from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env | |
| def _mean_utility(proxy_name: str, baseline_name: str, seeds: range) -> float: | |
| utilities = [] | |
| for seed in seeds: | |
| env = make_proxy_env(proxy_name=proxy_name, resolution=32, seed=seed, rollout_horizon=4) | |
| _, _ = env.reset(seed=seed) | |
| chunk = env.baseline_action_chunk(baseline_name, chunk_horizon=4) | |
| outcome = env.evaluate_action_chunk(chunk, rollout_horizon=4) | |
| utility = float(outcome["retrieval_success"]) - float(outcome["final_disturbance_cost"]) - float(outcome["reocclusion_rate"]) | |
| utilities.append(utility) | |
| return float(np.mean(utilities)) | |
def test_teacher_audit():
    """Check that the teacher is at least as good as every other baseline.

    For each name returned by ``available_proxy_names()``, the teacher's mean
    utility over seeds 0-4 must be greater than or equal to the mean utility
    of each competing baseline on the same seeds.
    """
    seeds = range(5)
    competitors = ("random", "retrieve_only", "reveal_only", "no_hold")
    for proxy_name in available_proxy_names():
        teacher_utility = _mean_utility(proxy_name, "teacher", seeds)
        # Competitors are scored lazily, one at a time, so the first failing
        # comparison stops the test before the rest are evaluated.
        for baseline_name in competitors:
            assert teacher_utility >= _mean_utility(proxy_name, baseline_name, seeds)