# nervousystem-env/scripts/seed_variance.py
# vx7sh · feat(eval): add seed variance reporting script · 95b7a07
#!/usr/bin/env python3
"""Seed variance report for NervousSystem-Env."""
from __future__ import annotations
import json
import statistics
import requests
# Base URL of the environment server; the "/reset", "/step" and "/grade"
# endpoint paths used in this script are appended to it.
BASE = "http://localhost:7860"
# Seeds sent to /reset; main() runs every task once per seed.
SEEDS = [42, 7, 13, 99, 256]
# Task ids to evaluate; run_oracle_episode() has a scripted branch for each.
TASKS = ["easy", "medium", "hard", "cascade"]
# Per-request timeout in seconds; every call in this script uses the same value.
_REQUEST_TIMEOUT_S = 30

# Files the "hard" script patches, in order, until one patch is rewarded.
_HARD_PATCH_CANDIDATES = (
    "model/transformer.py",
    "model/attention.py",
    "model/feedforward.py",
    "model/embedding.py",
)


def _post_json(path: str, payload: dict[str, object]) -> dict:
    """POST *payload* to ``BASE + path`` and return the decoded JSON body.

    Raises:
        requests.HTTPError: on a 4xx/5xx response, so a server-side failure
            is reported as such instead of surfacing later as a ``KeyError``
            on the error body.
    """
    response = requests.post(
        f"{BASE}{path}",
        json=payload,
        timeout=_REQUEST_TIMEOUT_S,
    )
    response.raise_for_status()
    return response.json()


def _step(action_type: str, parameters: dict[str, object]) -> None:
    """Send one action to ``/step`` and discard the response.

    The status code is deliberately not checked: these steps were
    fire-and-forget in the original script and that best-effort behaviour
    is kept.
    """
    requests.post(
        f"{BASE}/step",
        json={"action_type": action_type, "parameters": parameters},
        timeout=_REQUEST_TIMEOUT_S,
    )


def _failed_node_id(obs: dict, task_id: str, seed: int) -> object:
    """Return the ``node_id`` of the first node whose status is ``"failed"``.

    Raises:
        RuntimeError: if no node in ``obs["nodes"]`` is marked failed.
    """
    for node in obs["nodes"]:
        if node["health_status"] == "failed":
            return node["node_id"]
    # Explicit error instead of the bare StopIteration that next() would leak.
    raise RuntimeError(
        f"reset observation for task {task_id!r} (seed={seed}) "
        "contains no node with health_status == 'failed'"
    )


def run_oracle_episode(task_id: str, seed: int) -> dict[str, object]:
    """Run the scripted oracle action sequence for one task and return its grade.

    The episode is reset with *seed*, a fixed per-task list of actions is
    posted to ``/step``, and the result of ``/grade`` is returned. A
    *task_id* without a scripted branch sends no actions and is graded
    directly after the reset.

    Args:
        task_id: One of ``"easy"``, ``"medium"``, ``"hard"``, ``"cascade"``.
        seed: Seed forwarded to ``/reset``.

    Returns:
        A dict with keys ``task_id``, ``seed``, ``score``, ``passed`` and
        ``breakdown``; the last three are copied from the ``/grade`` response.

    Raises:
        requests.HTTPError: if ``/reset``, ``/grade`` or a "hard" patch step
            answers with an HTTP error status.
        RuntimeError: if the "easy"/"cascade" script finds no failed node in
            the reset observation.
    """
    obs = _post_json("/reset", {"task_id": task_id, "seed": seed})

    if task_id == "easy":
        failing = _failed_node_id(obs, task_id, seed)
        _step("inspect_flight_recorder", {"rank_id": failing})

    elif task_id == "medium":
        _step("topo_reorder", {"affinity": "rack"})
        # Five no-op steps follow the reorder before grading.
        for _ in range(5):
            _step("noop", {})

    elif task_id == "hard":
        _step("query_nccl_logs", {"time_window": 5})
        for file_name in _HARD_PATCH_CANDIDATES:
            outcome = _post_json(
                "/step",
                {
                    "action_type": "patch_divergent_code",
                    "parameters": {
                        "file": file_name,
                        "fix_type": "synchronize_conditional",
                    },
                },
            )
            # Stop at the first patch that earns more than 0.1 reward.
            if outcome["reward"]["value"] > 0.1:
                break

    elif task_id == "cascade":
        failing = _failed_node_id(obs, task_id, seed)
        _step("inspect_flight_recorder", {"rank_id": failing})
        _step("topo_reorder", {"affinity": "rack"})
        _step("query_nccl_logs", {})
        _step(
            "patch_divergent_code",
            {
                "file": "model/transformer.py",
                "fix_type": "synchronize_conditional",
            },
        )

    grade = _post_json("/grade", {"task_id": task_id})
    return {
        "task_id": task_id,
        "seed": seed,
        "score": grade["score"],
        "passed": grade["passed"],
        "breakdown": grade["breakdown"],
    }
def main() -> None:
    """Run every task over every seed, print a report, and save the scores.

    For each task in ``TASKS`` and each seed in ``SEEDS`` one oracle episode
    is run and its score printed. A per-task summary table (mean, sample
    standard deviation, min, max) follows, and the raw per-task score lists
    are written to ``seed_variance_report.json`` in the working directory.
    """
    print("NervousSystem-Env Seed Variance Report")
    print("=" * 50)

    results: dict[str, list[float]] = {}
    for task_id in TASKS:
        task_scores: list[float] = []
        results[task_id] = task_scores
        for seed in SEEDS:
            episode = run_oracle_episode(task_id, seed)
            score = float(episode["score"])
            task_scores.append(score)
            print(f" {task_id} seed={seed}: {score:.3f}")

    print("\nVariance Summary:")
    print(f"{'Task':<10} {'Mean':>6} {'Std':>6} {'Min':>6} {'Max':>6}")
    print("-" * 36)
    for name, values in results.items():
        average = statistics.mean(values)
        # Sample standard deviation needs at least two data points.
        if len(values) > 1:
            spread = statistics.stdev(values)
        else:
            spread = 0.0
        lowest, highest = min(values), max(values)
        print(
            f"{name:<10} {average:>6.3f} {spread:>6.3f} {lowest:>6.3f} {highest:>6.3f}"
        )

    report_path = "seed_variance_report.json"
    with open(report_path, "w", encoding="utf-8") as handle:
        json.dump(results, handle, indent=2)
    print(f"\nSaved to {report_path}")


if __name__ == "__main__":
    main()