Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Seed variance report for NervousSystem-Env.""" | |
| from __future__ import annotations | |
| import json | |
| import statistics | |
| import requests | |
# Root URL of the environment server; the /reset, /step and /grade endpoints
# are addressed relative to it.
BASE: str = "http://localhost:7860"

# Seeds sent to /reset; every task is run once per seed.
SEEDS: list[int] = [42, 7, 13, 99, 256]

# Task identifiers evaluated by the report, in output order.
TASKS: list[str] = ["easy", "medium", "hard", "cascade"]
def _post(path: str, payload: dict[str, object], *, timeout: float = 30) -> requests.Response:
    """POST *payload* as JSON to ``BASE + path`` and return the raw response."""
    return requests.post(f"{BASE}{path}", json=payload, timeout=timeout)


def _post_json(path: str, payload: dict[str, object]) -> dict:
    """POST *payload* and return the decoded JSON body, raising on HTTP errors."""
    response = _post(path, payload)
    # Fail here with the status code and URL rather than later with an
    # unrelated KeyError when the error body lacks the expected fields.
    response.raise_for_status()
    return response.json()


def _step(action_type: str, parameters: dict[str, object]) -> requests.Response:
    """Send one action to the ``/step`` endpoint and return the raw response."""
    return _post("/step", {"action_type": action_type, "parameters": parameters})


def _inspect_failed_node(obs: dict, task_id: str, seed: int) -> None:
    """Inspect the flight recorder of the first node reported as failed."""
    for node in obs["nodes"]:
        if node["health_status"] == "failed":
            _step("inspect_flight_recorder", {"rank_id": node["node_id"]})
            return
    # A bare StopIteration from next() would hide which episode was malformed.
    raise RuntimeError(
        f"no node with health_status 'failed' in reset observation "
        f"(task_id={task_id!r}, seed={seed})"
    )


def _patch_transformer_candidates() -> None:
    """Patch candidate files in order until one step's reward exceeds 0.1."""
    candidates = (
        "model/transformer.py",
        "model/attention.py",
        "model/feedforward.py",
        "model/embedding.py",
    )
    for file_name in candidates:
        response = _step(
            "patch_divergent_code",
            {"file": file_name, "fix_type": "synchronize_conditional"},
        )
        # The body is consumed below, so an error status must not be ignored.
        response.raise_for_status()
        if response.json()["reward"]["value"] > 0.1:
            break


def run_oracle_episode(task_id: str, seed: int) -> dict[str, object]:
    """Run the scripted oracle action sequence for one task and return its grade.

    The episode is reset with the given task and seed, a fixed sequence of
    actions for that task is sent to ``/step``, and the result of ``/grade``
    is returned. A ``task_id`` without a scripted sequence is reset and
    graded without any steps being taken.

    Steps whose response is not read are sent without a status check; only
    the responses whose bodies are used (reset, grade, and the hard-task
    patch steps) are checked.

    Args:
        task_id: Task identifier sent to the server.
        seed: Seed sent to ``/reset``.

    Returns:
        A dict with ``task_id``, ``seed`` and the ``score``, ``passed`` and
        ``breakdown`` fields of the grade response.

    Raises:
        requests.HTTPError: If a response whose body is consumed has an
            error status.
        RuntimeError: If the ``easy`` or ``cascade`` task reports no failed
            node in its reset observation.
    """
    obs = _post_json("/reset", {"task_id": task_id, "seed": seed})

    if task_id == "easy":
        _inspect_failed_node(obs, task_id, seed)
    elif task_id == "medium":
        _step("topo_reorder", {"affinity": "rack"})
        for _ in range(5):
            _step("noop", {})
    elif task_id == "hard":
        _step("query_nccl_logs", {"time_window": 5})
        _patch_transformer_candidates()
    elif task_id == "cascade":
        _inspect_failed_node(obs, task_id, seed)
        _step("topo_reorder", {"affinity": "rack"})
        _step("query_nccl_logs", {})
        _step(
            "patch_divergent_code",
            {
                "file": "model/transformer.py",
                "fix_type": "synchronize_conditional",
            },
        )

    grade = _post_json("/grade", {"task_id": task_id})
    return {
        "task_id": task_id,
        "seed": seed,
        "score": grade["score"],
        "passed": grade["passed"],
        "breakdown": grade["breakdown"],
    }
def main() -> None:
    """Run every task once per seed, print the scores, and save them as JSON.

    Per-episode scores are printed as they arrive, followed by a table of
    mean, sample standard deviation, minimum and maximum per task. The raw
    per-task score lists are written to ``seed_variance_report.json`` in the
    current working directory.
    """
    print("NervousSystem-Env Seed Variance Report")
    print("=" * 50)

    # Collect one list of scores per task, in seed order.
    results: dict[str, list[float]] = {}
    for task_id in TASKS:
        task_scores: list[float] = []
        results[task_id] = task_scores
        for seed in SEEDS:
            episode = run_oracle_episode(task_id, seed)
            score = float(episode["score"])
            task_scores.append(score)
            print(f" {task_id} seed={seed}: {score:.3f}")

    print("\nVariance Summary:")
    print(f"{'Task':<10} {'Mean':>6} {'Std':>6} {'Min':>6} {'Max':>6}")
    print("-" * 36)
    for task_id, scores in results.items():
        mean = statistics.mean(scores)
        # The sample standard deviation needs at least two data points.
        if len(scores) > 1:
            std = statistics.stdev(scores)
        else:
            std = 0.0
        lowest = min(scores)
        highest = max(scores)
        print(
            f"{task_id:<10} {mean:>6.3f} {std:>6.3f} "
            f"{lowest:>6.3f} {highest:>6.3f}"
        )

    with open("seed_variance_report.json", "w", encoding="utf-8") as file:
        file.write(json.dumps(results, indent=2))
    print("\nSaved to seed_variance_report.json")


if __name__ == "__main__":
    main()