Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| """Random baseline agent for the stellarator design environment.""" | |
| from __future__ import annotations | |
| import random | |
| import sys | |
| from fusion_lab.models import StellaratorAction | |
| from server.environment import StellaratorEnvironment | |
# Candidate values for each field of a "run" action; the random agent draws
# one entry from each list independently.
PARAMETERS = ["aspect_ratio", "elongation", "rotational_transform", "triangularity_scale"]
DIRECTIONS = ["increase", "decrease"]
MAGNITUDES = ["small", "medium", "large"]
def random_episode(
    env: StellaratorEnvironment, seed: int | None = None
) -> tuple[float, list[dict[str, object]]]:
    """Play one episode on ``env`` using uniformly random "run" actions.

    The environment is reset with ``seed`` and the same seed drives the
    action sampler. While the observation is not done, the agent issues a
    random run action; once ``budget_remaining`` is at most 1 it issues a
    "submit" action instead.

    Args:
        env: Environment to reset and step.
        seed: Seed for both ``env.reset`` and the local random generator;
            ``None`` leaves the local generator unseeded.

    Returns:
        A ``(total_reward, trace)`` pair. ``trace[0]`` describes the
        observation returned by reset; each later entry describes one step
        and additionally records the action intent, failure flag and reward.
    """
    sampler = random.Random(seed)
    observation = env.reset(seed=seed)

    cumulative = 0.0
    history: list[dict[str, object]] = []
    history.append(
        {
            "step": 0,
            "score": observation.p1_score,
            "evaluation_fidelity": observation.evaluation_fidelity,
            "constraints_satisfied": observation.constraints_satisfied,
        }
    )

    step_index = 1
    while not observation.done:
        if observation.budget_remaining <= 1:
            action = StellaratorAction(intent="submit")
        else:
            # Draw order (parameter, direction, magnitude) is fixed so that a
            # given seed always yields the same action sequence.
            chosen_parameter = sampler.choice(PARAMETERS)
            chosen_direction = sampler.choice(DIRECTIONS)
            chosen_magnitude = sampler.choice(MAGNITUDES)
            action = StellaratorAction(
                intent="run",
                parameter=chosen_parameter,
                direction=chosen_direction,
                magnitude=chosen_magnitude,
            )

        observation = env.step(action)
        # A missing (None) reward contributes nothing to the running total.
        cumulative += observation.reward or 0.0

        record: dict[str, object] = {
            "step": step_index,
            "action": action.intent,
            "score": observation.p1_score,
            "evaluation_fidelity": observation.evaluation_fidelity,
            "constraints_satisfied": observation.constraints_satisfied,
            "evaluation_failed": observation.evaluation_failed,
            "reward": observation.reward,
        }
        history.append(record)
        step_index += 1

    return cumulative, history
def main(n_episodes: int = 20) -> None:
    """Run the random baseline for ``n_episodes`` episodes and print results.

    One environment instance is created and reused; episode ``i`` is seeded
    with ``i``. A summary line is printed per episode, followed by the mean
    total reward over all episodes.

    Args:
        n_episodes: Number of episodes to run. When this is zero or negative
            nothing is run and a short notice is printed instead of a mean.
    """
    if n_episodes <= 0:
        # With no episodes there are no rewards to average; return before
        # the mean computation below would divide by zero.
        print(f"\nRandom baseline: no episodes run (n_episodes={n_episodes})")
        return

    env = StellaratorEnvironment()
    rewards: list[float] = []
    for i in range(n_episodes):
        total_reward, trace = random_episode(env, seed=i)
        final = trace[-1]
        rewards.append(total_reward)
        print(
            # trace[0] is the reset observation, so steps taken is len - 1.
            f"Episode {i:3d}: steps={len(trace) - 1} "
            f"final_score={final['score']:.6f} fidelity={final['evaluation_fidelity']} "
            f"constraints={'yes' if final['constraints_satisfied'] else 'no'} "
            f"reward={total_reward:+.4f}"
        )
    mean_reward = sum(rewards) / len(rewards)
    print(f"\nRandom baseline ({n_episodes} episodes): mean_reward={mean_reward:+.4f}")
if __name__ == "__main__":
    # The optional first command-line argument sets the episode count;
    # without it the script runs 20 episodes.
    if len(sys.argv) > 1:
        n = int(sys.argv[1])
    else:
        n = 20
    main(n)