Spaces:

CreativeEngineer
/

fusion-design-lab

Paused

App Files Files Community

CreativeEngineer committed about 1 month ago

Commit

1ff8c37

1 Parent(s): ba716cf

fix: align baselines with current episode contract

Browse files

Files changed (2) hide show

baselines/heuristic_agent.py +1 -27
baselines/random_agent.py +6 -9

baselines/heuristic_agent.py CHANGED Viewed

@@ -3,8 +3,7 @@
 Strategy: guided perturbations informed by domain knowledge.
 1. Push elongation upward to improve triangularity.
 2. Nudge rotational transform upward to stay on the iota side of feasibility.
-3. Use restore_best to recover from any worsening.
-4. Submit before exhausting budget.
 """
 from __future__ import annotations
@@ -29,10 +28,6 @@ def heuristic_episode(
     obs = env.reset(seed=seed)
     total_reward = 0.0
     trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
-    prev_best = (
-        int(obs.best_feasibility <= 0.01),
-        obs.best_score if obs.best_feasibility <= 0.01 else -obs.best_feasibility,
-    )
     for parameter, direction, magnitude in STRATEGY:
         if obs.done or obs.budget_remaining <= 1:
@@ -56,27 +51,6 @@ def heuristic_episode(
             }
         )
-        current_best = (
-            int(obs.best_feasibility <= 0.01),
-            obs.best_score if obs.best_feasibility <= 0.01 else -obs.best_feasibility,
-        )
-        if current_best < prev_best and obs.budget_remaining > 1:
-            restore = StellaratorAction(intent="restore_best")
-            obs = env.step(restore)
-            total_reward += obs.reward or 0.0
-            trace.append(
-                {
-                    "step": len(trace),
-                    "action": "restore_best",
-                    "score": obs.p1_score,
-                    "best_score": obs.best_score,
-                    "reward": obs.reward,
-                }
-            )
-        prev_best = current_best
     if not obs.done:
         submit = StellaratorAction(intent="submit")
         obs = env.step(submit)

 Strategy: guided perturbations informed by domain knowledge.
 1. Push elongation upward to improve triangularity.
 2. Nudge rotational transform upward to stay on the iota side of feasibility.
+3. Submit before exhausting budget.
 """
 from __future__ import annotations
     obs = env.reset(seed=seed)
     total_reward = 0.0
     trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
     for parameter, direction, magnitude in STRATEGY:
         if obs.done or obs.budget_remaining <= 1:
             }
         )
     if not obs.done:
         submit = StellaratorAction(intent="submit")
         obs = env.step(submit)

baselines/random_agent.py CHANGED Viewed

@@ -22,15 +22,12 @@ def random_episode(
     trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
     while not obs.done:
-        if obs.budget_remaining <= 0:
-            action = StellaratorAction(intent="submit")
-        else:
-            action = StellaratorAction(
-                intent="run",
-                parameter=rng.choice(PARAMETERS),
-                direction=rng.choice(DIRECTIONS),
-                magnitude=rng.choice(MAGNITUDES),
-            )
         obs = env.step(action)
         total_reward += obs.reward or 0.0
         trace.append(

     trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
     while not obs.done:
+        action = StellaratorAction(
+            intent="run",
+            parameter=rng.choice(PARAMETERS),
+            direction=rng.choice(DIRECTIONS),
+            magnitude=rng.choice(MAGNITUDES),
+        )
         obs = env.step(action)
         total_reward += obs.reward or 0.0
         trace.append(