CreativeEngineer committed on
Commit
1ff8c37
·
1 Parent(s): ba716cf

fix: align baselines with current episode contract

Browse files
baselines/heuristic_agent.py CHANGED
@@ -3,8 +3,7 @@
3
  Strategy: guided perturbations informed by domain knowledge.
4
  1. Push elongation upward to improve triangularity.
5
  2. Nudge rotational transform upward to stay on the iota side of feasibility.
6
- 3. Use restore_best to recover from any worsening.
7
- 4. Submit before exhausting budget.
8
  """
9
 
10
  from __future__ import annotations
@@ -29,10 +28,6 @@ def heuristic_episode(
29
  obs = env.reset(seed=seed)
30
  total_reward = 0.0
31
  trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
32
- prev_best = (
33
- int(obs.best_feasibility <= 0.01),
34
- obs.best_score if obs.best_feasibility <= 0.01 else -obs.best_feasibility,
35
- )
36
 
37
  for parameter, direction, magnitude in STRATEGY:
38
  if obs.done or obs.budget_remaining <= 1:
@@ -56,27 +51,6 @@ def heuristic_episode(
56
  }
57
  )
58
 
59
- current_best = (
60
- int(obs.best_feasibility <= 0.01),
61
- obs.best_score if obs.best_feasibility <= 0.01 else -obs.best_feasibility,
62
- )
63
-
64
- if current_best < prev_best and obs.budget_remaining > 1:
65
- restore = StellaratorAction(intent="restore_best")
66
- obs = env.step(restore)
67
- total_reward += obs.reward or 0.0
68
- trace.append(
69
- {
70
- "step": len(trace),
71
- "action": "restore_best",
72
- "score": obs.p1_score,
73
- "best_score": obs.best_score,
74
- "reward": obs.reward,
75
- }
76
- )
77
-
78
- prev_best = current_best
79
-
80
  if not obs.done:
81
  submit = StellaratorAction(intent="submit")
82
  obs = env.step(submit)
 
3
  Strategy: guided perturbations informed by domain knowledge.
4
  1. Push elongation upward to improve triangularity.
5
  2. Nudge rotational transform upward to stay on the iota side of feasibility.
6
+ 3. Submit before exhausting budget.
 
7
  """
8
 
9
  from __future__ import annotations
 
28
  obs = env.reset(seed=seed)
29
  total_reward = 0.0
30
  trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
 
 
 
 
31
 
32
  for parameter, direction, magnitude in STRATEGY:
33
  if obs.done or obs.budget_remaining <= 1:
 
51
  }
52
  )
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  if not obs.done:
55
  submit = StellaratorAction(intent="submit")
56
  obs = env.step(submit)
baselines/random_agent.py CHANGED
@@ -22,15 +22,12 @@ def random_episode(
22
  trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
23
 
24
  while not obs.done:
25
- if obs.budget_remaining <= 0:
26
- action = StellaratorAction(intent="submit")
27
- else:
28
- action = StellaratorAction(
29
- intent="run",
30
- parameter=rng.choice(PARAMETERS),
31
- direction=rng.choice(DIRECTIONS),
32
- magnitude=rng.choice(MAGNITUDES),
33
- )
34
  obs = env.step(action)
35
  total_reward += obs.reward or 0.0
36
  trace.append(
 
22
  trace: list[dict[str, object]] = [{"step": 0, "score": obs.p1_score}]
23
 
24
  while not obs.done:
25
+ action = StellaratorAction(
26
+ intent="run",
27
+ parameter=rng.choice(PARAMETERS),
28
+ direction=rng.choice(DIRECTIONS),
29
+ magnitude=rng.choice(MAGNITUDES),
30
+ )
 
 
 
31
  obs = env.step(action)
32
  total_reward += obs.reward or 0.0
33
  trace.append(