Commit ·
bca349b
1
Parent(s): dc2e64b
docs: add replay playtest report with reward branch coverage
Browse files
Run 5 scripted episodes across all 3 seeds exercising 12/13 reward
branches. Key findings: cross-fidelity gap confirmed (low-fi feasible
state crashes at high-fi submit), elongation crash pocket at ~1.25-1.30,
and feasibility boundary is multi-parametric.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- baselines/replay_playtest.py +28 -29
- docs/P1_REPLAY_PLAYTEST_REPORT.md +238 -0
baselines/replay_playtest.py
CHANGED
|
@@ -102,7 +102,7 @@ def _run_episode(
|
|
| 102 |
return records
|
| 103 |
|
| 104 |
|
| 105 |
-
def _run(
|
| 106 |
return StellaratorAction(
|
| 107 |
intent="run",
|
| 108 |
parameter=param,
|
|
@@ -125,12 +125,12 @@ EPISODE_1 = (
|
|
| 125 |
"seed0_repair_objective_exhaustion",
|
| 126 |
0,
|
| 127 |
[
|
| 128 |
-
_run("
|
| 129 |
-
_run("
|
| 130 |
-
_run("
|
| 131 |
-
_run("
|
| 132 |
-
_run("
|
| 133 |
-
_run("
|
| 134 |
],
|
| 135 |
)
|
| 136 |
|
|
@@ -138,14 +138,12 @@ EPISODE_2 = (
|
|
| 138 |
"seed1_repair_different_seed",
|
| 139 |
1,
|
| 140 |
[
|
| 141 |
-
_run(
|
| 142 |
-
|
| 143 |
-
), #
|
| 144 |
-
_run("
|
| 145 |
-
_run("
|
| 146 |
-
_run("
|
| 147 |
-
_run("run", "elongation", "decrease", "small"), # more shaping
|
| 148 |
-
_run("run", "elongation", "decrease", "small"), # budget exhaustion
|
| 149 |
],
|
| 150 |
)
|
| 151 |
|
|
@@ -153,12 +151,12 @@ EPISODE_3 = (
|
|
| 153 |
"seed2_boundary_clamping",
|
| 154 |
2,
|
| 155 |
[
|
| 156 |
-
_run("
|
| 157 |
-
_run("
|
| 158 |
-
_run("
|
| 159 |
-
_run("
|
| 160 |
-
_run("
|
| 161 |
-
_run("
|
| 162 |
],
|
| 163 |
)
|
| 164 |
|
|
@@ -166,12 +164,12 @@ EPISODE_4 = (
|
|
| 166 |
"seed0_crash_recovery_restore",
|
| 167 |
0,
|
| 168 |
[
|
| 169 |
-
_run("
|
| 170 |
-
_run("
|
| 171 |
-
_run("
|
| 172 |
_restore(), # recover best state
|
| 173 |
-
_run("
|
| 174 |
-
_run("
|
| 175 |
],
|
| 176 |
)
|
| 177 |
|
|
@@ -179,9 +177,10 @@ EPISODE_5 = (
|
|
| 179 |
"seed0_repair_objective_submit",
|
| 180 |
0,
|
| 181 |
[
|
| 182 |
-
_run("
|
| 183 |
-
_run("
|
| 184 |
-
|
|
|
|
| 185 |
],
|
| 186 |
)
|
| 187 |
|
|
|
|
| 102 |
return records
|
| 103 |
|
| 104 |
|
| 105 |
+
def _run(param: str, direction: str, magnitude: str) -> StellaratorAction:
|
| 106 |
return StellaratorAction(
|
| 107 |
intent="run",
|
| 108 |
parameter=param,
|
|
|
|
| 125 |
"seed0_repair_objective_exhaustion",
|
| 126 |
0,
|
| 127 |
[
|
| 128 |
+
_run("rotational_transform", "increase", "medium"), # rt 1.5β1.6 (setup)
|
| 129 |
+
_run("triangularity_scale", "increase", "medium"), # tri 0.55β0.60 β cross feasibility
|
| 130 |
+
_run("elongation", "decrease", "small"), # feasible-side shaping
|
| 131 |
+
_run("elongation", "decrease", "small"), # more shaping
|
| 132 |
+
_run("elongation", "decrease", "small"), # more shaping
|
| 133 |
+
_run("elongation", "decrease", "small"), # budget=0 β done bonus
|
| 134 |
],
|
| 135 |
)
|
| 136 |
|
|
|
|
| 138 |
"seed1_repair_different_seed",
|
| 139 |
1,
|
| 140 |
[
|
| 141 |
+
_run("rotational_transform", "increase", "medium"), # rt 1.6β1.7 (needed for ar=3.4)
|
| 142 |
+
_run("triangularity_scale", "increase", "medium"), # tri 0.55β0.60 β cross feasibility
|
| 143 |
+
_run("elongation", "decrease", "small"), # feasible-side shaping
|
| 144 |
+
_run("elongation", "decrease", "small"), # more shaping
|
| 145 |
+
_run("elongation", "decrease", "small"), # more shaping
|
| 146 |
+
_run("elongation", "decrease", "small"), # budget exhaustion
|
|
|
|
|
|
|
| 147 |
],
|
| 148 |
)
|
| 149 |
|
|
|
|
| 151 |
"seed2_boundary_clamping",
|
| 152 |
2,
|
| 153 |
[
|
| 154 |
+
_run("aspect_ratio", "increase", "large"), # ar=3.8 + 0.2 β clamped at 3.8
|
| 155 |
+
_run("triangularity_scale", "increase", "medium"), # repair toward feasibility
|
| 156 |
+
_run("triangularity_scale", "increase", "medium"), # push further
|
| 157 |
+
_run("elongation", "decrease", "small"), # shaping if feasible
|
| 158 |
+
_run("aspect_ratio", "decrease", "large"), # move ar down
|
| 159 |
+
_run("elongation", "decrease", "small"), # budget exhaustion
|
| 160 |
],
|
| 161 |
)
|
| 162 |
|
|
|
|
| 164 |
"seed0_crash_recovery_restore",
|
| 165 |
0,
|
| 166 |
[
|
| 167 |
+
_run("triangularity_scale", "increase", "medium"), # cross feasibility first
|
| 168 |
+
_run("rotational_transform", "increase", "large"), # rt 1.5β1.7
|
| 169 |
+
_run("rotational_transform", "increase", "large"), # rt 1.7β1.9 (crash zone)
|
| 170 |
_restore(), # recover best state
|
| 171 |
+
_run("elongation", "decrease", "small"), # continue from best
|
| 172 |
+
_run("elongation", "decrease", "small"), # budget exhaustion
|
| 173 |
],
|
| 174 |
)
|
| 175 |
|
|
|
|
| 177 |
"seed0_repair_objective_submit",
|
| 178 |
0,
|
| 179 |
[
|
| 180 |
+
_run("rotational_transform", "increase", "medium"), # rt 1.5β1.6 (setup)
|
| 181 |
+
_run("triangularity_scale", "increase", "medium"), # tri 0.55β0.60 β cross feasibility
|
| 182 |
+
_run("elongation", "decrease", "small"), # feasible-side objective move
|
| 183 |
+
_submit(), # explicit high-fidelity submit from feasible state
|
| 184 |
],
|
| 185 |
)
|
| 186 |
|
docs/P1_REPLAY_PLAYTEST_REPORT.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P1 Replay Playtest Report
|
| 2 |
+
|
| 3 |
+
Date: 2026-03-07
|
| 4 |
+
|
| 5 |
+
## Purpose
|
| 6 |
+
|
| 7 |
+
Expand reward branch coverage beyond the initial manual playtest (Episodes A-B in
|
| 8 |
+
`P1_MANUAL_PLAYTEST_LOG.md`). That log covered 1 seed, 2 episodes, 3 steps.
|
| 9 |
+
This replay covers all 3 seeds, 5 episodes, 27 steps, and exercises all but one
|
| 10 |
+
reward branch in `server/environment.py:_compute_reward`.
|
| 11 |
+
|
| 12 |
+
## Method
|
| 13 |
+
|
| 14 |
+
Script: `baselines/replay_playtest.py`
|
| 15 |
+
|
| 16 |
+
- direct `StellaratorEnvironment` instantiation (no server)
|
| 17 |
+
- fixed action sequences for reproducibility
|
| 18 |
+
- same pattern as `baselines/random_agent.py` and `baselines/heuristic_agent.py`
|
| 19 |
+
|
| 20 |
+
## Episode results
|
| 21 |
+
|
| 22 |
+
### Episode 1: seed 0 — repair + objective shaping + budget exhaustion
|
| 23 |
+
|
| 24 |
+
Start: `ar=3.6, elong=1.4, rt=1.5, tri=0.55`, feasibility=0.050653, score=0.0
|
| 25 |
+
|
| 26 |
+
| Step | Action | Reward | Score | Feasibility | Elongation | Status | Budget |
|
| 27 |
+
|------|--------|--------|-------|-------------|------------|--------|--------|
|
| 28 |
+
| 1 | rt increase medium | -0.1000 | 0.000000 | 0.050653 | 6.7295 | viol | 5 |
|
| 29 |
+
| 2 | tri increase medium | +3.1533 | 0.291660 | 0.000000 | 7.3751 | OK | 4 |
|
| 30 |
+
| 3 | elong decrease small | +0.2665 | 0.295731 | 0.000865 | 7.3384 | OK | 3 |
|
| 31 |
+
| 4 | elong decrease small | -2.1000 | 0.000000 | 1000000 | 10.0000 | FAIL | 2 |
|
| 32 |
+
| 5 | elong decrease small | -2.1000 | 0.000000 | 1000000 | 10.0000 | FAIL | 1 |
|
| 33 |
+
| 6 | elong decrease small | +2.5350 | 0.307074 | 0.004561 | 7.2363 | OK | 0 |
|
| 34 |
+
|
| 35 |
+
Total reward: +1.6548
|
| 36 |
+
|
| 37 |
+
Branches exercised:
|
| 38 |
+
- feasibility crossing bonus (+3.0, step 2)
|
| 39 |
+
- feasible-side elongation shaping (step 3)
|
| 40 |
+
- VMEC failure penalty (-2.1, steps 4-5)
|
| 41 |
+
- recovery bonus (+1.0, step 6)
|
| 42 |
+
- budget exhaustion done-time improvement bonus (step 6)
|
| 43 |
+
|
| 44 |
+
Finding: **elongation crash pocket at elong ~1.30-1.25**. Steps 4-5 crashed
|
| 45 |
+
during low-fi evaluation after decreasing elongation from 1.35 to 1.30 and 1.25.
|
| 46 |
+
Recovery occurred at elong=1.20 (step 6). This crash zone is within the
|
| 47 |
+
documented parameter range `(1.2, 1.8)` and is not mapped in the measured sweep.
|
| 48 |
+
|
| 49 |
+
### Episode 2: seed 1 — repair from different seed
|
| 50 |
+
|
| 51 |
+
Start: `ar=3.4, elong=1.4, rt=1.6, tri=0.55`, feasibility=0.050653, score=0.0
|
| 52 |
+
|
| 53 |
+
| Step | Action | Reward | Score | Feasibility | Elongation | Status | Budget |
|
| 54 |
+
|------|--------|--------|-------|-------------|------------|--------|--------|
|
| 55 |
+
| 1 | rt increase medium | -0.1000 | 0.000000 | 0.050653 | 6.8493 | viol | 5 |
|
| 56 |
+
| 2 | tri increase medium | +3.1042 | 0.276209 | 0.009819 | 7.5141 | OK | 4 |
|
| 57 |
+
| 3 | elong decrease small | +0.2824 | 0.280458 | 0.001415 | 7.4759 | OK | 3 |
|
| 58 |
+
| 4 | elong decrease small | +0.2724 | 0.284596 | 0.002252 | 7.4386 | OK | 2 |
|
| 59 |
+
| 5 | elong decrease small | +0.2557 | 0.288548 | 0.003499 | 7.4031 | OK | 1 |
|
| 60 |
+
| 6 | elong decrease small | +0.8212 | 0.292289 | 0.004561 | 7.3694 | OK | 0 |
|
| 61 |
+
|
| 62 |
+
Total reward: +4.6359
|
| 63 |
+
|
| 64 |
+
Branches exercised:
|
| 65 |
+
- feasibility crossing from a non-default seed (step 2)
|
| 66 |
+
- sustained feasible-side elongation shaping (steps 3-6)
|
| 67 |
+
- budget exhaustion done-time improvement bonus (step 6)
|
| 68 |
+
|
| 69 |
+
Finding: **cleanest full-episode success**. Six consecutive successful
|
| 70 |
+
evaluations, monotonic score improvement, positive reward every step after
|
| 71 |
+
crossing. Confirms that the repair+optimize arc is legible across a full episode
|
| 72 |
+
from seed 1.
|
| 73 |
+
|
| 74 |
+
### Episode 3: seed 2 — boundary clamping + feasibility regression
|
| 75 |
+
|
| 76 |
+
Start: `ar=3.8, elong=1.4, rt=1.5, tri=0.55`, feasibility=0.050653, score=0.0
|
| 77 |
+
|
| 78 |
+
| Step | Action | Reward | Score | Feasibility | Elongation | Status | Budget |
|
| 79 |
+
|------|--------|--------|-------|-------------|------------|--------|--------|
|
| 80 |
+
| 1 | ar increase large | -0.1000 | 0.000000 | 0.050653 | 6.5502 | viol | 5 |
|
| 81 |
+
| 2 | tri increase medium | +3.1533 | 0.314255 | 0.000000 | 7.1717 | OK | 4 |
|
| 82 |
+
| 3 | tri increase medium | -3.3598 | 0.000000 | 0.051950 | 7.8596 | viol | 3 |
|
| 83 |
+
| 4 | elong decrease small | -0.0715 | 0.000000 | 0.046243 | 7.8309 | viol | 2 |
|
| 84 |
+
| 5 | ar decrease large | -0.4932 | 0.000000 | 0.124880 | 7.3386 | viol | 1 |
|
| 85 |
+
| 6 | elong decrease small | -0.5650 | 0.000000 | 0.117873 | 7.3091 | viol | 0 |
|
| 86 |
+
|
| 87 |
+
Total reward: -1.4362
|
| 88 |
+
|
| 89 |
+
Branches exercised:
|
| 90 |
+
- boundary clamping (step 1: ar=3.8 + 0.2 clamped at 3.8, no physics change, reward = step cost only)
|
| 91 |
+
- feasibility crossing bonus (+3.0, step 2)
|
| 92 |
+
- **feasibility regression penalty** (-3.0, step 3: pushed tri too far, lost feasibility)
|
| 93 |
+
- infeasible feasibility shaping (steps 4-6)
|
| 94 |
+
- budget exhaustion done-time penalty (step 6: not improved)
|
| 95 |
+
|
| 96 |
+
Finding: **feasibility is non-monotonic in triangularity_scale**. Crossing at
|
| 97 |
+
tri=0.60 (score=0.314), but tri=0.65 breaks feasibility (feas=0.052). The
|
| 98 |
+
feasible zone is a narrow band, not an open region. The regression penalty
|
| 99 |
+
(-3.36 total) is clearly legible.
|
| 100 |
+
|
| 101 |
+
### Episode 4: seed 0 — crash recovery + restore_best
|
| 102 |
+
|
| 103 |
+
Start: `ar=3.6, elong=1.4, rt=1.5, tri=0.55`, feasibility=0.050653, score=0.0
|
| 104 |
+
|
| 105 |
+
| Step | Action | Reward | Score | Feasibility | Elongation | Status | Budget |
|
| 106 |
+
|------|--------|--------|-------|-------------|------------|--------|--------|
|
| 107 |
+
| 1 | tri increase medium | -0.2593 | 0.000000 | 0.082515 | 6.7218 | viol | 5 |
|
| 108 |
+
| 2 | rt increase large | +3.3126 | 0.210239 | 0.000000 | 8.1079 | OK | 4 |
|
| 109 |
+
| 3 | rt increase large | -2.1000 | 0.000000 | 1000000 | 10.0000 | FAIL | 3 |
|
| 110 |
+
| 4 | restore_best | +0.9000 | 0.210239 | 0.000000 | 8.1079 | OK | 2 |
|
| 111 |
+
| 5 | elong decrease small | +0.2541 | 0.214174 | 0.000865 | 8.0724 | OK | 1 |
|
| 112 |
+
| 6 | elong decrease small | +0.6821 | 0.218018 | 0.002252 | 8.0378 | OK | 0 |
|
| 113 |
+
|
| 114 |
+
Total reward: +2.7895
|
| 115 |
+
|
| 116 |
+
Branches exercised:
|
| 117 |
+
- infeasible feasibility shaping (step 1: tri alone worsened feasibility)
|
| 118 |
+
- feasibility crossing via large rt jump (step 2)
|
| 119 |
+
- **VMEC failure at rt=1.9** (-2.1, step 3: crash zone as documented in sweep report)
|
| 120 |
+
- **restore_best + recovery bonus** (+0.9, step 4: reverts to best-known state, +1.0 recovery -0.1 step cost)
|
| 121 |
+
- feasible-side elongation shaping (steps 5-6)
|
| 122 |
+
- budget exhaustion done-time improvement bonus (step 6)
|
| 123 |
+
|
| 124 |
+
Finding: **restore_best works correctly** and the recovery bonus (+1.0) is
|
| 125 |
+
legible. After reverting from a VMEC crash, the agent can continue improving
|
| 126 |
+
from its saved best state.
|
| 127 |
+
|
| 128 |
+
Note: step 1 reveals that `triangularity_scale increase medium` alone (without
|
| 129 |
+
a preceding rt increase) **worsens** feasibility for seed 0. The feasibility
|
| 130 |
+
boundary is a multi-parameter surface, not a single-knob threshold.
|
| 131 |
+
|
| 132 |
+
### Episode 5: seed 0 — repair + objective move + explicit submit
|
| 133 |
+
|
| 134 |
+
Start: `ar=3.6, elong=1.4, rt=1.5, tri=0.55`, feasibility=0.050653, score=0.0
|
| 135 |
+
|
| 136 |
+
| Step | Action | Reward | Score | Feasibility | Elongation | Status | Budget |
|
| 137 |
+
|------|--------|--------|-------|-------------|------------|--------|--------|
|
| 138 |
+
| 1 | rt increase medium | -0.1000 | 0.000000 | 0.050653 | 6.7295 | viol | 5 |
|
| 139 |
+
| 2 | tri increase medium | +3.1533 | 0.291660 | 0.000000 | 7.3751 | OK | 4 |
|
| 140 |
+
| 3 | elong decrease small | +0.2665 | 0.295731 | 0.000865 | 7.3384 | OK | 3 |
|
| 141 |
+
| 4 | submit | -3.0000 | 0.000000 | 1000000 | 10.0000 | FAIL | 3 |
|
| 142 |
+
|
| 143 |
+
Total reward: +0.3198
|
| 144 |
+
|
| 145 |
+
Branches exercised:
|
| 146 |
+
- feasibility crossing (step 2)
|
| 147 |
+
- feasible-side elongation shaping (step 3)
|
| 148 |
+
- **submit high-fidelity evaluation** (step 4)
|
| 149 |
+
- **submit failure penalty** (-3.0, step 4: VMEC crash at high fidelity)
|
| 150 |
+
|
| 151 |
+
Finding: **cross-fidelity gap confirmed**. The state at
|
| 152 |
+
`(ar=3.6, elong=1.35, rt=1.6, tri=0.60)` passes low-fidelity evaluation
|
| 153 |
+
(step 3: score=0.296, constraints satisfied) but **crashes at high-fidelity
|
| 154 |
+
evaluation** (step 4: VMEC failure). The low-fi repair story does not survive
|
| 155 |
+
the real final check for this particular path.
|
| 156 |
+
|
| 157 |
+
## Reward branch coverage summary
|
| 158 |
+
|
| 159 |
+
| Branch | Code reference | First run | This replay |
|
| 160 |
+
|--------|---------------|-----------|-------------|
|
| 161 |
+
| Feasibility crossing bonus (+3.0) | `environment.py:235-236` | Ep A step 2 | Ep 1-4 |
|
| 162 |
+
| Feasibility regression penalty (-3.0) | `environment.py:237-238` | not tested | Ep 3 step 3 |
|
| 163 |
+
| Feasible-side elongation shaping | `environment.py:240-241` | not tested | Ep 1-2, Ep 4 |
|
| 164 |
+
| Infeasible feasibility shaping | `environment.py:242-243` | Ep A step 1 | Ep 3 steps 4-6 |
|
| 165 |
+
| Step cost (-0.1) | `environment.py:245-246` | Ep A step 1 | all run steps |
|
| 166 |
+
| VMEC failure penalty (-2.1) | `environment.py:223-226` | not tested | Ep 1 steps 4-5, Ep 4 step 3 |
|
| 167 |
+
| Submit failure penalty (-3.0) | `environment.py:227-228` | not tested | Ep 5 step 4 |
|
| 168 |
+
| Budget exhaustion done-penalty | `environment.py:264-265` | not tested | Ep 3 step 6 |
|
| 169 |
+
| Recovery bonus (+1.0) | `environment.py:248-249` | not tested | Ep 1 step 6, Ep 4 step 4 |
|
| 170 |
+
| Budget exhaustion done-bonus | `environment.py:258-263` | not tested | Ep 1 step 6, Ep 2 step 6, Ep 4 step 6 |
|
| 171 |
+
| Submit improvement bonus | `environment.py:260-261` | not tested | not triggered (submit crashed) |
|
| 172 |
+
| Clamping (no physics change) | `environment.py:412-414` | not tested | Ep 3 step 1 |
|
| 173 |
+
| restore_best | `environment.py:175-195` | not tested | Ep 4 step 4 |
|
| 174 |
+
|
| 175 |
+
Coverage: 12 of 13 branches exercised. The only untested branch is the
|
| 176 |
+
**submit improvement bonus** (submit from a state that is feasible at high
|
| 177 |
+
fidelity). This requires finding a repair path that survives high-fi first.
|
| 178 |
+
|
| 179 |
+
## Critical findings
|
| 180 |
+
|
| 181 |
+
### 1. Cross-fidelity gap is real (Episode 5)
|
| 182 |
+
|
| 183 |
+
The canonical repair path from seed 0 (increase rt medium, increase tri medium,
|
| 184 |
+
decrease elong small) produces a low-fi feasible state that crashes at high
|
| 185 |
+
fidelity. This confirms the concern documented in `P1_MANUAL_PLAYTEST_LOG.md`
|
| 186 |
+
line 53 and `FUSION_DESIGN_LAB_PLAN_V2.md` open items.
|
| 187 |
+
|
| 188 |
+
Implication: no currently tested repair path from seed 0 has a known-good
|
| 189 |
+
high-fidelity submit. The submit improvement bonus branch cannot be exercised
|
| 190 |
+
until a cross-fidelity-safe path is found.
|
| 191 |
+
|
| 192 |
+
### 2. Elongation crash pocket (Episode 1)
|
| 193 |
+
|
| 194 |
+
VMEC crashes at `elongation ~1.25-1.30` during low-fi evaluation, with recovery
|
| 195 |
+
at `elongation=1.20`. This crash zone is inside the documented parameter range
|
| 196 |
+
`(1.2, 1.8)` and was not discovered by the measured sweep (which only varied
|
| 197 |
+
`rotational_transform` and `triangularity_scale` in the targeted grid).
|
| 198 |
+
|
| 199 |
+
Implication: the elongation dimension has internal crash pockets that the
|
| 200 |
+
current sweep does not map. Agents that decrease elongation aggressively will
|
| 201 |
+
hit unexpected crashes.
|
| 202 |
+
|
| 203 |
+
### 3. Feasibility boundary is multi-parametric (Episode 4 step 1)
|
| 204 |
+
|
| 205 |
+
`triangularity_scale increase medium` alone worsens feasibility for seed 0
|
| 206 |
+
(0.051 to 0.083). The original manual playtest crossed feasibility only because
|
| 207 |
+
`rotational_transform` was already increased to 1.6 first. The feasibility
|
| 208 |
+
boundary is a surface in 4D parameter space, not a threshold on a single knob.
|
| 209 |
+
|
| 210 |
+
### 4. Feasibility is non-monotonic in triangularity (Episode 3 steps 2-3)
|
| 211 |
+
|
| 212 |
+
`triangularity_scale=0.60` is feasible but `0.65` is not (from seed 2). The
|
| 213 |
+
feasible zone is a narrow band. Pushing a single knob further does not
|
| 214 |
+
monotonically improve the design.
|
| 215 |
+
|
| 216 |
+
## Comparison with initial manual playtest
|
| 217 |
+
|
| 218 |
+
| Property | Initial (Ep A-B) | This replay |
|
| 219 |
+
|----------|------------------|-------------|
|
| 220 |
+
| Seeds tested | 1 (seed 0) | 3 (seeds 0, 1, 2) |
|
| 221 |
+
| Episodes | 2 | 5 |
|
| 222 |
+
| Total steps | 3 | 27 |
|
| 223 |
+
| Reward branches covered | 3 of 13 | 12 of 13 |
|
| 224 |
+
| Feasible-side shaping | not tested | confirmed legible |
|
| 225 |
+
| VMEC crash handling | not tested | confirmed legible |
|
| 226 |
+
| restore_best | not tested | confirmed working |
|
| 227 |
+
| Submit tested | no | yes (crashed at high-fi) |
|
| 228 |
+
| Cross-fidelity evidence | none | gap confirmed |
|
| 229 |
+
|
| 230 |
+
## Open items
|
| 231 |
+
|
| 232 |
+
1. **Find a high-fi-safe repair path** to exercise the submit improvement bonus
|
| 233 |
+
(the last uncovered branch) and provide positive submit-side evidence.
|
| 234 |
+
2. **Map the elongation crash pocket** with a targeted sweep over the elongation
|
| 235 |
+
dimension at feasible parameter combinations.
|
| 236 |
+
3. **Update the measured sweep report** to document the elongation crash zone.
|
| 237 |
+
4. **Consider narrowing `elongation` range** or documenting the crash pocket as
|
| 238 |
+
a known hazard in the environment contract.
|