File size: 15,317 Bytes
1a7c2bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470cdf2
 
1a7c2bd
 
470cdf2
1a7c2bd
 
 
470cdf2
1a7c2bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470cdf2
 
1a7c2bd
470cdf2
 
 
1a7c2bd
 
 
470cdf2
 
 
 
 
1a7c2bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
"""Extended tests for tasks, rubrics, trl_bridge, and alternate kinetics.



Target: push total coverage from ~79% to >90%.

"""

import sys
import os
import json

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from methanol_apc_env.server.reactor_sim import (
    ReactorState, simulate_step,
    _graaf_kinetics, _vbf_kinetics, _seyfert_kinetics, _nestler_kinetics,
)
from methanol_apc_env.server.tasks import (
    TASKS, GRADERS, _clamp_score,
    grade_startup, grade_optimization, grade_disturbance, grade_long_horizon,
    grade_emergency_recovery, grade_feed_upset, grade_cost_minimization,
    grade_pressure_loss, grade_day_night, grade_aged_catalyst,
    grade_multi_disturbance, grade_max_yield,
    compute_step_reward, TaskConfig,
)
from methanol_apc_env.server.methanol_environment import MethanolAPCEnvironment
from methanol_apc_env.models import MethanolAPCAction, MethanolAPCObservation


# ---- Helper: run N steps and get trajectory ----

def _run_episode(task_name, steps=None, feed_h2=5.0, feed_co=2.5, cooling=40.0, compressor=65.0):
    """Reset an environment for ``task_name`` and drive it with a constant action.

    Args:
        task_name: Key into ``TASKS``; also passed to ``env.reset``.
        steps: Maximum number of steps to take. ``None`` (the default) means
            ``min(TASKS[task_name].max_steps, 30)``. An explicit ``0`` takes
            no steps at all.
        feed_h2: Value sent as ``feed_rate_h2`` on every step.
        feed_co: Value sent as ``feed_rate_co`` on every step.
        cooling: Value sent as ``cooling_water_flow`` on every step.
        compressor: Value sent as ``compressor_power`` on every step.

    Returns:
        The ``MethanolAPCEnvironment`` instance after the run, so the caller
        can query it (e.g. for a final score).
    """
    env = MethanolAPCEnvironment()
    # The reset observation is not needed; the seed is always 42.
    env.reset(task_name=task_name, seed=42)
    cfg = TASKS[task_name]
    # Compare against None explicitly: ``steps or default`` would treat a
    # caller-supplied 0 as "not given" and run the default number of steps.
    n = min(cfg.max_steps, 30) if steps is None else steps
    for _ in range(n):
        obs = env.step(MethanolAPCAction(
            feed_rate_h2=feed_h2, feed_rate_co=feed_co,
            cooling_water_flow=cooling, compressor_power=compressor,
        ))
        # Stop early once the environment reports the episode as finished.
        if obs.done:
            break
    return env


# ============================================================
# 1. TASK GRADER TESTS — cover all 12 graders
# ============================================================

class TestAllGraders:
    """Unit tests that call each grader function and ``_clamp_score`` directly."""

    @staticmethod
    def _state(**fields):
        """Return a default ``ReactorState`` with ``fields`` set as attributes, in order."""
        state = ReactorState()
        for attr, value in fields.items():
            setattr(state, attr, value)
        return state

    def test_all_12_graders_registered(self):
        """``GRADERS`` has 12 entries and contains every key of ``TASKS``."""
        assert len(GRADERS) == 12
        for name in TASKS:
            assert name in GRADERS, f"Missing grader for task '{name}'"

    def test_clamp_score_boundaries(self):
        """Clamped 0 and 1 stay strictly inside (0, 1); 0.5 stays near the middle."""
        low = _clamp_score(0.0)
        high = _clamp_score(1.0)
        middle = _clamp_score(0.5)
        assert low > 0.0
        assert high < 1.0
        assert 0.45 < middle < 0.55
        assert low < middle < high

    def test_clamp_score_monotonic(self):
        """Clamped values for 0.0, 0.1, ..., 1.0 are strictly increasing."""
        clamped = [_clamp_score(tenth / 10.0) for tenth in range(11)]
        for lower, upper in zip(clamped, clamped[1:]):
            assert lower < upper

    def test_grade_startup_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_startup([])
        assert score == 0.0

    def test_grade_optimization_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_optimization([])
        assert score == 0.0

    def test_grade_disturbance_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_disturbance([])
        assert score == 0.0

    def test_grade_long_horizon_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_long_horizon([])
        assert score == 0.0

    def test_grade_emergency_recovery_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_emergency_recovery([])
        assert score == 0.0

    def test_grade_feed_upset_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_feed_upset([])
        assert score == 0.0

    def test_grade_cost_minimization_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_cost_minimization([])
        assert score == 0.0

    def test_grade_day_night_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_day_night([])
        assert score == 0.0

    def test_grade_aged_catalyst_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_aged_catalyst([])
        assert score == 0.0

    def test_grade_multi_disturbance_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_multi_disturbance([])
        assert score == 0.0

    def test_grade_max_yield_empty(self):
        """An empty trajectory is expected to score exactly 0.0."""
        score = grade_max_yield([])
        assert score == 0.0

    def test_grade_startup_with_shutdown(self):
        """A single shut-down state at 310.0 is expected to score 0.0."""
        tripped = self._state(emergency_shutdown=True, temperature=310.0)
        assert grade_startup([tripped]) == 0.0

    def test_grade_optimization_with_shutdown(self):
        """A shut-down state with some profit is expected to score within [0.0, 0.5]."""
        tripped = self._state(emergency_shutdown=True, cumulative_profit=5.0)
        result = grade_optimization([tripped])
        assert 0.0 <= result <= 0.5

    def test_grade_startup_reached_target(self):
        """A 50-step linear temperature ramp is expected to score above 0.3."""
        trajectory = [
            # ramp to 248
            self._state(temperature=150.0 + step * 2.0, time_step=step)
            for step in range(50)
        ]
        assert grade_startup(trajectory) > 0.3

    def test_grade_startup_overshoot(self):
        """A steeper ramp that is reset to 275.0 once past 300 still scores >= 0.0."""
        trajectory = []
        for step in range(50):
            ramp = 150.0 + step * 4.0  # overshoot to 346
            # Readings above 300 are replaced by 275.0, as in the raw ramp data.
            temperature = 275.0 if ramp > 300 else ramp
            trajectory.append(self._state(temperature=temperature, time_step=step))
        assert grade_startup(trajectory) >= 0.0

    def test_grade_emergency_recovery_success(self):
        """An 80-step cooldown with rising production is expected to score above 0.3."""
        trajectory = [
            self._state(
                temperature=290.0 - step * 0.5,  # cooldown
                methanol_produced=step * 3.0,
                time_step=step,
            )
            for step in range(80)
        ]
        assert grade_emergency_recovery(trajectory) > 0.3

    def test_grade_emergency_recovery_shutdown(self):
        """A lone shut-down state is expected to score 0.0."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_emergency_recovery([tripped]) == 0.0

    def test_grade_day_night_stable(self):
        """Temperature alternating by +/-1 around 250 for 150 steps scores above 0.3."""
        trajectory = [
            self._state(
                temperature=250.0 + (1.0 if step % 2 == 0 else -1.0),
                methanol_produced=step * 5.0,
                time_step=step,
            )
            for step in range(150)
        ]
        assert grade_day_night(trajectory) > 0.3

    def test_grade_day_night_shutdown(self):
        """A lone shut-down state is expected to score 0.1."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_day_night([tripped]) == 0.1

    def test_grade_aged_catalyst_production(self):
        """Steady production at catalyst health 0.35 for 100 steps scores above 0.3."""
        trajectory = [
            self._state(
                temperature=250.0,
                methanol_produced=step * 3.0,
                catalyst_health=0.35,
                time_step=step,
            )
            for step in range(100)
        ]
        assert grade_aged_catalyst(trajectory) > 0.3

    def test_grade_aged_catalyst_shutdown(self):
        """A lone shut-down state is expected to score 0.1."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_aged_catalyst([tripped]) == 0.1

    def test_grade_multi_disturbance_survival(self):
        """A steady 150-step trajectory at 250.0 is expected to score above 0.4."""
        trajectory = [
            self._state(
                temperature=250.0,
                methanol_produced=step * 3.0,
                time_step=step,
            )
            for step in range(150)
        ]
        assert grade_multi_disturbance(trajectory) > 0.4

    def test_grade_max_yield_high(self):
        """A single state with 800.0 produced at step 200 scores above 0.5."""
        final = self._state(methanol_produced=800.0, time_step=200)
        assert grade_max_yield([final]) > 0.5

    def test_grade_max_yield_with_shutdown(self):
        """Production of 500.0 combined with a shutdown scores below 0.2."""
        tripped = self._state(methanol_produced=500.0, emergency_shutdown=True)
        assert grade_max_yield([tripped]) < 0.2

    def test_grade_cost_minimization_profitable(self):
        """A profit of 55.0 on 100.0 produced is expected to score at least 0.5."""
        # Realistic profitable trajectory: $0.55/kg (real controllers achieve
        # 0.55-0.57). Threshold $0.80 means score = 0.55/0.80 ~ 0.69.
        final = self._state(cumulative_profit=55.0, methanol_produced=100.0)
        assert grade_cost_minimization([final]) >= 0.5

    def test_grade_cost_minimization_shutdown(self):
        """A shut-down state with small profit and production scores 0.1."""
        tripped = self._state(
            emergency_shutdown=True,
            cumulative_profit=5.0,
            methanol_produced=5.0,
        )
        assert grade_cost_minimization([tripped]) == 0.1

    def test_grade_feed_upset_shutdown(self):
        """A lone shut-down state is expected to score 0.1."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_feed_upset([tripped]) == 0.1


# ============================================================
# 2. GRADERS VIA ENVIRONMENT (integration)
# ============================================================

class TestGradersViaEnvironment:
    """Integration tests that score episodes produced by the environment itself."""

    def test_all_tasks_produce_valid_scores(self):
        """After at most 15 steps, every task's final score lies in [0.01, 0.99]."""
        for task_name in TASKS:
            score = _run_episode(task_name, steps=15).get_final_score()
            assert 0.01 <= score <= 0.99, f"Task '{task_name}' score out of range: {score}"


# ============================================================
# 3. STEP REWARD TESTS
# ============================================================

class TestStepReward:
    """Tests for ``compute_step_reward`` on hand-built previous/current state pairs."""

    @staticmethod
    def _state(**fields):
        """Return a default ``ReactorState`` with ``fields`` set as attributes, in order."""
        state = ReactorState()
        for attr, value in fields.items():
            setattr(state, attr, value)
        return state

    def test_step_reward_normal(self):
        """A small temperature move with positive profit yields a reward in [0.01, 0.99]."""
        task = TASKS["optimization"]
        before = self._state(temperature=250.0, catalyst_health=1.0)
        after = self._state(
            temperature=252.0,
            catalyst_health=0.99,
            profit_this_step=0.3,
        )
        assert 0.01 <= compute_step_reward(before, after, task) <= 0.99

    def test_step_reward_shutdown(self):
        """A current state flagged as emergency shutdown yields a reward below 0.15."""
        task = TASKS["optimization"]
        before = self._state()
        after = self._state(emergency_shutdown=True)
        assert compute_step_reward(before, after, task) < 0.15  # should be low

    def test_step_reward_high_temp_penalty(self):
        """With equal profit and health, 285.0 is rewarded less than 250.0."""
        task = TASKS["optimization"]
        before = self._state(temperature=250.0)

        hot = self._state(temperature=285.0, profit_this_step=0.2, catalyst_health=0.9)
        hot_reward = compute_step_reward(before, hot, task)

        # Should be lower than comfortable temperature
        comfortable = self._state(
            temperature=250.0, profit_this_step=0.2, catalyst_health=0.9,
        )
        comfortable_reward = compute_step_reward(before, comfortable, task)

        assert hot_reward < comfortable_reward

    def test_step_reward_startup_progress(self):
        """On the startup task, rising from 200.0 to 210.0 yields a reward above 0.01."""
        task = TASKS["startup"]
        before = self._state(temperature=200.0)
        after = self._state(
            temperature=210.0,
            catalyst_health=1.0,
            profit_this_step=0.0,
        )
        assert compute_step_reward(before, after, task) > 0.01


# ============================================================
# 4. TRL BRIDGE TESTS
# ============================================================

class TestTRLBridge:
    """Tests for ``MethanolRewardFunction`` and ``MethanolGRPOConfig`` in ``trl_bridge``."""

    def test_reward_function_valid_action(self):
        """One well-formed JSON action produces one reward in [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction
        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payload = {
            "feed_rate_h2": 5.0, "feed_rate_co": 2.5,
            "cooling_water_flow": 40.0, "compressor_power": 65.0,
        }
        result = reward_fn([json.dumps(payload)])
        assert len(result) == 1
        assert 0.01 <= result[0] <= 0.99

    def test_reward_function_invalid_action(self):
        """A completion that is not JSON is expected to receive exactly 0.01."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction
        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        result = reward_fn(["not valid json at all"])
        assert len(result) == 1
        assert result[0] == 0.01

    def test_reward_function_batch(self):
        """Two JSON actions produce two rewards, each in [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction
        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payloads = (
            {"feed_rate_h2": 5.0, "feed_rate_co": 2.5,
             "cooling_water_flow": 40.0, "compressor_power": 65.0},
            {"feed_rate_h2": 3.0, "feed_rate_co": 1.5,
             "cooling_water_flow": 60.0, "compressor_power": 50.0},
        )
        completions = [json.dumps(payload) for payload in payloads]
        result = reward_fn(completions)
        assert len(result) == 2
        for value in result:
            assert 0.01 <= value <= 0.99

    def test_grpo_config(self):
        """``base_kwargs()`` exposes the keys and value ranges checked below."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig
        kwargs = MethanolGRPOConfig.base_kwargs()
        # Validate that returned kwargs are valid TRL GRPOConfig parameters.
        assert "learning_rate" in kwargs
        assert kwargs["num_generations"] > 0  # GRPO group size (real TRL key)
        assert kwargs["beta"] >= 0            # KL penalty coefficient
        assert "max_grad_norm" in kwargs

    def test_unsloth_config(self):
        """Model kwargs request 4-bit loading; LoRA kwargs have positive ``r`` and alpha."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig
        model_kwargs = MethanolGRPOConfig.unsloth_model_kwargs()
        lora_kwargs = MethanolGRPOConfig.unsloth_lora_kwargs()
        assert model_kwargs["load_in_4bit"] is True
        assert lora_kwargs["r"] > 0
        assert lora_kwargs["lora_alpha"] > 0


# ============================================================
# 5. ALTERNATE KINETIC MODELS
# ============================================================

class TestAlternateKinetics:
    """Smoke tests that run each kinetics function on one fixed operating point."""

    def _test_kinetics_fn(self, fn):
        """Call ``fn`` with fixed sample inputs and sanity-check its three results.

        The first argument is a temperature of 523.15 K (250C). The remaining
        seven positional values are fixed sample inputs.
        NOTE(review): their meaning is defined by the kinetics functions,
        which are not visible in this file.
        """
        temperature_kelvin = 523.15  # 250C
        rate_1, rate_2, rate_3 = fn(
            temperature_kelvin, 10.0, 40.0, 2.0, 0.1, 0.1, 1.0, 0.7,
        )
        for rate in (rate_1, rate_2, rate_3):
            assert rate >= 0
        # At least one reaction should produce something
        assert rate_1 + rate_2 > 0

    def test_graaf_model(self):
        """``_graaf_kinetics`` passes the shared sanity check."""
        self._test_kinetics_fn(_graaf_kinetics)

    def test_vbf_model(self):
        """``_vbf_kinetics`` passes the shared sanity check."""
        self._test_kinetics_fn(_vbf_kinetics)

    def test_seyfert_model(self):
        """``_seyfert_kinetics`` passes the shared sanity check."""
        self._test_kinetics_fn(_seyfert_kinetics)

    def test_nestler_model(self):
        """``_nestler_kinetics`` passes the shared sanity check."""
        self._test_kinetics_fn(_nestler_kinetics)


# ============================================================
# 6. RUBRICS TESTS
# ============================================================

class TestRubrics:
    """Import and construction smoke tests for the ``rubrics`` module."""

    def test_rubric_imports(self):
        """Both rubric classes can be imported and are not ``None``."""
        from methanol_apc_env.server.rubrics import MethanolStepRubric, MethanolAPCRubric
        for rubric_cls in (MethanolStepRubric, MethanolAPCRubric):
            assert rubric_cls is not None

    def test_step_rubric_creation(self):
        """A step rubric built from the optimization task keeps it as ``_task``."""
        from methanol_apc_env.server.rubrics import MethanolStepRubric
        step_rubric = MethanolStepRubric(TASKS["optimization"])
        assert step_rubric._task.name == "optimization"

    def test_apc_rubric_selects_task(self):
        """``MethanolAPCRubric`` can be constructed with no arguments."""
        from methanol_apc_env.server.rubrics import MethanolAPCRubric
        assert MethanolAPCRubric() is not None


# ============================================================
# 7. LONG HORIZON GRADER EDGE CASES
# ============================================================

class TestLongHorizonEdgeCases:
    """Edge cases for ``grade_long_horizon`` using single-state trajectories."""

    @staticmethod
    def _final_state(produced, health, step):
        """Return a ``ReactorState`` with production, catalyst health and time step set."""
        final = ReactorState()
        final.methanol_produced = produced
        final.catalyst_health = health
        final.time_step = step
        return final

    def test_catalyst_destroyed(self):
        """Catalyst health of 0.0 keeps the score below 0.3 despite 10000.0 produced."""
        final = self._final_state(10000.0, 0.0, 100)
        assert grade_long_horizon([final]) < 0.3

    def test_target_reached_fast(self):
        """50000.0 produced by step 100 with health 0.8 scores above 0.3."""
        final = self._final_state(50000.0, 0.8, 100)  # fast
        assert grade_long_horizon([final]) > 0.3

    def test_target_not_reached(self):
        """Only 1000.0 produced by step 500 scores below 0.5."""
        final = self._final_state(1000.0, 0.9, 500)
        assert grade_long_horizon([final]) < 0.5