"""Extended tests for tasks, rubrics, trl_bridge, and alternate kinetics.
Target: push total coverage from ~79% to >90%.
"""
import sys
import os
import json
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from methanol_apc_env.server.reactor_sim import (
ReactorState, simulate_step,
_graaf_kinetics, _vbf_kinetics, _seyfert_kinetics, _nestler_kinetics,
)
from methanol_apc_env.server.tasks import (
TASKS, GRADERS, _clamp_score,
grade_startup, grade_optimization, grade_disturbance, grade_long_horizon,
grade_emergency_recovery, grade_feed_upset, grade_cost_minimization,
grade_pressure_loss, grade_day_night, grade_aged_catalyst,
grade_multi_disturbance, grade_max_yield,
compute_step_reward, TaskConfig,
)
from methanol_apc_env.server.methanol_environment import MethanolAPCEnvironment
from methanol_apc_env.models import MethanolAPCAction, MethanolAPCObservation
# ---- Helper: run N steps and get trajectory ----
def _run_episode(
    task_name,
    steps=None,
    feed_h2=5.0,
    feed_co=2.5,
    cooling=40.0,
    compressor=65.0,
):
    """Run one episode with a constant action and return the environment.

    A fresh ``MethanolAPCEnvironment`` is reset for ``task_name`` with seed 42
    and then stepped with the same four action values until either the step
    budget is used up or an observation reports ``done``.

    Args:
        task_name: Key into ``TASKS`` naming the task to reset to.
        steps: Number of steps to take. ``None`` (the default) means
            ``min(TASKS[task_name].max_steps, 30)``. An explicit ``0`` now
            takes no steps at all.
        feed_h2: Value passed as ``feed_rate_h2`` on every step.
        feed_co: Value passed as ``feed_rate_co`` on every step.
        cooling: Value passed as ``cooling_water_flow`` on every step.
        compressor: Value passed as ``compressor_power`` on every step.

    Returns:
        The environment after the last step taken.
    """
    env = MethanolAPCEnvironment()
    env.reset(task_name=task_name, seed=42)
    cfg = TASKS[task_name]
    # Test against None rather than truthiness: `steps or default` silently
    # replaced an explicit steps=0 with the default budget.
    n_steps = min(cfg.max_steps, 30) if steps is None else steps
    for _ in range(n_steps):
        # A new action object is built per step, as before, in case the
        # environment keeps a reference to or modifies the action it is given.
        obs = env.step(MethanolAPCAction(
            feed_rate_h2=feed_h2,
            feed_rate_co=feed_co,
            cooling_water_flow=cooling,
            compressor_power=compressor,
        ))
        if obs.done:
            break
    return env
# ============================================================
# 1. TASK GRADER TESTS — cover all 12 graders
# ============================================================
class TestAllGraders:
    """Tests for the grader registry, ``_clamp_score`` and the task graders.

    Trajectories are plain lists of ``ReactorState`` objects whose attributes
    are overwritten by hand before being passed to a grader.

    NOTE(review): ``grade_pressure_loss`` is imported by this module but no
    test in this class calls it directly — confirm whether a dedicated case
    is wanted.
    """

    @staticmethod
    def _make_state(**attrs):
        """Return a default ``ReactorState`` with ``attrs`` assigned in order."""
        state = ReactorState()
        for attr_name, value in attrs.items():
            setattr(state, attr_name, value)
        return state

    @staticmethod
    def _assert_zero_on_empty(grader):
        """Assert that ``grader`` returns 0.0 for an empty trajectory."""
        assert grader([]) == 0.0

    # ---- registry and clamping ----

    def test_all_12_graders_registered(self):
        """The registry holds 12 graders and one exists for every task."""
        assert len(GRADERS) == 12
        for name in TASKS:
            assert name in GRADERS, f"Missing grader for task '{name}'"

    def test_clamp_score_boundaries(self):
        """Clamped 0 and 1 stay strictly inside (0, 1); 0.5 stays near 0.5."""
        low = _clamp_score(0.0)
        mid = _clamp_score(0.5)
        high = _clamp_score(1.0)
        assert low > 0.0
        assert high < 1.0
        assert 0.45 < mid < 0.55
        assert low < mid < high

    def test_clamp_score_monotonic(self):
        """Clamped scores strictly increase over 0.0, 0.1, ..., 1.0."""
        scores = [_clamp_score(x / 10.0) for x in range(11)]
        for lower, upper in zip(scores, scores[1:]):
            assert lower < upper

    # ---- empty trajectories ----

    def test_grade_startup_empty(self):
        self._assert_zero_on_empty(grade_startup)

    def test_grade_optimization_empty(self):
        self._assert_zero_on_empty(grade_optimization)

    def test_grade_disturbance_empty(self):
        self._assert_zero_on_empty(grade_disturbance)

    def test_grade_long_horizon_empty(self):
        self._assert_zero_on_empty(grade_long_horizon)

    def test_grade_emergency_recovery_empty(self):
        self._assert_zero_on_empty(grade_emergency_recovery)

    def test_grade_feed_upset_empty(self):
        self._assert_zero_on_empty(grade_feed_upset)

    def test_grade_cost_minimization_empty(self):
        self._assert_zero_on_empty(grade_cost_minimization)

    def test_grade_day_night_empty(self):
        self._assert_zero_on_empty(grade_day_night)

    def test_grade_aged_catalyst_empty(self):
        self._assert_zero_on_empty(grade_aged_catalyst)

    def test_grade_multi_disturbance_empty(self):
        self._assert_zero_on_empty(grade_multi_disturbance)

    def test_grade_max_yield_empty(self):
        self._assert_zero_on_empty(grade_max_yield)

    # ---- startup / optimization ----

    def test_grade_startup_with_shutdown(self):
        tripped = self._make_state(emergency_shutdown=True, temperature=310.0)
        assert grade_startup([tripped]) == 0.0

    def test_grade_optimization_with_shutdown(self):
        tripped = self._make_state(emergency_shutdown=True, cumulative_profit=5.0)
        score = grade_optimization([tripped])
        assert 0.0 <= score <= 0.5

    def test_grade_startup_reached_target(self):
        # Linear ramp from 150 up to 248 over 50 steps.
        trajectory = [
            self._make_state(temperature=150.0 + step * 2.0, time_step=step)
            for step in range(50)
        ]
        assert grade_startup(trajectory) > 0.3

    def test_grade_startup_overshoot(self):
        trajectory = []
        for step in range(50):
            # The raw ramp would reach 346; anything above 300 is replaced by 275.
            state = self._make_state(temperature=150.0 + step * 4.0)
            if state.temperature > 300:
                state.temperature = 275.0
            state.time_step = step
            trajectory.append(state)
        assert grade_startup(trajectory) >= 0.0

    # ---- emergency recovery ----

    def test_grade_emergency_recovery_success(self):
        # Temperature falls by 0.5 per step from 290 while production accumulates.
        trajectory = [
            self._make_state(
                temperature=290.0 - step * 0.5,
                methanol_produced=step * 3.0,
                time_step=step,
            )
            for step in range(80)
        ]
        assert grade_emergency_recovery(trajectory) > 0.3

    def test_grade_emergency_recovery_shutdown(self):
        tripped = self._make_state(emergency_shutdown=True)
        assert grade_emergency_recovery([tripped]) == 0.0

    # ---- day / night ----

    def test_grade_day_night_stable(self):
        trajectory = []
        for step in range(150):
            # Alternate +1 / -1 around 250 on even / odd steps.
            swing = 1.0 if step % 2 == 0 else -1.0
            trajectory.append(self._make_state(
                temperature=250.0 + swing,
                methanol_produced=step * 5.0,
                time_step=step,
            ))
        assert grade_day_night(trajectory) > 0.3

    def test_grade_day_night_shutdown(self):
        tripped = self._make_state(emergency_shutdown=True)
        assert grade_day_night([tripped]) == 0.1

    # ---- aged catalyst ----

    def test_grade_aged_catalyst_production(self):
        trajectory = [
            self._make_state(
                temperature=250.0,
                methanol_produced=step * 3.0,
                catalyst_health=0.35,
                time_step=step,
            )
            for step in range(100)
        ]
        assert grade_aged_catalyst(trajectory) > 0.3

    def test_grade_aged_catalyst_shutdown(self):
        tripped = self._make_state(emergency_shutdown=True)
        assert grade_aged_catalyst([tripped]) == 0.1

    # ---- multi disturbance / max yield ----

    def test_grade_multi_disturbance_survival(self):
        trajectory = [
            self._make_state(
                temperature=250.0,
                methanol_produced=step * 3.0,
                time_step=step,
            )
            for step in range(150)
        ]
        assert grade_multi_disturbance(trajectory) > 0.4

    def test_grade_max_yield_high(self):
        final = self._make_state(methanol_produced=800.0, time_step=200)
        assert grade_max_yield([final]) > 0.5

    def test_grade_max_yield_with_shutdown(self):
        tripped = self._make_state(methanol_produced=500.0, emergency_shutdown=True)
        assert grade_max_yield([tripped]) < 0.2

    # ---- cost minimization / feed upset ----

    def test_grade_cost_minimization_profitable(self):
        # Realistic profitable trajectory: $0.55/kg (real controllers achieve
        # 0.55-0.57). Threshold $0.80 means score = 0.55/0.80 ~ 0.69.
        final = self._make_state(cumulative_profit=55.0, methanol_produced=100.0)
        assert grade_cost_minimization([final]) >= 0.5

    def test_grade_cost_minimization_shutdown(self):
        tripped = self._make_state(
            emergency_shutdown=True,
            cumulative_profit=5.0,
            methanol_produced=5.0,
        )
        assert grade_cost_minimization([tripped]) == 0.1

    def test_grade_feed_upset_shutdown(self):
        tripped = self._make_state(emergency_shutdown=True)
        assert grade_feed_upset([tripped]) == 0.1
# ============================================================
# 2. GRADERS VIA ENVIRONMENT (integration)
# ============================================================
class TestGradersViaEnvironment:
    """Integration check: graders are exercised through a stepped environment."""

    def test_all_tasks_produce_valid_scores(self):
        """Every task's final score after a 15-step episode lies in [0.01, 0.99]."""
        for task_name in TASKS:
            score = _run_episode(task_name, steps=15).get_final_score()
            in_range = 0.01 <= score <= 0.99
            assert in_range, f"Task '{task_name}' score out of range: {score}"
# ============================================================
# 3. STEP REWARD TESTS
# ============================================================
class TestStepReward:
    """Tests for ``compute_step_reward`` on hand-built state transitions."""

    def test_step_reward_normal(self):
        """An ordinary optimization step yields a reward within [0.01, 0.99]."""
        task = TASKS["optimization"]

        before = ReactorState()
        before.temperature = 250.0
        before.catalyst_health = 1.0

        after = ReactorState()
        after.temperature = 252.0
        after.catalyst_health = 0.99
        after.profit_this_step = 0.3

        assert 0.01 <= compute_step_reward(before, after, task) <= 0.99

    def test_step_reward_shutdown(self):
        """A step that ends in emergency shutdown is rewarded below 0.15."""
        task = TASKS["optimization"]
        before = ReactorState()
        after = ReactorState()
        after.emergency_shutdown = True

        # should be low
        assert compute_step_reward(before, after, task) < 0.15

    def test_step_reward_high_temp_penalty(self):
        """Same profit and catalyst health score lower at 285 than at 250."""
        task = TASKS["optimization"]
        before = ReactorState()
        before.temperature = 250.0

        hot = ReactorState()
        hot.temperature = 285.0
        hot.profit_this_step = 0.2
        hot.catalyst_health = 0.9
        hot_reward = compute_step_reward(before, hot, task)

        # Reference step at a comfortable temperature, otherwise identical.
        comfortable = ReactorState()
        comfortable.temperature = 250.0
        comfortable.profit_this_step = 0.2
        comfortable.catalyst_health = 0.9
        comfortable_reward = compute_step_reward(before, comfortable, task)

        assert hot_reward < comfortable_reward

    def test_step_reward_startup_progress(self):
        """Heating from 200 to 210 under the startup task earns more than 0.01."""
        task = TASKS["startup"]
        before = ReactorState()
        before.temperature = 200.0

        after = ReactorState()
        after.temperature = 210.0
        after.catalyst_health = 1.0
        after.profit_this_step = 0.0

        assert compute_step_reward(before, after, task) > 0.01
# ============================================================
# 4. TRL BRIDGE TESTS
# ============================================================
class TestTRLBridge:
    """Tests for the reward function and config helpers in ``trl_bridge``.

    The bridge is imported inside each test rather than at module level.
    """

    def test_reward_function_valid_action(self):
        """A well-formed JSON action yields one reward within [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        action = {
            "feed_rate_h2": 5.0,
            "feed_rate_co": 2.5,
            "cooling_water_flow": 40.0,
            "compressor_power": 65.0,
        }
        rewards = reward_fn([json.dumps(action)])
        assert len(rewards) == 1
        assert 0.01 <= rewards[0] <= 0.99

    def test_reward_function_invalid_action(self):
        """Text that is not JSON yields exactly one reward equal to 0.01."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        rewards = reward_fn(["not valid json at all"])
        assert len(rewards) == 1
        assert rewards[0] == 0.01

    def test_reward_function_batch(self):
        """Two actions in one call yield two rewards, each within [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payloads = [
            {
                "feed_rate_h2": 5.0,
                "feed_rate_co": 2.5,
                "cooling_water_flow": 40.0,
                "compressor_power": 65.0,
            },
            {
                "feed_rate_h2": 3.0,
                "feed_rate_co": 1.5,
                "cooling_water_flow": 60.0,
                "compressor_power": 50.0,
            },
        ]
        rewards = reward_fn([json.dumps(payload) for payload in payloads])
        assert len(rewards) == 2
        for reward in rewards:
            assert 0.01 <= reward <= 0.99

    def test_grpo_config(self):
        """``base_kwargs`` exposes the expected GRPO training keys."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig

        kwargs = MethanolGRPOConfig.base_kwargs()
        # Validate that returned kwargs are valid TRL GRPOConfig parameters.
        assert "learning_rate" in kwargs
        assert kwargs["num_generations"] > 0  # GRPO group size (real TRL key)
        assert kwargs["beta"] >= 0  # KL penalty coefficient
        assert "max_grad_norm" in kwargs

    def test_unsloth_config(self):
        """Unsloth kwargs request 4-bit loading and positive LoRA settings."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig

        model_kwargs = MethanolGRPOConfig.unsloth_model_kwargs()
        lora_kwargs = MethanolGRPOConfig.unsloth_lora_kwargs()
        assert model_kwargs["load_in_4bit"] is True
        assert lora_kwargs["r"] > 0
        assert lora_kwargs["lora_alpha"] > 0
# ============================================================
# 5. ALTERNATE KINETIC MODELS
# ============================================================
class TestAlternateKinetics:
    """Smoke tests that each kinetics function returns usable rates."""

    def _test_kinetics_fn(self, fn):
        """Call ``fn`` at 523.15 K and check the three returned rates.

        The remaining positional arguments are fixed sample values; their
        meaning is defined by the kinetics functions themselves, which are
        not visible here.
        """
        T_K = 523.15  # 250C
        rates = fn(T_K, 10.0, 40.0, 2.0, 0.1, 0.1, 1.0, 0.7)
        r1, r2, r3 = rates
        for rate in (r1, r2, r3):
            assert rate >= 0
        # At least one reaction should produce something
        assert r1 + r2 > 0

    def test_graaf_model(self):
        self._test_kinetics_fn(_graaf_kinetics)

    def test_vbf_model(self):
        self._test_kinetics_fn(_vbf_kinetics)

    def test_seyfert_model(self):
        self._test_kinetics_fn(_seyfert_kinetics)

    def test_nestler_model(self):
        self._test_kinetics_fn(_nestler_kinetics)
# ============================================================
# 6. RUBRICS TESTS
# ============================================================
class TestRubrics:
    """Import and construction checks for the rubric classes."""

    def test_rubric_imports(self):
        """Both rubric classes can be imported from the rubrics module."""
        from methanol_apc_env.server.rubrics import MethanolStepRubric, MethanolAPCRubric

        for rubric_cls in (MethanolStepRubric, MethanolAPCRubric):
            assert rubric_cls is not None

    def test_step_rubric_creation(self):
        """A step rubric built from the optimization task stores that task."""
        from methanol_apc_env.server.rubrics import MethanolStepRubric

        step_rubric = MethanolStepRubric(TASKS["optimization"])
        assert step_rubric._task.name == "optimization"

    def test_apc_rubric_selects_task(self):
        """The APC rubric can be constructed with no arguments."""
        from methanol_apc_env.server.rubrics import MethanolAPCRubric

        apc_rubric = MethanolAPCRubric()
        assert apc_rubric is not None
# ============================================================
# 7. LONG HORIZON GRADER EDGE CASES
# ============================================================
class TestLongHorizonEdgeCases:
    """Edge cases for ``grade_long_horizon`` using one-state trajectories."""

    @staticmethod
    def _final_state(produced, health, step):
        """Build a ``ReactorState`` with production, catalyst health and step set."""
        state = ReactorState()
        state.methanol_produced = produced
        state.catalyst_health = health
        state.time_step = step
        return state

    def test_catalyst_destroyed(self):
        """Catalyst health of 0.0 keeps the score under 0.3."""
        final = self._final_state(10000.0, 0.0, 100)
        assert grade_long_horizon([final]) < 0.3

    def test_target_reached_fast(self):
        """Large production by step 100 with a healthy catalyst scores above 0.3."""
        final = self._final_state(50000.0, 0.8, 100)  # fast
        assert grade_long_horizon([final]) > 0.3

    def test_target_not_reached(self):
        """Low production by step 500 scores under 0.5."""
        final = self._final_state(1000.0, 0.9, 500)
        assert grade_long_horizon([final]) < 0.5
|