# methanol-apc-env/tests/test_coverage.py
# glitchfilter: Full sync from GitHub main branch
# Commit 470cdf2 (verified)
"""Extended tests for tasks, rubrics, trl_bridge, and alternate kinetics.
Target: push total coverage from ~79% to >90%.
"""
import sys
import os
import json
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from methanol_apc_env.server.reactor_sim import (
ReactorState, simulate_step,
_graaf_kinetics, _vbf_kinetics, _seyfert_kinetics, _nestler_kinetics,
)
from methanol_apc_env.server.tasks import (
TASKS, GRADERS, _clamp_score,
grade_startup, grade_optimization, grade_disturbance, grade_long_horizon,
grade_emergency_recovery, grade_feed_upset, grade_cost_minimization,
grade_pressure_loss, grade_day_night, grade_aged_catalyst,
grade_multi_disturbance, grade_max_yield,
compute_step_reward, TaskConfig,
)
from methanol_apc_env.server.methanol_environment import MethanolAPCEnvironment
from methanol_apc_env.models import MethanolAPCAction, MethanolAPCObservation
# ---- Helper: run N steps and get trajectory ----
def _run_episode(task_name, steps=None, feed_h2=5.0, feed_co=2.5, cooling=40.0, compressor=65.0):
    """Drive a fresh environment with one constant action and return it.

    Args:
        task_name: Key into ``TASKS``; also forwarded to ``env.reset``.
        steps: Upper bound on the number of ``env.step`` calls. ``None``
            (the default) means ``min(cfg.max_steps, 30)``. An explicit
            ``0`` runs no steps at all.
        feed_h2: Sent as ``feed_rate_h2`` on every step.
        feed_co: Sent as ``feed_rate_co`` on every step.
        cooling: Sent as ``cooling_water_flow`` on every step.
        compressor: Sent as ``compressor_power`` on every step.

    Returns:
        The ``MethanolAPCEnvironment`` instance after the loop finishes, so
        the caller can query it (for example ``get_final_score()``).
    """
    env = MethanolAPCEnvironment()
    # Every episode is reset with the same seed (42).
    env.reset(task_name=task_name, seed=42)
    cfg = TASKS[task_name]
    # Compare against None instead of relying on truthiness: ``steps or ...``
    # would turn an explicit ``steps=0`` into the 30-step default.
    n = min(cfg.max_steps, 30) if steps is None else steps
    for _ in range(n):
        # A new action object is built on each iteration, as before.
        obs = env.step(MethanolAPCAction(
            feed_rate_h2=feed_h2,
            feed_rate_co=feed_co,
            cooling_water_flow=cooling,
            compressor_power=compressor,
        ))
        if obs.done:
            # The environment reported the episode as finished; stop early.
            break
    return env
# ============================================================
# 1. TASK GRADER TESTS — cover all 12 graders
# ============================================================
class TestAllGraders:
    """Direct checks of ``_clamp_score`` and the individual grader functions.

    Trajectories are hand-built lists of ``ReactorState`` objects whose
    attributes are overwritten after construction.

    NOTE(review): ``grade_pressure_loss`` is imported by this module but no
    test in this class calls it directly -- confirm whether that is intended.
    """

    @staticmethod
    def _state(**attrs):
        """Return a new ``ReactorState`` with ``attrs`` assigned in order."""
        state = ReactorState()
        for attr_name, value in attrs.items():
            setattr(state, attr_name, value)
        return state

    # ---- registry and score clamping ----

    def test_all_12_graders_registered(self):
        """``GRADERS`` holds twelve entries, one for each key of ``TASKS``."""
        assert len(GRADERS) == 12
        for name in TASKS:
            assert name in GRADERS, f"Missing grader for task '{name}'"

    def test_clamp_score_boundaries(self):
        """Clamped 0.0 and 1.0 stay strictly inside (0, 1); 0.5 stays near 0.5."""
        low = _clamp_score(0.0)
        mid = _clamp_score(0.5)
        high = _clamp_score(1.0)
        assert low > 0.0
        assert high < 1.0
        assert 0.45 < mid < 0.55
        assert low < mid < high

    def test_clamp_score_monotonic(self):
        """Clamped scores strictly increase over inputs 0.0, 0.1, ..., 1.0."""
        scores = [_clamp_score(tenth / 10.0) for tenth in range(11)]
        for lower, higher in zip(scores, scores[1:]):
            assert lower < higher

    # ---- empty trajectories ----

    def test_grade_startup_empty(self):
        """``grade_startup`` returns exactly 0.0 for an empty trajectory."""
        score = grade_startup([])
        assert score == 0.0

    def test_grade_optimization_empty(self):
        """``grade_optimization`` returns exactly 0.0 for an empty trajectory."""
        score = grade_optimization([])
        assert score == 0.0

    def test_grade_disturbance_empty(self):
        """``grade_disturbance`` returns exactly 0.0 for an empty trajectory."""
        score = grade_disturbance([])
        assert score == 0.0

    def test_grade_long_horizon_empty(self):
        """``grade_long_horizon`` returns exactly 0.0 for an empty trajectory."""
        score = grade_long_horizon([])
        assert score == 0.0

    def test_grade_emergency_recovery_empty(self):
        """``grade_emergency_recovery`` returns exactly 0.0 for an empty trajectory."""
        score = grade_emergency_recovery([])
        assert score == 0.0

    def test_grade_feed_upset_empty(self):
        """``grade_feed_upset`` returns exactly 0.0 for an empty trajectory."""
        score = grade_feed_upset([])
        assert score == 0.0

    def test_grade_cost_minimization_empty(self):
        """``grade_cost_minimization`` returns exactly 0.0 for an empty trajectory."""
        score = grade_cost_minimization([])
        assert score == 0.0

    def test_grade_day_night_empty(self):
        """``grade_day_night`` returns exactly 0.0 for an empty trajectory."""
        score = grade_day_night([])
        assert score == 0.0

    def test_grade_aged_catalyst_empty(self):
        """``grade_aged_catalyst`` returns exactly 0.0 for an empty trajectory."""
        score = grade_aged_catalyst([])
        assert score == 0.0

    def test_grade_multi_disturbance_empty(self):
        """``grade_multi_disturbance`` returns exactly 0.0 for an empty trajectory."""
        score = grade_multi_disturbance([])
        assert score == 0.0

    def test_grade_max_yield_empty(self):
        """``grade_max_yield`` returns exactly 0.0 for an empty trajectory."""
        score = grade_max_yield([])
        assert score == 0.0

    # ---- hand-built trajectories ----

    def test_grade_startup_with_shutdown(self):
        """A single shut-down state at 310.0 scores 0.0 for startup."""
        tripped = self._state(emergency_shutdown=True, temperature=310.0)
        assert grade_startup([tripped]) == 0.0

    def test_grade_optimization_with_shutdown(self):
        """A shut-down state with profit 5.0 scores within [0.0, 0.5]."""
        tripped = self._state(emergency_shutdown=True, cumulative_profit=5.0)
        score = grade_optimization([tripped])
        assert 0.0 <= score <= 0.5

    def test_grade_startup_reached_target(self):
        """A 50-step linear ramp from 150.0 to 248.0 scores above 0.3."""
        states = [
            self._state(temperature=150.0 + i * 2.0, time_step=i)
            for i in range(50)
        ]
        score = grade_startup(states)
        assert score > 0.3

    def test_grade_startup_overshoot(self):
        """A steeper ramp, replaced by 275.0 once above 300, scores >= 0.0."""
        states = []
        for i in range(50):
            ramp = 150.0 + i * 4.0  # would reach 346 without the cap below
            capped = 275.0 if ramp > 300 else ramp
            states.append(self._state(temperature=capped, time_step=i))
        score = grade_startup(states)
        assert score >= 0.0

    def test_grade_emergency_recovery_success(self):
        """An 80-step cooldown from 290.0 with rising output scores above 0.3."""
        states = [
            self._state(
                temperature=290.0 - i * 0.5,
                methanol_produced=i * 3.0,
                time_step=i,
            )
            for i in range(80)
        ]
        score = grade_emergency_recovery(states)
        assert score > 0.3

    def test_grade_emergency_recovery_shutdown(self):
        """A single shut-down state scores 0.0 for emergency recovery."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_emergency_recovery([tripped]) == 0.0

    def test_grade_day_night_stable(self):
        """Temperature alternating 251.0 / 249.0 over 150 steps scores above 0.3."""
        states = []
        for i in range(150):
            swing = 1.0 if i % 2 == 0 else -1.0
            states.append(self._state(
                temperature=250.0 + swing,
                methanol_produced=i * 5.0,
                time_step=i,
            ))
        score = grade_day_night(states)
        assert score > 0.3

    def test_grade_day_night_shutdown(self):
        """A single shut-down state scores 0.1 for day/night."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_day_night([tripped]) == 0.1

    def test_grade_aged_catalyst_production(self):
        """100 steps at 250.0 with catalyst health 0.35 score above 0.3."""
        states = [
            self._state(
                temperature=250.0,
                methanol_produced=i * 3.0,
                catalyst_health=0.35,
                time_step=i,
            )
            for i in range(100)
        ]
        score = grade_aged_catalyst(states)
        assert score > 0.3

    def test_grade_aged_catalyst_shutdown(self):
        """A single shut-down state scores 0.1 for aged catalyst."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_aged_catalyst([tripped]) == 0.1

    def test_grade_multi_disturbance_survival(self):
        """150 steps held at 250.0 with rising output score above 0.4."""
        states = [
            self._state(
                temperature=250.0,
                methanol_produced=i * 3.0,
                time_step=i,
            )
            for i in range(150)
        ]
        score = grade_multi_disturbance(states)
        assert score > 0.4

    def test_grade_max_yield_high(self):
        """One state with 800.0 produced at step 200 scores above 0.5."""
        final = self._state(methanol_produced=800.0, time_step=200)
        score = grade_max_yield([final])
        assert score > 0.5

    def test_grade_max_yield_with_shutdown(self):
        """500.0 produced but shut down scores below 0.2."""
        tripped = self._state(methanol_produced=500.0, emergency_shutdown=True)
        score = grade_max_yield([tripped])
        assert score < 0.2

    def test_grade_cost_minimization_profitable(self):
        """Profit 55.0 on 100.0 produced scores at least 0.5."""
        # 55.0 / 100.0 is $0.55 per kg. Per the original author's note, real
        # controllers achieve 0.55-0.57 and the grader threshold is $0.80,
        # giving roughly 0.55 / 0.80 ~ 0.69.
        final = self._state(cumulative_profit=55.0, methanol_produced=100.0)
        score = grade_cost_minimization([final])
        assert score >= 0.5

    def test_grade_cost_minimization_shutdown(self):
        """A shut-down state scores 0.1 for cost minimization."""
        tripped = self._state(
            emergency_shutdown=True,
            cumulative_profit=5.0,
            methanol_produced=5.0,
        )
        assert grade_cost_minimization([tripped]) == 0.1

    def test_grade_feed_upset_shutdown(self):
        """A single shut-down state scores 0.1 for feed upset."""
        tripped = self._state(emergency_shutdown=True)
        assert grade_feed_upset([tripped]) == 0.1
# ============================================================
# 2. GRADERS VIA ENVIRONMENT (integration)
# ============================================================
class TestGradersViaEnvironment:
    """Integration check: graders are reached through the environment."""

    def test_all_tasks_produce_valid_scores(self):
        """Each task's final score after at most 15 steps is in [0.01, 0.99]."""
        for task_name in TASKS:
            finished_env = _run_episode(task_name, steps=15)
            score = finished_env.get_final_score()
            assert 0.01 <= score <= 0.99, f"Task '{task_name}' score out of range: {score}"
# ============================================================
# 3. STEP REWARD TESTS
# ============================================================
class TestStepReward:
    """Checks of ``compute_step_reward`` on hand-built state pairs."""

    @staticmethod
    def _state(**attrs):
        """Return a new ``ReactorState`` with ``attrs`` assigned in order."""
        state = ReactorState()
        for attr_name, value in attrs.items():
            setattr(state, attr_name, value)
        return state

    def test_step_reward_normal(self):
        """A small temperature move with positive profit lands in [0.01, 0.99]."""
        task_cfg = TASKS["optimization"]
        before = self._state(temperature=250.0, catalyst_health=1.0)
        after = self._state(
            temperature=252.0,
            catalyst_health=0.99,
            profit_this_step=0.3,
        )
        reward = compute_step_reward(before, after, task_cfg)
        assert 0.01 <= reward <= 0.99

    def test_step_reward_shutdown(self):
        """Stepping into an emergency shutdown yields a reward below 0.15."""
        task_cfg = TASKS["optimization"]
        before = self._state()
        after = self._state(emergency_shutdown=True)
        reward = compute_step_reward(before, after, task_cfg)
        assert reward < 0.15  # should be low

    def test_step_reward_high_temp_penalty(self):
        """Ending at 285.0 earns less than ending at 250.0, all else equal."""
        task_cfg = TASKS["optimization"]
        before = self._state(temperature=250.0)

        hot = self._state(
            temperature=285.0,
            profit_this_step=0.2,
            catalyst_health=0.9,
        )
        reward = compute_step_reward(before, hot, task_cfg)

        # Same profit and catalyst health; only the temperature differs.
        comfortable = self._state(
            temperature=250.0,
            profit_this_step=0.2,
            catalyst_health=0.9,
        )
        reward2 = compute_step_reward(before, comfortable, task_cfg)

        assert reward < reward2

    def test_step_reward_startup_progress(self):
        """Under the startup task, a 200.0 -> 210.0 step earns more than 0.01."""
        task_cfg = TASKS["startup"]
        before = self._state(temperature=200.0)
        after = self._state(
            temperature=210.0,
            catalyst_health=1.0,
            profit_this_step=0.0,
        )
        reward = compute_step_reward(before, after, task_cfg)
        assert reward > 0.01
# ============================================================
# 4. TRL BRIDGE TESTS
# ============================================================
class TestTRLBridge:
    """Checks of ``MethanolRewardFunction`` and ``MethanolGRPOConfig``.

    ``methanol_apc_env.trl_bridge`` is imported inside each test rather than
    at module level, as in the original.
    """

    def test_reward_function_valid_action(self):
        """One well-formed JSON action yields one reward in [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payload = {
            "feed_rate_h2": 5.0,
            "feed_rate_co": 2.5,
            "cooling_water_flow": 40.0,
            "compressor_power": 65.0,
        }
        rewards = reward_fn([json.dumps(payload)])
        assert len(rewards) == 1
        assert 0.01 <= rewards[0] <= 0.99

    def test_reward_function_invalid_action(self):
        """A completion that is not JSON yields a single reward of 0.01."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        rewards = reward_fn(["not valid json at all"])
        assert len(rewards) == 1
        assert rewards[0] == 0.01

    def test_reward_function_batch(self):
        """Two JSON actions yield two rewards, each in [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payloads = (
            {"feed_rate_h2": 5.0, "feed_rate_co": 2.5,
             "cooling_water_flow": 40.0, "compressor_power": 65.0},
            {"feed_rate_h2": 3.0, "feed_rate_co": 1.5,
             "cooling_water_flow": 60.0, "compressor_power": 50.0},
        )
        actions = [json.dumps(payload) for payload in payloads]
        rewards = reward_fn(actions)
        assert len(rewards) == 2
        for reward in rewards:
            assert 0.01 <= reward <= 0.99

    def test_grpo_config(self):
        """``base_kwargs()`` exposes the expected keys with sane values."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig

        kwargs = MethanolGRPOConfig.base_kwargs()
        # Per the original author's note, these are TRL GRPOConfig parameters.
        assert "learning_rate" in kwargs
        assert kwargs["num_generations"] > 0  # GRPO group size (real TRL key)
        assert kwargs["beta"] >= 0  # KL penalty coefficient
        assert "max_grad_norm" in kwargs

    def test_unsloth_config(self):
        """Unsloth kwargs request 4-bit loading and positive LoRA settings."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig

        model_kwargs = MethanolGRPOConfig.unsloth_model_kwargs()
        lora_kwargs = MethanolGRPOConfig.unsloth_lora_kwargs()
        assert model_kwargs["load_in_4bit"] is True
        assert lora_kwargs["r"] > 0
        assert lora_kwargs["lora_alpha"] > 0
# ============================================================
# 5. ALTERNATE KINETIC MODELS
# ============================================================
class TestAlternateKinetics:
    """Runs the same sanity check against each kinetics function."""

    def _test_kinetics_fn(self, fn):
        """Call ``fn`` at one fixed operating point and check its three rates.

        The leading underscore keeps this from matching the ``test_*`` naming
        pattern, so it acts as a shared helper rather than a test of its own.
        """
        temperature_kelvin = 523.15  # 250 degrees C
        # The seven remaining positional arguments are passed unchanged from
        # the original test; their meaning is defined by the kinetics functions.
        rate_1, rate_2, rate_3 = fn(
            temperature_kelvin, 10.0, 40.0, 2.0, 0.1, 0.1, 1.0, 0.7
        )
        assert rate_1 >= 0
        assert rate_2 >= 0
        assert rate_3 >= 0
        # At least one of the first two rates must be strictly positive.
        assert rate_1 + rate_2 > 0

    def test_graaf_model(self):
        """``_graaf_kinetics`` passes the shared rate check."""
        self._test_kinetics_fn(_graaf_kinetics)

    def test_vbf_model(self):
        """``_vbf_kinetics`` passes the shared rate check."""
        self._test_kinetics_fn(_vbf_kinetics)

    def test_seyfert_model(self):
        """``_seyfert_kinetics`` passes the shared rate check."""
        self._test_kinetics_fn(_seyfert_kinetics)

    def test_nestler_model(self):
        """``_nestler_kinetics`` passes the shared rate check."""
        self._test_kinetics_fn(_nestler_kinetics)
# ============================================================
# 6. RUBRICS TESTS
# ============================================================
class TestRubrics:
    """Smoke tests for ``methanol_apc_env.server.rubrics``."""

    def test_rubric_imports(self):
        """Both rubric classes can be imported and are not ``None``."""
        from methanol_apc_env.server.rubrics import (
            MethanolAPCRubric,
            MethanolStepRubric,
        )

        assert MethanolStepRubric is not None
        assert MethanolAPCRubric is not None

    def test_step_rubric_creation(self):
        """A step rubric built from the optimization task keeps that task."""
        from methanol_apc_env.server.rubrics import MethanolStepRubric

        step_rubric = MethanolStepRubric(TASKS["optimization"])
        assert step_rubric._task.name == "optimization"

    def test_apc_rubric_selects_task(self):
        """``MethanolAPCRubric`` can be constructed with no arguments."""
        from methanol_apc_env.server.rubrics import MethanolAPCRubric

        apc_rubric = MethanolAPCRubric()
        assert apc_rubric is not None
# ============================================================
# 7. LONG HORIZON GRADER EDGE CASES
# ============================================================
class TestLongHorizonEdgeCases:
    """Single-state edge cases for ``grade_long_horizon``."""

    @staticmethod
    def _final_state(produced, health, step):
        """Return a ``ReactorState`` with output, catalyst health and step set."""
        state = ReactorState()
        state.methanol_produced = produced
        state.catalyst_health = health
        state.time_step = step
        return state

    def test_catalyst_destroyed(self):
        """High output with catalyst health 0.0 scores below 0.3."""
        final = self._final_state(produced=10000.0, health=0.0, step=100)
        score = grade_long_horizon([final])
        assert score < 0.3

    def test_target_reached_fast(self):
        """50000.0 produced by step 100 with health 0.8 scores above 0.3."""
        final = self._final_state(produced=50000.0, health=0.8, step=100)
        score = grade_long_horizon([final])
        assert score > 0.3

    def test_target_not_reached(self):
        """Only 1000.0 produced by step 500 scores below 0.5."""
        final = self._final_state(produced=1000.0, health=0.9, step=500)
        score = grade_long_horizon([final])
        assert score < 0.5