"""Extended tests for tasks, rubrics, trl_bridge, and alternate kinetics. Target: push total coverage from ~79% to >90%. """ import sys import os import json sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from methanol_apc_env.server.reactor_sim import ( ReactorState, simulate_step, _graaf_kinetics, _vbf_kinetics, _seyfert_kinetics, _nestler_kinetics, ) from methanol_apc_env.server.tasks import ( TASKS, GRADERS, _clamp_score, grade_startup, grade_optimization, grade_disturbance, grade_long_horizon, grade_emergency_recovery, grade_feed_upset, grade_cost_minimization, grade_pressure_loss, grade_day_night, grade_aged_catalyst, grade_multi_disturbance, grade_max_yield, compute_step_reward, TaskConfig, ) from methanol_apc_env.server.methanol_environment import MethanolAPCEnvironment from methanol_apc_env.models import MethanolAPCAction, MethanolAPCObservation # ---- Helper: run N steps and get trajectory ---- def _run_episode(task_name, steps=None, feed_h2=5.0, feed_co=2.5, cooling=40.0, compressor=65.0): env = MethanolAPCEnvironment() obs = env.reset(task_name=task_name, seed=42) cfg = TASKS[task_name] n = steps or min(cfg.max_steps, 30) for _ in range(n): obs = env.step(MethanolAPCAction( feed_rate_h2=feed_h2, feed_rate_co=feed_co, cooling_water_flow=cooling, compressor_power=compressor, )) if obs.done: break return env # ============================================================ # 1. 
# TASK GRADER TESTS — cover all 12 graders
# ============================================================

class TestAllGraders:
    """Direct calls to the grader functions on hand-built trajectories."""

    def test_all_12_graders_registered(self):
        """GRADERS has 12 entries and contains every key of TASKS."""
        assert len(GRADERS) == 12
        for name in TASKS:
            assert name in GRADERS, f"Missing grader for task '{name}'"

    def test_clamp_score_boundaries(self):
        """Clamped 0.0 and 1.0 stay strictly inside (0, 1); 0.5 stays near the middle."""
        low = _clamp_score(0.0)
        mid = _clamp_score(0.5)
        high = _clamp_score(1.0)
        assert low > 0.0
        assert high < 1.0
        assert 0.45 < mid < 0.55
        assert low < mid < high

    def test_clamp_score_monotonic(self):
        """Clamped values for 0.0, 0.1, ..., 1.0 are strictly increasing."""
        scores = [_clamp_score(x / 10.0) for x in range(11)]
        for lower, upper in zip(scores, scores[1:]):
            assert lower < upper

    # --- Empty trajectory: each grader below is expected to return exactly 0.0 ---

    def test_grade_startup_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_startup([]) == 0.0

    def test_grade_optimization_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_optimization([]) == 0.0

    def test_grade_disturbance_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_disturbance([]) == 0.0

    def test_grade_long_horizon_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_long_horizon([]) == 0.0

    def test_grade_emergency_recovery_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_emergency_recovery([]) == 0.0

    def test_grade_feed_upset_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_feed_upset([]) == 0.0

    def test_grade_cost_minimization_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_cost_minimization([]) == 0.0

    def test_grade_day_night_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_day_night([]) == 0.0

    def test_grade_aged_catalyst_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_aged_catalyst([]) == 0.0

    def test_grade_multi_disturbance_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_multi_disturbance([]) == 0.0

    def test_grade_max_yield_empty(self):
        """Empty trajectory grades to 0.0."""
        assert grade_max_yield([]) == 0.0

    # --- Hand-built trajectories ---

    def test_grade_startup_with_shutdown(self):
        """A single shut-down state at 310.0 grades to exactly 0.0."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        tripped.temperature = 310.0
        assert grade_startup([tripped]) == 0.0

    def test_grade_optimization_with_shutdown(self):
        """A shut-down state with some profit scores within [0.0, 0.5]."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        tripped.cumulative_profit = 5.0
        score = grade_optimization([tripped])
        assert 0.0 <= score <= 0.5

    def test_grade_startup_reached_target(self):
        """A 50-step linear temperature ramp scores above 0.3."""
        trajectory = []
        for step in range(50):
            state = ReactorState()
            state.temperature = 150.0 + step * 2.0  # ramp to 248
            state.time_step = step
            trajectory.append(state)
        assert grade_startup(trajectory) > 0.3

    def test_grade_startup_overshoot(self):
        """A steep ramp whose values above 300 are replaced by 275.0 scores non-negative."""
        trajectory = []
        for step in range(50):
            state = ReactorState()
            state.temperature = 150.0 + step * 4.0  # overshoot to 346
            if state.temperature > 300:
                state.temperature = 275.0
            state.time_step = step
            trajectory.append(state)
        assert grade_startup(trajectory) >= 0.0

    def test_grade_emergency_recovery_success(self):
        """An 80-step cooldown with growing methanol output scores above 0.3."""
        trajectory = []
        for step in range(80):
            state = ReactorState()
            state.temperature = 290.0 - step * 0.5  # cooldown
            state.methanol_produced = step * 3.0
            state.time_step = step
            trajectory.append(state)
        assert grade_emergency_recovery(trajectory) > 0.3

    def test_grade_emergency_recovery_shutdown(self):
        """A single shut-down state grades to exactly 0.0."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        assert grade_emergency_recovery([tripped]) == 0.0

    def test_grade_day_night_stable(self):
        """150 steps alternating 251.0 / 249.0 with growing output score above 0.3."""
        trajectory = []
        for step in range(150):
            wobble = 1.0 if step % 2 == 0 else -1.0
            state = ReactorState()
            state.temperature = 250.0 + wobble
            state.methanol_produced = step * 5.0
            state.time_step = step
            trajectory.append(state)
        assert grade_day_night(trajectory) > 0.3

    def test_grade_day_night_shutdown(self):
        """A single shut-down state grades to exactly 0.1."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        assert grade_day_night([tripped]) == 0.1

    def test_grade_aged_catalyst_production(self):
        """100 steps at 250.0 with catalyst_health 0.35 and growing output score above 0.3."""
        trajectory = []
        for step in range(100):
            state = ReactorState()
            state.temperature = 250.0
            state.methanol_produced = step * 3.0
            state.catalyst_health = 0.35
            state.time_step = step
            trajectory.append(state)
        assert grade_aged_catalyst(trajectory) > 0.3

    def test_grade_aged_catalyst_shutdown(self):
        """A single shut-down state grades to exactly 0.1."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        assert grade_aged_catalyst([tripped]) == 0.1

    def test_grade_multi_disturbance_survival(self):
        """150 steps at 250.0 with growing output score above 0.4."""
        trajectory = []
        for step in range(150):
            state = ReactorState()
            state.temperature = 250.0
            state.methanol_produced = step * 3.0
            state.time_step = step
            trajectory.append(state)
        assert grade_multi_disturbance(trajectory) > 0.4

    def test_grade_max_yield_high(self):
        """A single state with 800.0 methanol at time step 200 scores above 0.5."""
        final = ReactorState()
        final.methanol_produced = 800.0
        final.time_step = 200
        assert grade_max_yield([final]) > 0.5

    def test_grade_max_yield_with_shutdown(self):
        """A shut-down state with 500.0 methanol scores below 0.2."""
        tripped = ReactorState()
        tripped.methanol_produced = 500.0
        tripped.emergency_shutdown = True
        assert grade_max_yield([tripped]) < 0.2

    def test_grade_cost_minimization_profitable(self):
        """A state with profit 55.0 on 100.0 methanol scores at least 0.5."""
        # Realistic profitable trajectory: $0.55/kg (real controllers achieve
        # 0.55-0.57). Threshold $0.80 means score = 0.55/0.80 ~ 0.69.
        final = ReactorState()
        final.cumulative_profit = 55.0
        final.methanol_produced = 100.0
        assert grade_cost_minimization([final]) >= 0.5

    def test_grade_cost_minimization_shutdown(self):
        """A shut-down state with small profit and output grades to exactly 0.1."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        tripped.cumulative_profit = 5.0
        tripped.methanol_produced = 5.0
        assert grade_cost_minimization([tripped]) == 0.1

    def test_grade_feed_upset_shutdown(self):
        """A single shut-down state grades to exactly 0.1."""
        tripped = ReactorState()
        tripped.emergency_shutdown = True
        assert grade_feed_upset([tripped]) == 0.1


# ============================================================
# 2. GRADERS VIA ENVIRONMENT (integration)
# ============================================================

class TestGradersViaEnvironment:
    """Integration checks that read the final score from a stepped environment."""

    def test_all_tasks_produce_valid_scores(self):
        """Every task, run for 15 steps, yields a final score within [0.01, 0.99]."""
        for task_name in TASKS:
            finished_env = _run_episode(task_name, steps=15)
            score = finished_env.get_final_score()
            assert 0.01 <= score <= 0.99, f"Task '{task_name}' score out of range: {score}"


# ============================================================
# 3.
# STEP REWARD TESTS
# ============================================================

class TestStepReward:
    """Checks on compute_step_reward for a few previous/current state pairs."""

    def test_step_reward_normal(self):
        """A small temperature rise with positive step profit rewards within [0.01, 0.99]."""
        task_cfg = TASKS["optimization"]

        before = ReactorState()
        before.temperature = 250.0
        before.catalyst_health = 1.0

        after = ReactorState()
        after.temperature = 252.0
        after.catalyst_health = 0.99
        after.profit_this_step = 0.3

        reward = compute_step_reward(before, after, task_cfg)
        assert 0.01 <= reward <= 0.99

    def test_step_reward_shutdown(self):
        """A current state flagged emergency_shutdown rewards below 0.15."""
        task_cfg = TASKS["optimization"]
        before = ReactorState()
        after = ReactorState()
        after.emergency_shutdown = True
        reward = compute_step_reward(before, after, task_cfg)
        assert reward < 0.15  # should be low

    def test_step_reward_high_temp_penalty(self):
        """With equal profit and catalyst health, 285.0 rewards less than 250.0."""
        task_cfg = TASKS["optimization"]
        before = ReactorState()
        before.temperature = 250.0

        hot = ReactorState()
        hot.temperature = 285.0
        hot.profit_this_step = 0.2
        hot.catalyst_health = 0.9
        hot_reward = compute_step_reward(before, hot, task_cfg)

        # Should be lower than comfortable temperature
        comfortable = ReactorState()
        comfortable.temperature = 250.0
        comfortable.profit_this_step = 0.2
        comfortable.catalyst_health = 0.9
        comfortable_reward = compute_step_reward(before, comfortable, task_cfg)

        assert hot_reward < comfortable_reward

    def test_step_reward_startup_progress(self):
        """For the startup task, warming from 200.0 to 210.0 rewards above 0.01."""
        task_cfg = TASKS["startup"]
        before = ReactorState()
        before.temperature = 200.0

        after = ReactorState()
        after.temperature = 210.0
        after.catalyst_health = 1.0
        after.profit_this_step = 0.0

        reward = compute_step_reward(before, after, task_cfg)
        assert reward > 0.01


# ============================================================
# 4.
# TRL BRIDGE TESTS
# ============================================================

class TestTRLBridge:
    """Tests for methanol_apc_env.trl_bridge; the module is imported inside each test."""

    def test_reward_function_valid_action(self):
        """One well-formed JSON action yields one reward within [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payload = {
            "feed_rate_h2": 5.0,
            "feed_rate_co": 2.5,
            "cooling_water_flow": 40.0,
            "compressor_power": 65.0,
        }
        rewards = reward_fn([json.dumps(payload)])
        assert len(rewards) == 1
        assert 0.01 <= rewards[0] <= 0.99

    def test_reward_function_invalid_action(self):
        """A completion that is not JSON yields exactly one reward equal to 0.01."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        rewards = reward_fn(["not valid json at all"])
        assert len(rewards) == 1
        assert rewards[0] == 0.01

    def test_reward_function_batch(self):
        """Two JSON actions yield two rewards, each within [0.01, 0.99]."""
        from methanol_apc_env.trl_bridge import MethanolRewardFunction

        reward_fn = MethanolRewardFunction(task="optimization", seed=42)
        payloads = [
            {"feed_rate_h2": 5.0, "feed_rate_co": 2.5,
             "cooling_water_flow": 40.0, "compressor_power": 65.0},
            {"feed_rate_h2": 3.0, "feed_rate_co": 1.5,
             "cooling_water_flow": 60.0, "compressor_power": 50.0},
        ]
        actions = [json.dumps(payload) for payload in payloads]
        rewards = reward_fn(actions)
        assert len(rewards) == 2
        for reward in rewards:
            assert 0.01 <= reward <= 0.99

    def test_grpo_config(self):
        """base_kwargs() exposes the expected keys with sane numeric values."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig

        kwargs = MethanolGRPOConfig.base_kwargs()
        # Validate that returned kwargs are valid TRL GRPOConfig parameters.
        assert "learning_rate" in kwargs
        assert kwargs["num_generations"] > 0  # GRPO group size (real TRL key)
        assert kwargs["beta"] >= 0  # KL penalty coefficient
        assert "max_grad_norm" in kwargs

    def test_unsloth_config(self):
        """Unsloth model kwargs request 4-bit loading; LoRA r and alpha are positive."""
        from methanol_apc_env.trl_bridge import MethanolGRPOConfig

        model_kwargs = MethanolGRPOConfig.unsloth_model_kwargs()
        lora_kwargs = MethanolGRPOConfig.unsloth_lora_kwargs()
        assert model_kwargs["load_in_4bit"] is True
        assert lora_kwargs["r"] > 0
        assert lora_kwargs["lora_alpha"] > 0


# ============================================================
# 5.
# ALTERNATE KINETIC MODELS
# ============================================================

class TestAlternateKinetics:
    """Runs the same sanity check against each kinetics function."""

    def _test_kinetics_fn(self, fn):
        """Call ``fn`` with one fixed argument set and sanity-check its three rates.

        The name starts with an underscore, so pytest does not collect it;
        the ``test_*_model`` methods below call it.
        NOTE(review): the meaning of the positional arguments after ``T_K``
        is not visible in this file; confirm against reactor_sim.
        """
        T_K = 523.15  # 250C
        rates = fn(T_K, 10.0, 40.0, 2.0, 0.1, 0.1, 1.0, 0.7)
        r1, r2, r3 = rates
        for rate in (r1, r2, r3):
            assert rate >= 0
        # At least one reaction should produce something
        assert r1 + r2 > 0

    def test_graaf_model(self):
        """Sanity-check _graaf_kinetics."""
        self._test_kinetics_fn(_graaf_kinetics)

    def test_vbf_model(self):
        """Sanity-check _vbf_kinetics."""
        self._test_kinetics_fn(_vbf_kinetics)

    def test_seyfert_model(self):
        """Sanity-check _seyfert_kinetics."""
        self._test_kinetics_fn(_seyfert_kinetics)

    def test_nestler_model(self):
        """Sanity-check _nestler_kinetics."""
        self._test_kinetics_fn(_nestler_kinetics)


# ============================================================
# 6. RUBRICS TESTS
# ============================================================

class TestRubrics:
    """Smoke tests for methanol_apc_env.server.rubrics; imported inside each test."""

    def test_rubric_imports(self):
        """Both rubric classes can be imported and are not None."""
        from methanol_apc_env.server.rubrics import (
            MethanolStepRubric,
            MethanolAPCRubric,
        )

        assert MethanolStepRubric is not None
        assert MethanolAPCRubric is not None

    def test_step_rubric_creation(self):
        """A step rubric built from the optimization task keeps that task's name."""
        from methanol_apc_env.server.rubrics import MethanolStepRubric

        task_cfg = TASKS["optimization"]
        step_rubric = MethanolStepRubric(task_cfg)
        assert step_rubric._task.name == "optimization"

    def test_apc_rubric_selects_task(self):
        """MethanolAPCRubric can be constructed with no arguments."""
        from methanol_apc_env.server.rubrics import MethanolAPCRubric

        apc_rubric = MethanolAPCRubric()
        assert apc_rubric is not None


# ============================================================
# 7.
# LONG HORIZON GRADER EDGE CASES
# ============================================================

class TestLongHorizonEdgeCases:
    """Single-state trajectories probing grade_long_horizon."""

    def test_catalyst_destroyed(self):
        """Output of 10000.0 with catalyst_health 0.0 scores below 0.3."""
        final = ReactorState()
        final.methanol_produced = 10000.0
        final.catalyst_health = 0.0
        final.time_step = 100
        assert grade_long_horizon([final]) < 0.3

    def test_target_reached_fast(self):
        """Output of 50000.0 by time step 100 with healthy catalyst scores above 0.3."""
        final = ReactorState()
        final.methanol_produced = 50000.0
        final.catalyst_health = 0.8
        final.time_step = 100  # fast
        assert grade_long_horizon([final]) > 0.3

    def test_target_not_reached(self):
        """Output of only 1000.0 by time step 500 scores below 0.5."""
        final = ReactorState()
        final.methanol_produced = 1000.0
        final.catalyst_health = 0.9
        final.time_step = 500
        assert grade_long_horizon([final]) < 0.5