Spaces:

Roopalgn
/

openenv-clinical-trial

Sleeping

App Files Files Community

Coding Ninja committed on Apr 21

Commit

1f2ca34

1 Parent(s): 22170b0

Push 3: Curriculum controller, hidden-state pipeline, phase detector, trial judge, and full EpisodeManager wiring

Browse files

Files changed (24) hide show

models.py +1 -0
pyproject.toml +3 -0
server/curriculum/__init__.py +30 -2
server/curriculum/controller.py +147 -0
server/curriculum/scenarios.py +116 -0
server/episode_manager.py +233 -109
server/judge.py +277 -0
server/logger.py +1 -3
server/noise_model.py +5 -0
server/phase_detector.py +125 -0
server/reward/reward_computer.py +9 -20
server/reward/shaping.py +1 -3
server/rules/prerequisite_rules.py +1 -3
server/simulator/__init__.py +1 -1
server/simulator/output_generator.py +238 -0
server/simulator/transition_engine.py +167 -0
server/simulator/trial_simulator.py +3 -3
tests/test_curriculum_controller.py +171 -0
tests/test_episode_logger_wiring.py +2 -6
tests/test_episode_manager_compliance.py +4 -12
tests/test_judge.py +350 -0
tests/test_noise_model.py +3 -9
tests/test_output_generator.py +479 -0
tests/test_phase_detector.py +207 -0

models.py CHANGED Viewed

@@ -100,6 +100,7 @@ class TrialLatentState(BaseModel):
     protocol_submitted: bool
     interim_complete: bool
     trial_complete: bool
     # Episode tracking (used by rule engine and phase detector)
     episode_phase: str
     action_history: list[str]

     protocol_submitted: bool
     interim_complete: bool
     trial_complete: bool
+    adverse_events: int  # cumulative count of recorded adverse events
     # Episode tracking (used by rule engine and phase detector)
     episode_phase: str
     action_history: list[str]

pyproject.toml CHANGED Viewed

@@ -32,6 +32,9 @@ target-version = "py311"
 select = ["E", "F", "W", "I"]
 ignore = []
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 addopts = "-v"

 select = ["E", "F", "W", "I"]
 ignore = []
+[tool.ruff.lint.per-file-ignores]
+"tests/**" = ["E501"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 addopts = "-v"

server/curriculum/__init__.py CHANGED Viewed

@@ -1,7 +1,35 @@
 """
 curriculum — Curriculum controller and scenario registry.
-Provides advance_curriculum, select_scenario, and the four initial
-ScenarioConfig instances (solid_tumor_chemo, autoimmune_biologic,
 cns_depression, rare_disease_orphan).
 """

 """
 curriculum — Curriculum controller and scenario registry.
+Provides advance_curriculum, select_scenario, EpisodeMetrics, and the four
+initial ScenarioConfig instances (solid_tumor_chemo, autoimmune_biologic,
 cns_depression, rare_disease_orphan).
 """
+from server.curriculum.controller import (
+    EpisodeMetrics,
+    advance_curriculum,
+    select_scenario,
+)
+from server.curriculum.scenarios import (
+    AUTOIMMUNE_BIOLOGIC,
+    CNS_DEPRESSION,
+    RARE_DISEASE_ORPHAN,
+    SCENARIO_LIST,
+    SCENARIOS,
+    SOLID_TUMOR_CHEMO,
+    WARMUP,
+)
+__all__ = [
+    "EpisodeMetrics",
+    "advance_curriculum",
+    "select_scenario",
+    "WARMUP",
+    "SOLID_TUMOR_CHEMO",
+    "AUTOIMMUNE_BIOLOGIC",
+    "CNS_DEPRESSION",
+    "RARE_DISEASE_ORPHAN",
+    "SCENARIOS",
+    "SCENARIO_LIST",
+]

server/curriculum/controller.py ADDED Viewed

	@@ -0,0 +1,147 @@

+"""
+Curriculum controller for the Clinical Trial Designer environment.
+Exposes:
+  - advance_curriculum(tier, metrics) -> int
+  - select_scenario(tier, rng) -> ScenarioConfig
+5-tier mastery logic:
+  Tier 0: warmup
+  Tier 1: beginner
+  Tier 2: intermediate
+  Tier 3: advanced
+  Tier 4: expert
+Graduation rules:
+  - 70% rolling success rate over recent episodes → advance one tier
+  - 90% success rate after at least 3 episodes → fast-track (skip one tier)
+  - Max tier is 4 (expert); cannot advance beyond.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Sequence
+import numpy as np
+from models import ScenarioConfig
+from server.curriculum.scenarios import (
+    AUTOIMMUNE_BIOLOGIC,
+    CNS_DEPRESSION,
+    RARE_DISEASE_ORPHAN,
+    SOLID_TUMOR_CHEMO,
+    WARMUP,
+)
+# ── Constants ────────────────────────────────────────────────────────────────
+# Lowest and highest curriculum tiers; select_scenario clamps into this range
+# and advance_curriculum never returns a value above MAX_TIER.
+MIN_TIER: int = 0
+MAX_TIER: int = 4
+MASTERY_THRESHOLD: float = 0.70  # 70% rolling success → graduate
+FAST_TRACK_THRESHOLD: float = 0.90  # 90% success after ≥3 episodes → skip tier
+# Compared against the length of the *whole* episode history, not the window.
+FAST_TRACK_MIN_EPISODES: int = 3
+# Rolling window size for success-rate calculation (last N history entries)
+ROLLING_WINDOW: int = 10
+# Tier → ScenarioConfig mapping (one canonical scenario per tier)
+_TIER_SCENARIO: dict[int, ScenarioConfig] = {
+    0: WARMUP,
+    1: SOLID_TUMOR_CHEMO,
+    2: AUTOIMMUNE_BIOLOGIC,
+    3: CNS_DEPRESSION,
+    4: RARE_DISEASE_ORPHAN,
+}
+# Human-readable tier labels; not referenced by the functions in this module.
+TIER_NAMES: dict[int, str] = {
+    0: "warmup",
+    1: "beginner",
+    2: "intermediate",
+    3: "advanced",
+    4: "expert",
+}
+# ── EpisodeMetrics ────────────────────────────────────────────────────────────
+@dataclass
+class EpisodeMetrics:
+    """Outcome of one completed episode together with the recent success history.
+    ``advance_curriculum`` bases its decision only on ``episode_history``; it
+    does not read ``success``.  The current episode's outcome therefore has to
+    be appended to the history by the caller *before* the metrics are passed in.
+    Attributes:
+        success: Whether the episode ended in a successful trial outcome.
+        episode_history: Success flags of recent episodes, oldest first (the
+            most recent episode is appended last).  Only the final
+            ``ROLLING_WINDOW`` entries contribute to the rolling success rate.
+    """
+    # Outcome of the just-completed episode.
+    success: bool
+    # default_factory gives every instance its own list (no shared mutable default).
+    episode_history: list[bool] = field(default_factory=list)
+# ── Public API ────────────────────────────────────────────────────────────────
+def advance_curriculum(tier: int, metrics: EpisodeMetrics) -> int:
+    """Return the updated curriculum tier after evaluating episode metrics.
+    Args:
+        tier: Current curriculum tier (0–4).
+        metrics: Performance metrics for the just-completed episode.
+            ``metrics.episode_history`` must already include the current
+            episode's success value as its last element.
+    Returns:
+        The new curriculum tier.  May be the same tier (not yet mastered),
+        ``tier + 1`` (normal graduation), or ``tier + 2`` (fast-track skip).
+        Never exceeds ``MAX_TIER``.
+    """
+    if tier >= MAX_TIER:
+        return MAX_TIER
+    history: Sequence[bool] = metrics.episode_history
+    n_episodes = len(history)
+    if n_episodes == 0:
+        return tier
+    # Use the most recent ROLLING_WINDOW episodes for the rolling rate
+    window = list(history[-ROLLING_WINDOW:])
+    rolling_rate = sum(window) / len(window)
+    # Fast-track: 90%+ success after at least 3 episodes → skip one tier
+    if n_episodes >= FAST_TRACK_MIN_EPISODES and rolling_rate >= FAST_TRACK_THRESHOLD:
+        new_tier = min(tier + 2, MAX_TIER)
+        return new_tier
+    # Normal graduation: 70%+ rolling success → advance one tier
+    if rolling_rate >= MASTERY_THRESHOLD:
+        return min(tier + 1, MAX_TIER)
+    return tier
+def select_scenario(tier: int, rng: np.random.Generator) -> ScenarioConfig:
+    """Select a ScenarioConfig appropriate for the given curriculum tier.
+    At tier 0 (warmup) the solid_tumor_chemo scenario is returned with an
+    inflated effect size (already encoded in the WARMUP ScenarioConfig).
+    Args:
+        tier: Current curriculum tier (0–4).  Values outside [0, 4] are
+            clamped to the valid range.
+        rng: A seeded ``numpy.random.Generator`` used for any stochastic
+            selection.  Currently each tier maps to exactly one scenario, so
+            ``rng`` is accepted for API consistency and future extensibility
+            (e.g. sampling from a pool of scenarios at the same tier).
+    Returns:
+        The ``ScenarioConfig`` for the given tier.
+    """
+    clamped_tier = max(MIN_TIER, min(tier, MAX_TIER))
+    return _TIER_SCENARIO[clamped_tier]

server/curriculum/scenarios.py ADDED Viewed

	@@ -0,0 +1,116 @@

+"""
+Scenario registry for the curriculum controller.
+Defines ScenarioConfig instances for all four scenario IDs plus a tier-0 warmup
+variant of solid_tumor_chemo with an inflated effect size.
+"""
+from models import ScenarioConfig
+# Tier 0 — warmup (solid_tumor_chemo with inflated effect size, easier)
+WARMUP = ScenarioConfig(
+    scenario_id="solid_tumor_chemo_warmup",
+    curriculum_tier=0,
+    disease_area="oncology",
+    effect_size_range=(0.55, 0.85),  # inflated vs tier-1 (0.25–0.55)
+    side_effect_rate_range=(0.10, 0.25),
+    placebo_response_range=(0.05, 0.15),
+    dropout_rate_range=(0.05, 0.10),
+    budget_usd=8_000_000.0,
+    time_budget_days=365,
+    min_sample_size=60,
+    description=(
+        "Warmup scenario: EGFR+ solid-tumour chemotherapy with an inflated "
+        "effect size to help the agent learn basic trial-design mechanics."
+    ),
+)
+# Tier 1 — EGFR+ subgroup enrichment
+SOLID_TUMOR_CHEMO = ScenarioConfig(
+    scenario_id="solid_tumor_chemo",
+    curriculum_tier=1,
+    disease_area="oncology",
+    effect_size_range=(0.25, 0.55),
+    side_effect_rate_range=(0.15, 0.35),
+    placebo_response_range=(0.05, 0.15),
+    dropout_rate_range=(0.05, 0.15),
+    budget_usd=10_000_000.0,
+    time_budget_days=540,
+    min_sample_size=80,
+    description=(
+        "EGFR+ solid-tumour chemotherapy. Agent must identify the EGFR+ "
+        "biomarker subgroup to unlock the true effect size."
+    ),
+)
+# Tier 2 — U-shaped dose-response
+AUTOIMMUNE_BIOLOGIC = ScenarioConfig(
+    scenario_id="autoimmune_biologic",
+    curriculum_tier=2,
+    disease_area="immunology",
+    effect_size_range=(0.20, 0.45),
+    side_effect_rate_range=(0.10, 0.30),
+    placebo_response_range=(0.15, 0.30),
+    dropout_rate_range=(0.08, 0.18),
+    budget_usd=15_000_000.0,
+    time_budget_days=720,
+    min_sample_size=120,
+    description=(
+        "Autoimmune biologic with a U-shaped dose-response curve. "
+        "Agent must run dose-escalation to find the optimal dose window."
+    ),
+)
+# Tier 3 — high placebo response
+CNS_DEPRESSION = ScenarioConfig(
+    scenario_id="cns_depression",
+    curriculum_tier=3,
+    disease_area="psychiatry",
+    effect_size_range=(0.15, 0.35),
+    side_effect_rate_range=(0.10, 0.25),
+    placebo_response_range=(0.35, 0.55),  # high placebo response
+    dropout_rate_range=(0.10, 0.25),
+    budget_usd=20_000_000.0,
+    time_budget_days=900,
+    min_sample_size=200,
+    description=(
+        "CNS depression trial with a high placebo-response rate. "
+        "Agent must power the study to detect a small drug-placebo delta."
+    ),
+)
+# Tier 4 — rare disease / tiny n
+RARE_DISEASE_ORPHAN = ScenarioConfig(
+    scenario_id="rare_disease_orphan",
+    curriculum_tier=4,
+    disease_area="rare_disease",
+    effect_size_range=(0.40, 0.80),  # larger effect needed to compensate tiny n
+    side_effect_rate_range=(0.05, 0.20),
+    placebo_response_range=(0.05, 0.15),
+    dropout_rate_range=(0.05, 0.15),
+    budget_usd=5_000_000.0,
+    time_budget_days=1080,
+    min_sample_size=10,  # tiny n — orphan disease
+    description=(
+        "Rare-disease orphan drug trial with a very small patient population. "
+        "Agent must justify statistical validity under FDA orphan-drug rules."
+    ),
+)
+# Registry — keyed by scenario_id for O(1) lookup
+SCENARIOS: dict[str, ScenarioConfig] = {
+    WARMUP.scenario_id: WARMUP,
+    SOLID_TUMOR_CHEMO.scenario_id: SOLID_TUMOR_CHEMO,
+    AUTOIMMUNE_BIOLOGIC.scenario_id: AUTOIMMUNE_BIOLOGIC,
+    CNS_DEPRESSION.scenario_id: CNS_DEPRESSION,
+    RARE_DISEASE_ORPHAN.scenario_id: RARE_DISEASE_ORPHAN,
+}
+# Convenience list ordered by tier
+SCENARIO_LIST: list[ScenarioConfig] = [
+    WARMUP,
+    SOLID_TUMOR_CHEMO,
+    AUTOIMMUNE_BIOLOGIC,
+    CNS_DEPRESSION,
+    RARE_DISEASE_ORPHAN,
+]

server/episode_manager.py CHANGED Viewed

@@ -10,38 +10,45 @@ from __future__ import annotations
 import random
 import uuid
 from models import (
     RewardBreakdown,
     ScenarioConfig,
     TrialAction,
     TrialLatentState,
     TrialObservation,
-    TrialResult,
     TrialState,
 )
 from server.logger import EpisodeLogger
 from server.noise_model import NoiseModel
 from server.rules.fda_rules import check_fda_compliance
-# Default scenario used until CurriculumController is fully wired (Push 3).
-_DEFAULT_SCENARIO = ScenarioConfig(
-    scenario_id="solid_tumor_chemo",
-    curriculum_tier=0,
-    disease_area="NSCLC",
-    effect_size_range=(0.3, 0.7),
-    side_effect_rate_range=(0.05, 0.20),
-    placebo_response_range=(0.10, 0.25),
-    dropout_rate_range=(0.05, 0.15),
-    budget_usd=1_000_000.0,
-    time_budget_days=365,
-    min_sample_size=100,
-    description="Solid tumor chemotherapy — find EGFR+ subgroup",
-)
 _MAX_STEPS = 100
 class EpisodeManager:
     """Orchestrates the reset/step lifecycle for a single clinical trial episode.
@@ -58,22 +65,35 @@ class EpisodeManager:
         self._episode_id: str = ""
         self._difficulty: float = 0.0
         self._scenario: ScenarioConfig | None = None
     # ------------------------------------------------------------------
     # Public API
     # ------------------------------------------------------------------
     def reset(self, seed: int | None = None) -> TrialObservation:
-        """Initialize a new episode and return the initial TrialObservation."""
         resolved_seed = seed if seed is not None else random.randint(0, 2**31 - 1)
         self._episode_id = str(uuid.uuid4())
-        # Step 1: Select scenario (stub — CurriculumController wired in Push 3)
-        scenario = _DEFAULT_SCENARIO
         self._scenario = scenario
-        # Step 2: Apply domain randomization via NoiseModel (req 9.1, 9.2)
         noise_model = NoiseModel(seed=resolved_seed)
         randomized = noise_model.randomize(scenario)
         # Sample concrete hidden values from randomized ranges
@@ -109,6 +129,7 @@ class EpisodeManager:
             protocol_submitted=False,
             interim_complete=False,
             trial_complete=False,
             episode_phase="literature_review",
             action_history=[],
             seed=resolved_seed,
@@ -117,26 +138,55 @@ class EpisodeManager:
         # Step 4: Build lightweight TrialState for training loop
         self._state = self._state_from_latent(self._latent, randomized)
         self._clear_cache()
-        # Step 5: Fresh logger and reward accumulator
         self._logger = EpisodeLogger(
-            curriculum_tier=randomized.curriculum_tier
         )
         self._total_reward = 0.0
-        self._difficulty = 0.0
-        return self._observation_from_latent(self._latent, randomized)
     def step(
         self, action: TrialAction
     ) -> tuple[TrialObservation, RewardBreakdown, bool, dict]:
-        """Advance the episode by one step."""
         if self._latent is None or self._scenario is None:
             raise RuntimeError("No active episode. Call reset() before step().")
         try:
-            # Check FDA compliance against latent state (req 10.1, 10.4)
             compliance = check_fda_compliance(action, self._latent)
             if not compliance.valid:
@@ -146,73 +196,116 @@ class EpisodeManager:
                     r_info_gain=0.0,
                     r_efficiency=0.0,
                     r_novelty=0.0,
-                    r_penalty=0.0,
                     r_terminal_success=0.0,
                     r_terminal_calibration=0.0,
                 )
                 done = False
                 info: dict = {
-                    "step_index": len(self._latent.action_history),
                     "action_valid": False,
                     "violations": compliance.violations,
                 }
-                obs = self._observation_from_latent(
-                    self._latent,
-                    self._scenario,
                     rule_violations=compliance.violations,
                 )
                 if self._logger is not None:
-                    self._logger.log_step(
-                        len(self._latent.action_history), action, obs, reward, done
-                    )
                 return obs, reward, done, info
-            # Valid action: advance latent state
-            self._latent = self._latent.model_copy(
-                update={
-                    "action_history": (
-                        self._latent.action_history + [action.action_type.value]
-                    ),
-                }
             )
-            # Stub TrialResult
-            _result = TrialResult(
-                p_value=0.05,
-                success=False,
-                power=0.8,
-                adverse_event_rate=0.1,
-                confidence_interval=(0.0, 1.0),
-                failure_reason=None,
-            )
-            # Stub reward
-            reward = RewardBreakdown(
-                r_validity=0.0,
-                r_ordering=0.0,
-                r_info_gain=0.0,
-                r_efficiency=0.0,
-                r_novelty=0.0,
-                r_penalty=0.0,
-                r_terminal_success=0.0,
-                r_terminal_calibration=0.0,
             )
             step_idx = len(self._latent.action_history)
             done = step_idx >= _MAX_STEPS or self._latent.trial_complete
-            info = {"step_index": step_idx, "action_valid": True}
-            obs = self._observation_from_latent(self._latent, self._scenario)
-            # Update training-loop TrialState
-            self._state = self._state_from_latent(self._latent, self._scenario)
-            # Accumulate reward and log step (req 7.1)
-            self._total_reward += sum(reward.model_dump().values())
             if self._logger is not None:
                 self._logger.log_step(step_idx, action, obs, reward, done)
-            # Log summary on episode end (req 7.2)
             if done and self._logger is not None:
                 self._logger.log_summary(
                     scenario_id=self._scenario.scenario_id,
@@ -223,11 +316,22 @@ class EpisodeManager:
                     ),
                 )
             return obs, reward, done, info
         except RuntimeError:
             raise
-        except Exception as exc:  # req 10.4: no unhandled exceptions
             reward = RewardBreakdown(
                 r_validity=-1.0,
                 r_ordering=0.0,
@@ -244,10 +348,50 @@ class EpisodeManager:
                 "action_valid": False,
                 "violations": [f"Internal error: {exc}"],
             }
-            obs = self._observation_from_latent(
-                self._latent,
-                self._scenario,
-                rule_violations=[f"Internal error: {exc}"],
             )
             return obs, reward, False, info
@@ -276,9 +420,20 @@ class EpisodeManager:
         """Build the lightweight TrialState from latent state."""
         step_count = len(latent.action_history)
         unique_actions = len(set(latent.action_history))
-        action_diversity = (
-            unique_actions / step_count if step_count > 0 else 0.0
-        )
         return TrialState(
             episode_id=self._episode_id,
             step_count=step_count,
@@ -287,37 +442,6 @@ class EpisodeManager:
             curriculum_tier=str(scenario.curriculum_tier),
             curriculum_stats={},
             action_diversity=action_diversity,
-            phase_compliance_rate=0.0,  # wired in Push 3 with PhaseDetector
             is_resolved=latent.trial_complete,
         )
-    def _observation_from_latent(
-        self,
-        latent: TrialLatentState,
-        scenario: ScenarioConfig,
-        rule_violations: list[str] | None = None,
-    ) -> TrialObservation:
-        """Build a TrialObservation from latent state — noisy, agent-facing."""
-        return TrialObservation(
-            scenario_description=scenario.description,
-            phase_data={
-                "episode_phase": latent.episode_phase,
-                "observed_effect_estimate": None,
-                "observed_side_effect_rate": None,
-                "phase_i_complete": latent.phase_i_complete,
-                "interim_complete": latent.interim_complete,
-                "protocol_submitted": latent.protocol_submitted,
-            },
-            resource_status={
-                "budget_remaining": latent.budget_remaining,
-                "time_remaining_days": latent.time_remaining_days,
-                "patients_enrolled": latent.patients_enrolled,
-            },
-            rule_violations=rule_violations or [],
-            available_actions=[],  # wired in Push 3 with TransitionEngine
-            steps_taken=len(latent.action_history),
-            max_steps=_MAX_STEPS,
-            hint="",  # populated by TrialJudge at junior difficulty (Push 3)
-            done=latent.trial_complete,
-            reward=0.0,  # filled in by step() after reward computation
-        )

 import random
 import uuid
+from datetime import datetime, timezone
+import numpy as np
 from models import (
+    EpisodeTranscript,
     RewardBreakdown,
     ScenarioConfig,
     TrialAction,
     TrialLatentState,
     TrialObservation,
     TrialState,
 )
+from server.curriculum.controller import select_scenario
+from server.judge import TrialJudge
 from server.logger import EpisodeLogger
 from server.noise_model import NoiseModel
+from server.phase_detector import detect_phase
+from server.reward.reward_computer import compute_reward
 from server.rules.fda_rules import check_fda_compliance
+from server.simulator.output_generator import OutputGenerator
+from server.simulator.transition_engine import TransitionEngine
+from server.simulator.trial_simulator import simulate_trial
 _MAX_STEPS = 100
+def _phase_order_correct_at(phase: str, prior_history: list[str]) -> bool:
+    """Return True if `phase` is a valid next phase given `prior_history`."""
+    from server.phase_detector import PHASE_ORDER
+    if not prior_history:
+        return True
+    last = prior_history[-1]
+    last_idx = PHASE_ORDER.index(last) if last in PHASE_ORDER else 0
+    current_idx = PHASE_ORDER.index(phase) if phase in PHASE_ORDER else 0
+    return current_idx >= last_idx and (current_idx - last_idx) <= 1
 class EpisodeManager:
     """Orchestrates the reset/step lifecycle for a single clinical trial episode.
         self._episode_id: str = ""
         self._difficulty: float = 0.0
         self._scenario: ScenarioConfig | None = None
+        self._phase_history: list[str] = []
+        self._noise_model: NoiseModel | None = None
+        self._curriculum_tier: int = 0
+        self._transition_engine: TransitionEngine = TransitionEngine()
+        self._judge: TrialJudge = TrialJudge()
     # ------------------------------------------------------------------
     # Public API
     # ------------------------------------------------------------------
     def reset(self, seed: int | None = None) -> TrialObservation:
+        """Initialize a new episode and return the initial TrialObservation.
+        Seeded resets are reproducible: same seed → same scenario selection
+        and initial TrialLatentState (Req 8.5, 9.4).
+        """
         resolved_seed = seed if seed is not None else random.randint(0, 2**31 - 1)
         self._episode_id = str(uuid.uuid4())
+        # Step 1: Select scenario via CurriculumController (Req 8.3, 8.5)
+        # Use a seeded RNG so scenario selection is reproducible for same seed.
+        scenario_rng = np.random.default_rng(resolved_seed)
+        scenario = select_scenario(self._curriculum_tier, scenario_rng)
         self._scenario = scenario
+        # Step 2: Apply domain randomization via NoiseModel (Req 9.1, 9.2)
+        # NoiseModel is seeded so same seed → same randomized config.
         noise_model = NoiseModel(seed=resolved_seed)
+        self._noise_model = noise_model
         randomized = noise_model.randomize(scenario)
         # Sample concrete hidden values from randomized ranges
             protocol_submitted=False,
             interim_complete=False,
             trial_complete=False,
+            adverse_events=0,
             episode_phase="literature_review",
             action_history=[],
             seed=resolved_seed,
         # Step 4: Build lightweight TrialState for training loop
         self._state = self._state_from_latent(self._latent, randomized)
+        # Step 5: Clear power cache (Req 14.3)
         self._clear_cache()
+        self._phase_history = []
+        # Step 6: Fresh logger (episode_id matches this episode), reward accumulator
         self._logger = EpisodeLogger(
+            episode_id=self._episode_id,
+            curriculum_tier=randomized.curriculum_tier,
         )
         self._total_reward = 0.0
+        # Difficulty scales linearly with curriculum tier: tier 0 → 0.0, tier 4 → 1.0
+        self._difficulty = scenario.curriculum_tier / 4.0
+        # Step 7: Return initial TrialObservation via OutputGenerator
+        output_gen = OutputGenerator(noise_model)
+        return output_gen.generate(
+            latent=self._latent,
+            trial_state=self._state,
+            steps_taken=0,
+            max_steps=_MAX_STEPS,
+            rule_violations=[],
+            done=False,
+            reward=0.0,
+            scenario_description=scenario.description,
+            hint="",
+        )
     def step(
         self, action: TrialAction
     ) -> tuple[TrialObservation, RewardBreakdown, bool, dict]:
+        """Advance the episode by one step.
+        Full pipeline (Req 8.5, 9.4, 7.1):
+          1. Validate active episode
+          2. check_fda_compliance → ComplianceResult
+          3. TransitionEngine.apply_transition() mutates TrialLatentState
+          4. OutputGenerator.generate() produces noisy TrialObservation
+          5. compute_reward() → RewardBreakdown
+          6. PhaseDetector.detect_phase() classifies action
+          7. TrialJudge.verify() for hint/feedback
+          8. Check terminal condition
+          9. Log full EpisodeTranscript to JSONL
+          10. Return (obs, reward_breakdown, done, info)
+        """
         if self._latent is None or self._scenario is None:
             raise RuntimeError("No active episode. Call reset() before step().")
         try:
+            # Step 1: Check FDA compliance (read-only, does not mutate state)
             compliance = check_fda_compliance(action, self._latent)
             if not compliance.valid:
                     r_info_gain=0.0,
                     r_efficiency=0.0,
                     r_novelty=0.0,
+                    r_penalty=-0.5 * len(compliance.violations),
                     r_terminal_success=0.0,
                     r_terminal_calibration=0.0,
                 )
                 done = False
+                step_idx = len(self._latent.action_history)
                 info: dict = {
+                    "step_index": step_idx,
                     "action_valid": False,
                     "violations": compliance.violations,
                 }
+                # Build observation without mutating latent
+                noise_model = self._noise_model or NoiseModel(seed=self._latent.seed)
+                output_gen = OutputGenerator(noise_model)
+                obs = output_gen.generate(
+                    latent=self._latent,
+                    trial_state=self._state
+                    or self._state_from_latent(self._latent, self._scenario),
+                    steps_taken=step_idx,
+                    max_steps=_MAX_STEPS,
                     rule_violations=compliance.violations,
+                    done=False,
+                    reward=reward.total,
+                    scenario_description=self._scenario.description,
+                    hint="",
                 )
+                # Log invalid step
                 if self._logger is not None:
+                    self._logger.log_step(step_idx, action, obs, reward, done)
                 return obs, reward, done, info
+            # Step 2: TransitionEngine mutates TrialLatentState
+            updated_latent = self._transition_engine.apply_transition(
+                self._latent, action
             )
+            self._latent = updated_latent
+            # Step 3: Detect phase and update phase history
+            phase_name, phase_order_correct = detect_phase(action, self._phase_history)
+            self._phase_history = self._phase_history + [phase_name]
+            # Step 4: Simulate trial result for reward computation
+            result = simulate_trial(self._latent, action)
+            # Step 5: Compute reward (all 8 components)
+            reward = compute_reward(
+                action=action,
+                latent=self._latent,
+                result=result,
+                phase_history=self._phase_history[:-1],  # history before this step
             )
+            # Step 6: TrialJudge verification (hint + overconfidence penalty)
+            self._state = self._state_from_latent(self._latent, self._scenario)
+            judge_result = self._judge.verify(action, self._state, self._latent)
+            hint = judge_result.hint or ""
+            # Apply overconfidence penalty to r_penalty
+            if judge_result.overconfidence_penalty != 0.0:
+                reward = reward.model_copy(
+                    update={
+                        "r_penalty": (
+                            reward.r_penalty + judge_result.overconfidence_penalty
+                        )
+                    }
+                )
+            # Step 7: Check terminal condition
             step_idx = len(self._latent.action_history)
             done = step_idx >= _MAX_STEPS or self._latent.trial_complete
+            # Step 8: Generate noisy observation via OutputGenerator
+            noise_model = self._noise_model or NoiseModel(seed=self._latent.seed)
+            output_gen = OutputGenerator(noise_model)
+            obs = output_gen.generate(
+                latent=self._latent,
+                trial_state=self._state,
+                steps_taken=step_idx,
+                max_steps=_MAX_STEPS,
+                rule_violations=[],
+                done=done,
+                reward=reward.total,
+                scenario_description=self._scenario.description,
+                hint=hint,
+            )
+            # Step 9: Accumulate total reward
+            self._total_reward += reward.total
+            # Step 10: Log full EpisodeTranscript record to JSONL (Req 7.1)
+            transcript = EpisodeTranscript(
+                episode_id=self._episode_id,
+                step=step_idx,
+                action=action,
+                observation=obs,
+                reward_breakdown=reward.model_dump(),
+                total_reward=reward.total,
+                phase_detected=phase_name,
+                phase_order_correct=phase_order_correct,
+                hidden_state_snapshot=self._latent,
+                timestamp=datetime.now(timezone.utc).isoformat(),
+            )
             if self._logger is not None:
                 self._logger.log_step(step_idx, action, obs, reward, done)
+                # Also write the full EpisodeTranscript as a separate JSONL record
+                self._logger._append_jsonl(
+                    {"type": "transcript", **transcript.model_dump(mode="json")}
+                )
+            # Log summary on episode end (Req 7.2)
             if done and self._logger is not None:
                 self._logger.log_summary(
                     scenario_id=self._scenario.scenario_id,
                     ),
                 )
+            info = {
+                "step_index": step_idx,
+                "action_valid": True,
+                "phase_detected": phase_name,
+                "phase_order_correct": phase_order_correct,
+                "judge_passed": judge_result.passed,
+                "judge_feedback": judge_result.feedback,
+                "judge_hint": hint,
+                "overconfidence_penalty": judge_result.overconfidence_penalty,
+            }
             return obs, reward, done, info
         except RuntimeError:
             raise
+        except Exception as exc:  # Req 10.4: no unhandled exceptions
             reward = RewardBreakdown(
                 r_validity=-1.0,
                 r_ordering=0.0,
                 "action_valid": False,
                 "violations": [f"Internal error: {exc}"],
             }
+            noise_model = self._noise_model or NoiseModel(
+                seed=self._latent.seed if self._latent else 0
+            )
+            output_gen = OutputGenerator(noise_model)
+            obs = (
+                output_gen.generate(
+                    latent=self._latent,
+                    trial_state=self._state
+                    or TrialState(
+                        episode_id=self._episode_id,
+                        step_count=step_idx,
+                        difficulty=self._difficulty,
+                        scenario_id=self._scenario.scenario_id
+                        if self._scenario
+                        else "",
+                        curriculum_tier="0",
+                        curriculum_stats={},
+                        action_diversity=0.0,
+                        phase_compliance_rate=0.0,
+                        is_resolved=False,
+                    ),
+                    steps_taken=step_idx,
+                    max_steps=_MAX_STEPS,
+                    rule_violations=[f"Internal error: {exc}"],
+                    done=False,
+                    reward=reward.total,
+                    scenario_description=(
+                        self._scenario.description if self._scenario else ""
+                    ),
+                    hint="",
+                )
+                if self._latent is not None
+                else TrialObservation(
+                    scenario_description="",
+                    phase_data={},
+                    resource_status={},
+                    rule_violations=[f"Internal error: {exc}"],
+                    available_actions=[],
+                    steps_taken=step_idx,
+                    max_steps=_MAX_STEPS,
+                    hint="",
+                    done=False,
+                    reward=0.0,
+                )
             )
             return obs, reward, False, info
         """Build the lightweight TrialState from latent state."""
         step_count = len(latent.action_history)
         unique_actions = len(set(latent.action_history))
+        action_diversity = unique_actions / step_count if step_count > 0 else 0.0
+        # Compute phase compliance rate from phase history
+        phase_steps = len(self._phase_history)
+        if phase_steps > 0:
+            correct_count = sum(
+                1
+                for i, ph in enumerate(self._phase_history)
+                if _phase_order_correct_at(ph, self._phase_history[:i])
+            )
+            phase_compliance_rate = correct_count / phase_steps
+        else:
+            phase_compliance_rate = 0.0
         return TrialState(
             episode_id=self._episode_id,
             step_count=step_count,
             curriculum_tier=str(scenario.curriculum_tier),
             curriculum_stats={},
             action_diversity=action_diversity,
+            phase_compliance_rate=phase_compliance_rate,
             is_resolved=latent.trial_complete,
         )

server/judge.py ADDED Viewed

	@@ -0,0 +1,277 @@

+"""
+Trial Judge — multi-layer verification for clinical trial design decisions.
+Layer 1 (programmatic, authoritative, never overridden):
+  - power >= 0.80
+  - p_value < 0.05
+  - FDA compliance passes
+  - budget_remaining > 0
+Layer 2 (persona-scaled LLM stub):
+  - junior  (difficulty < 0.4): gives hints, lenient feedback
+  - senior  (0.4–0.7):          balanced feedback
+  - principal (> 0.7):          strict, no hints
+Overconfidence penalty: -0.5 per Layer 1 violation when the action fails
+verification and action.confidence >= 0.8 (a high-confidence wrong claim).
+"""
+from __future__ import annotations
+from pydantic import BaseModel
+from models import TrialAction, TrialLatentState, TrialState
+from server.rules.fda_rules import check_fda_compliance
+from server.simulator.power_calculator import calculate_power
+# ---------------------------------------------------------------------------
+# Result model
+# ---------------------------------------------------------------------------
+class JudgeResult(BaseModel):
+    """Output of TrialJudge.verify().
+    Bundles the authoritative Layer 1 verdict with the persona-scaled Layer 2
+    feedback for a single action.
+    """
+    # True only when Layer 1 recorded no violations.
+    passed: bool
+    # Human-readable Layer 1 violation messages, in the order they were found.
+    violations: list[str]
+    # Persona-specific feedback text produced by Layer 2.
+    feedback: str
+    # Optional hint text; None when the persona gives no hint.
+    hint: str | None
+    # 0.0, or a negative value scaled by the number of violations when a
+    # failing action was submitted with high confidence.
+    overconfidence_penalty: float
+    # Persona name chosen from the difficulty: "junior", "senior" or "principal".
+    persona: str
+# ---------------------------------------------------------------------------
+# Persona thresholds
+# ---------------------------------------------------------------------------
+# Difficulty strictly below this value selects the "junior" persona.
+_JUNIOR_MAX = 0.4
+# Difficulty up to and including this value selects "senior"; above it, "principal".
+_SENIOR_MAX = 0.7
+# action.confidence at or above this value counts as high confidence.
+_HIGH_CONFIDENCE_THRESHOLD = 0.8
+# Penalty per Layer 1 violation when a failing action is high-confidence.
+_OVERCONFIDENCE_PENALTY = -0.5
+def _select_persona(difficulty: float) -> str:
+    """Map a difficulty score to the judge persona name.
+    Returns "junior" when difficulty is below _JUNIOR_MAX, "senior" when it is
+    at most _SENIOR_MAX, and "principal" otherwise.
+    """
+    if not difficulty < _JUNIOR_MAX:
+        # At or above the junior cut-off: senior up to and including _SENIOR_MAX.
+        return "senior" if difficulty <= _SENIOR_MAX else "principal"
+    return "junior"
+# ---------------------------------------------------------------------------
+# Layer 2: rule-based LLM stub
+# ---------------------------------------------------------------------------
+def _generate_feedback(
+    persona: str,
+    violations: list[str],
+    passed: bool,
+    action: TrialAction,
+    latent: TrialLatentState,
+) -> tuple[str, str | None]:
+    """Build the Layer 2 (feedback, hint) pair for one judged action.
+    Rule-based stand-in for an LLM call: the text depends only on the persona,
+    the pass/fail verdict and the violation list. Only the "junior" persona
+    ever receives a hint; any persona other than "junior" or "senior" gets the
+    "principal" wording.
+    """
+    label = action.action_type.value.replace("_", " ")
+    if passed:
+        if persona == "junior":
+            return (
+                f"Good work on '{label}'! Your trial design looks solid. "
+                "Power and significance thresholds are met. Keep it up!",
+                "Tip: continue building on this foundation — "
+                "consider biomarker stratification next to improve precision.",
+            )
+        if persona == "senior":
+            return (
+                f"'{label}' passes all programmatic checks. "
+                "Statistical power and p-value criteria are satisfied. "
+                "Proceed to the next design step.",
+                None,
+            )
+        # principal
+        return (
+            f"'{label}' meets minimum criteria. "
+            "Ensure alpha-spending and interim analysis boundaries "
+            "are pre-specified before submission.",
+            None,
+        )
+    # Failed verification: summarise the violations for every persona.
+    summary = "; ".join(violations) if violations else "unknown issue"
+    if persona == "junior":
+        message = (
+            f"'{label}' did not pass verification. "
+            f"Issues found: {summary}. "
+            "Review the requirements and try again."
+        )
+        return message, _build_hint_for_violations(violations, latent)
+    if persona == "senior":
+        return (
+            f"'{label}' failed verification. "
+            f"Violations: {summary}. "
+            "Address these before proceeding.",
+            None,
+        )
+    # principal
+    return (
+        f"'{label}' is non-compliant. "
+        f"Violations: {summary}. "
+        "No further guidance will be provided — resolve independently.",
+        None,
+    )
+def _build_hint_for_violations(
+    violations: list[str], latent: TrialLatentState
+) -> str | None:
+    """Pick a junior-persona hint from the first violation message.
+    Only violations[0] is inspected (case-insensitively). The first rule whose
+    keyword appears in it wins; if none match, the violation text itself is
+    echoed as the hint. Returns None when there are no violations.
+    """
+    if not violations:
+        return None
+    first = violations[0].lower()
+    # Ordered (keywords, hint builder) rules. Builders are lazy so that latent
+    # fields are only formatted for the rule that actually fires.
+    rules = (
+        (
+            ("power",),
+            lambda: (
+                "Hint: current power is below 0.80. "
+                "Try increasing the sample size — "
+                "more patients enrolled improves statistical power."
+            ),
+        ),
+        (
+            ("p-value", "p_value", "significance"),
+            lambda: (
+                "Hint: the p-value threshold of 0.05 is not met. "
+                "Consider a larger effect size or more patients."
+            ),
+        ),
+        (
+            ("budget",),
+            lambda: (
+                f"Hint: budget is exhausted (remaining: {latent.budget_remaining:.2f}). "
+                "Look for cost-saving measures or request a protocol amendment."
+            ),
+        ),
+        (
+            ("fda", "compliance", "permitted"),
+            lambda: (
+                "Hint: this action is not allowed in the current phase "
+                f"('{latent.episode_phase}'). "
+                "Check the transition table for permitted actions."
+            ),
+        ),
+        (
+            ("sample size",),
+            lambda: "Hint: the minimum regulatory sample size is 30 participants.",
+        ),
+        (
+            ("protocol",),
+            lambda: "Hint: submit the protocol before attempting FDA review.",
+        ),
+        (
+            ("phase i",),
+            lambda: "Hint: complete Phase I before submitting to FDA review.",
+        ),
+        (
+            ("interim",),
+            lambda: "Hint: run an interim analysis before the primary analysis.",
+        ),
+        (
+            ("patients", "enrolled"),
+            lambda: "Hint: enroll patients before running analyses.",
+        ),
+    )
+    for keywords, make_hint in rules:
+        if any(keyword in first for keyword in keywords):
+            return make_hint()
+    # Generic fallback
+    return f"Hint: {violations[0]}"
+# ---------------------------------------------------------------------------
+# Main judge class
+# ---------------------------------------------------------------------------
+class TrialJudge:
+    """Multi-layer trial design verifier.
+    Layer 1 is programmatic and authoritative — its result is never overridden.
+    Layer 2 is persona-scaled and provides human-readable feedback and hints.
+    """
+    def verify(
+        self,
+        action: TrialAction,
+        state: TrialState,
+        latent: TrialLatentState,
+    ) -> JudgeResult:
+        """Verify the action against both programmatic and persona layers.
+        Args:
+            action:  The agent's action to evaluate.
+            state:   Lightweight training-loop metadata (carries difficulty).
+            latent:  Hidden ground-truth + episode tracking state.
+        Returns:
+            JudgeResult with pass/fail, violations, feedback, hint, and penalty.
+        """
+        # Imported locally; only this method needs them.
+        import math
+        from scipy.stats import norm
+        violations: list[str] = []
+        # ------------------------------------------------------------------
+        # Layer 1: Programmatic checks (authoritative, never overridden)
+        # ------------------------------------------------------------------
+        # 1a. Budget check
+        if latent.budget_remaining <= 0:
+            violations.append(
+                f"Budget exhausted: budget_remaining={latent.budget_remaining:.2f} "
+                f"(must be > 0)."
+            )
+        # 1b. Statistical power check
+        #     n is clamped to at least 1, so the divisions below are always safe.
+        n = max(latent.patients_enrolled, 1)
+        power = calculate_power(latent.true_effect_size, n)
+        if power < 0.80:
+            violations.append(
+                f"Insufficient statistical power: {power:.3f} < 0.80 "
+                f"(effect_size={latent.true_effect_size:.3f}, n={n})."
+            )
+        # 1c. p-value check — two-sided normal approximation with n/2 per arm.
+        #     A zero effect size is treated as p = 1.0 (never significant).
+        if latent.true_effect_size != 0.0:
+            se = 1.0 / math.sqrt(n / 2.0)
+            z_stat = latent.true_effect_size / se
+            p_value = float(2.0 * norm.sf(abs(z_stat)))
+        else:
+            p_value = 1.0
+        if p_value >= 0.05:
+            violations.append(
+                f"p-value not significant: {p_value:.4f} >= 0.05 "
+                f"(n={n}, effect_size={latent.true_effect_size:.3f})."
+            )
+        # 1d. FDA compliance check
+        compliance = check_fda_compliance(action, latent)
+        if not compliance.valid:
+            violations.extend(compliance.violations)
+        passed = not violations
+        # ------------------------------------------------------------------
+        # Overconfidence penalty
+        # ------------------------------------------------------------------
+        # A "high-confidence wrong claim" is when the agent's confidence is
+        # >= 0.8 but Layer 1 found violations. The penalty scales with the
+        # number of violations.
+        overconfidence_penalty = 0.0
+        if not passed and action.confidence >= _HIGH_CONFIDENCE_THRESHOLD:
+            overconfidence_penalty = _OVERCONFIDENCE_PENALTY * len(violations)
+        # ------------------------------------------------------------------
+        # Layer 2: Persona-scaled feedback (never overrides Layer 1 result)
+        # ------------------------------------------------------------------
+        persona = _select_persona(state.difficulty)
+        feedback, hint = _generate_feedback(persona, violations, passed, action, latent)
+        return JudgeResult(
+            passed=passed,
+            violations=violations,
+            feedback=feedback,
+            hint=hint,
+            overconfidence_penalty=overconfidence_penalty,
+            persona=persona,
+        )

server/logger.py CHANGED Viewed

@@ -31,9 +31,7 @@ class EpisodeLogger:
         episode_id: str | None = None,
         curriculum_tier: int = 0,
     ) -> None:
-        self._log_path: Path = (
-            log_path if log_path is not None else settings.log_path
-        )
         self._episode_id: str = (
             episode_id if episode_id is not None else str(uuid.uuid4())
         )

         episode_id: str | None = None,
         curriculum_tier: int = 0,
     ) -> None:
+        self._log_path: Path = log_path if log_path is not None else settings.log_path
         self._episode_id: str = (
             episode_id if episode_id is not None else str(uuid.uuid4())
         )

server/noise_model.py CHANGED Viewed

@@ -36,6 +36,11 @@ class NoiseModel:
         self._seed = seed
         self._rng: np.random.Generator = np.random.default_rng(seed)
     def randomize(self, config: ScenarioConfig) -> ScenarioConfig:
         """Return a new ScenarioConfig with domain-randomized parameters.

         self._seed = seed
         self._rng: np.random.Generator = np.random.default_rng(seed)
+    @property
+    def rng(self) -> np.random.Generator:
+        """Public access to the seeded Generator.
+        Returns the Generator instance stored on this NoiseModel (not a copy),
+        so draws made through it advance this model's random stream.
+        """
+        return self._rng
     def randomize(self, config: ScenarioConfig) -> ScenarioConfig:
         """Return a new ScenarioConfig with domain-randomized parameters.

server/phase_detector.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""
+Phase Detector — classifies TrialActions into clinical workflow phases.
+Clinical workflow phase order:
+  literature_review → hypothesis → design → enrollment →
+  monitoring → analysis → submission
+Phase-order bonus: +0.2 for correct order (no regression, no skips)
+Skip penalty: -0.3 per skipped phase
+Regression (moving to an earlier phase): 0.0, no bonus and no skip penalty
+Requirements: 8.5, 9.4
+"""
+from __future__ import annotations
+from models import ActionType, TrialAction
+# Ordered list of clinical workflow phases.
+# A phase's position in this list is what the ordering checks compare.
+PHASE_ORDER: list[str] = [
+    "literature_review",
+    "hypothesis",
+    "design",
+    "enrollment",
+    "monitoring",
+    "analysis",
+    "submission",
+]
+# Reward constants
+# Returned for a first action, a same-phase action, or a one-phase advance.
+PHASE_BONUS: float = 0.2
+# Multiplied by the number of phases skipped in a forward jump.
+PHASE_SKIP_PENALTY: float = -0.3
+# Mapping from ActionType to phase name.
+# No ActionType maps to "literature_review"; the lookups in this module use
+# it as the fallback phase for any action type missing from this table.
+_ACTION_TO_PHASE: dict[ActionType, str] = {
+    # hypothesis
+    ActionType.ESTIMATE_EFFECT_SIZE: "hypothesis",
+    ActionType.ADD_BIOMARKER_STRATIFICATION: "hypothesis",
+    # design
+    ActionType.SET_PRIMARY_ENDPOINT: "design",
+    ActionType.SET_SAMPLE_SIZE: "design",
+    ActionType.SET_INCLUSION_CRITERIA: "design",
+    ActionType.SET_EXCLUSION_CRITERIA: "design",
+    ActionType.SET_DOSING_SCHEDULE: "design",
+    ActionType.SET_CONTROL_ARM: "design",
+    ActionType.SET_RANDOMIZATION_RATIO: "design",
+    ActionType.SET_BLINDING: "design",
+    ActionType.REQUEST_PROTOCOL_AMENDMENT: "design",
+    # enrollment
+    ActionType.ENROLL_PATIENTS: "enrollment",
+    # monitoring
+    ActionType.RUN_DOSE_ESCALATION: "monitoring",
+    ActionType.OBSERVE_SAFETY_SIGNAL: "monitoring",
+    ActionType.RUN_INTERIM_ANALYSIS: "monitoring",
+    ActionType.MODIFY_SAMPLE_SIZE: "monitoring",
+    # analysis
+    ActionType.RUN_PRIMARY_ANALYSIS: "analysis",
+    ActionType.SYNTHESIZE_CONCLUSION: "analysis",
+    # submission
+    ActionType.SUBMIT_TO_FDA_REVIEW: "submission",
+}
+def detect_phase(action: TrialAction, history: list[str]) -> tuple[str, bool]:
+    """Classify a TrialAction into a clinical workflow phase.
+    Args:
+        action: The agent's action for this step.
+        history: Phase names from the previous steps of the episode; only the
+            most recent entry is compared against.
+    Returns:
+        (phase_name, phase_order_correct). The flag is True for the first
+        action of an episode, for staying in the same phase, and for advancing
+        by exactly one phase; it is False for a regression or a forward jump
+        that skips one or more phases.
+    """
+    phase_name = _ACTION_TO_PHASE.get(action.action_type, "literature_review")
+    if not history:
+        # First action — any phase is valid
+        return phase_name, True
+    def _position(name: str) -> int:
+        # Names missing from PHASE_ORDER are treated as the first phase.
+        try:
+            return PHASE_ORDER.index(name)
+        except ValueError:
+            return 0
+    previous_pos = _position(history[-1])
+    current_pos = _position(phase_name)
+    # delta < 0 is a regression, delta > 1 skips (delta - 1) phases;
+    # only 0 (same phase) and 1 (next phase) are in order.
+    delta = current_pos - previous_pos
+    return phase_name, delta in (0, 1)
+def compute_phase_ordering_reward(action: TrialAction, history: list[str]) -> float:
+    """Compute the r_ordering reward component using phase detection.
+    Args:
+        action: The agent's action for this step.
+        history: Phase names from the previous steps of the episode; only the
+            most recent entry is compared against.
+    Returns:
+        PHASE_BONUS for a first action, a same-phase action, or a one-phase
+        advance; PHASE_SKIP_PENALTY times the number of skipped phases for a
+        forward jump; 0.0 for a regression (going backwards).
+    """
+    phase_name = _ACTION_TO_PHASE.get(action.action_type, "literature_review")
+    if not history:
+        return PHASE_BONUS
+    def _position(name: str) -> int:
+        # Names missing from PHASE_ORDER are treated as the first phase.
+        try:
+            return PHASE_ORDER.index(name)
+        except ValueError:
+            return 0
+    previous_pos = _position(history[-1])
+    delta = _position(phase_name) - previous_pos
+    if delta < 0:
+        # Regression — no bonus, no skip penalty
+        return 0.0
+    if delta > 1:
+        # delta - 1 phases lie strictly between the previous and current phase.
+        return PHASE_SKIP_PENALTY * (delta - 1)
+    return PHASE_BONUS

server/reward/reward_computer.py CHANGED Viewed

@@ -18,6 +18,7 @@ from models import (
     TrialLatentState,
     TrialResult,
 )
 from server.rules.fda_rules import check_fda_compliance
 # Reward magnitude constants
@@ -29,13 +30,13 @@ _TERMINAL_CALIBRATION = 5.0
 _INFO_GAIN_BASE = 0.5
 _EFFICIENCY_SCALE = 2.0
 _NOVELTY_BASE = 0.2
-_ORDERING_BONUS = 0.2
 def compute_reward(
     action: TrialAction,
     latent: TrialLatentState,
     result: TrialResult,
 ) -> RewardBreakdown:
     """Compute all eight reward components for a single step.
@@ -46,6 +47,7 @@ def compute_reward(
         action: The agent's action.
         latent: Hidden ground-truth + episode tracking state.
         result: The simulated trial result.
     Returns:
         A RewardBreakdown with all eight keys populated.
@@ -54,11 +56,9 @@ def compute_reward(
     r_validity = _VALIDITY_VALID if compliance.valid else _VALIDITY_INVALID
     r_penalty = (
-        _PENALTY_INVALID * len(compliance.violations)
-        if not compliance.valid
-        else 0.0
     )
-    r_ordering = _ordering_reward(action, latent)
     r_info_gain = _info_gain_reward(action, result)
     r_efficiency = _efficiency_reward(latent)
     r_novelty = _novelty_reward(action, latent)
@@ -81,18 +81,11 @@ def compute_reward(
 # Component helpers
 # ---------------------------------------------------------------------------
-def _ordering_reward(action: TrialAction, latent: TrialLatentState) -> float:
-    """Bonus for actions that match the expected clinical workflow phase."""
-    from server.rules.fda_rules import TRANSITION_TABLE
-    permitted = TRANSITION_TABLE.get(latent.episode_phase, set())
-    if action.action_type in permitted:
-        return _ORDERING_BONUS
-    return 0.0
 def _info_gain_reward(action: TrialAction, result: TrialResult) -> float:
     """Reward for information-gathering actions that produce useful results."""
     from models import ActionType
     info_actions = {
         ActionType.ESTIMATE_EFFECT_SIZE,
         ActionType.OBSERVE_SAFETY_SIGNAL,
@@ -110,9 +103,7 @@ def _efficiency_reward(latent: TrialLatentState) -> float:
     initial_budget = 1_000_000.0
     if initial_budget <= 0:
         return 0.0
-    budget_fraction = min(
-        max(latent.budget_remaining / initial_budget, 0.0), 1.0
-    )
     return _EFFICIENCY_SCALE * budget_fraction
@@ -123,9 +114,7 @@ def _novelty_reward(action: TrialAction, latent: TrialLatentState) -> float:
     return 0.0
-def _terminal_success_reward(
-    latent: TrialLatentState, result: TrialResult
-) -> float:
     """Positive reward when the episode ends with a successful trial (req 6.4)."""
     if latent.trial_complete and result.success and result.failure_reason is None:
         return _TERMINAL_SUCCESS
@@ -150,6 +139,6 @@ def _terminal_calibration_reward(
     centre_error = abs(ci_centre - true_effect)
     calibration_score = max(0.0, 1.0 - centre_error)
     width_penalty = min(ci_width, 1.0)
-    calibration_score *= (1.0 - width_penalty * 0.5)
     return _TERMINAL_CALIBRATION * calibration_score

     TrialLatentState,
     TrialResult,
 )
+from server.phase_detector import compute_phase_ordering_reward
 from server.rules.fda_rules import check_fda_compliance
 # Reward magnitude constants
 _INFO_GAIN_BASE = 0.5
 _EFFICIENCY_SCALE = 2.0
 _NOVELTY_BASE = 0.2
 def compute_reward(
     action: TrialAction,
     latent: TrialLatentState,
     result: TrialResult,
+    phase_history: list[str] | None = None,
 ) -> RewardBreakdown:
     """Compute all eight reward components for a single step.
         action: The agent's action.
         latent: Hidden ground-truth + episode tracking state.
         result: The simulated trial result.
+        phase_history: List of phase names from previous steps (for r_ordering).
     Returns:
         A RewardBreakdown with all eight keys populated.
     r_validity = _VALIDITY_VALID if compliance.valid else _VALIDITY_INVALID
     r_penalty = (
+        _PENALTY_INVALID * len(compliance.violations) if not compliance.valid else 0.0
     )
+    r_ordering = compute_phase_ordering_reward(action, phase_history or [])
     r_info_gain = _info_gain_reward(action, result)
     r_efficiency = _efficiency_reward(latent)
     r_novelty = _novelty_reward(action, latent)
 # Component helpers
 # ---------------------------------------------------------------------------
 def _info_gain_reward(action: TrialAction, result: TrialResult) -> float:
     """Reward for information-gathering actions that produce useful results."""
     from models import ActionType
     info_actions = {
         ActionType.ESTIMATE_EFFECT_SIZE,
         ActionType.OBSERVE_SAFETY_SIGNAL,
     initial_budget = 1_000_000.0
     if initial_budget <= 0:
         return 0.0
+    budget_fraction = min(max(latent.budget_remaining / initial_budget, 0.0), 1.0)
     return _EFFICIENCY_SCALE * budget_fraction
     return 0.0
+def _terminal_success_reward(latent: TrialLatentState, result: TrialResult) -> float:
     """Positive reward when the episode ends with a successful trial (req 6.4)."""
     if latent.trial_complete and result.success and result.failure_reason is None:
         return _TERMINAL_SUCCESS
     centre_error = abs(ci_centre - true_effect)
     calibration_score = max(0.0, 1.0 - centre_error)
     width_penalty = min(ci_width, 1.0)
+    calibration_score *= 1.0 - width_penalty * 0.5
     return _TERMINAL_CALIBRATION * calibration_score

server/reward/shaping.py CHANGED Viewed

@@ -32,9 +32,7 @@ def _budget_efficiency(
     return min(max(latent.budget_remaining / initial_budget, 0.0), 1.0)
-def potential(
-    latent: TrialLatentState, initial_budget: float = 1_000_000.0
-) -> float:
     """φ(s) = milestone_completion × budget_efficiency."""
     return _milestone_completion(latent) * _budget_efficiency(latent, initial_budget)

     return min(max(latent.budget_remaining / initial_budget, 0.0), 1.0)
+def potential(latent: TrialLatentState, initial_budget: float = 1_000_000.0) -> float:
     """φ(s) = milestone_completion × budget_efficiency."""
     return _milestone_completion(latent) * _budget_efficiency(latent, initial_budget)

server/rules/prerequisite_rules.py CHANGED Viewed

@@ -20,9 +20,7 @@ _HISTORY_PREREQUISITES: dict[ActionType, list[ActionType]] = {
 }
-def check_prerequisites(
-    action: TrialAction, latent: TrialLatentState
-) -> list[str]:
     """Return a list of prerequisite violation strings for *action* given *latent*.
     Returns an empty list when all prerequisites are satisfied.

 }
+def check_prerequisites(action: TrialAction, latent: TrialLatentState) -> list[str]:
     """Return a list of prerequisite violation strings for *action* given *latent*.
     Returns an empty list when all prerequisites are satisfied.

server/simulator/__init__.py CHANGED Viewed

@@ -2,5 +2,5 @@
 simulator — Trial outcome simulation and power calculation.
 Provides simulate_trial, calculate_power (with episode-scoped cache),
-compute_reward, and the seeded hidden-state generator.
 """

 simulator — Trial outcome simulation and power calculation.
 Provides simulate_trial, calculate_power (with episode-scoped cache),
+compute_reward, TransitionEngine, and the seeded hidden-state generator.
 """

server/simulator/output_generator.py ADDED Viewed

	@@ -0,0 +1,238 @@

+"""
+OutputGenerator — produces a noisy TrialObservation from a TrialLatentState.
+Follows the Bio Experiment pattern: TransitionEngine updates hidden state,
+OutputGenerator produces noisy observations from it. Agent never sees clean
+hidden values.
+Key responsibilities:
+  - Inject measurement noise and site variability via NoiseModel's seeded RNG
+  - Populate phase_data with noisy (not raw) experimental results
+  - Populate resource_status from latent state resource fields
+  - Populate available_actions based on current milestone flags and phase
+  - Never expose true_effect_size, true_side_effect_rate, or other hidden values
+    directly — always add noise before returning to the agent
+"""
+from __future__ import annotations
+import numpy as np
+from models import ActionType, TrialLatentState, TrialObservation, TrialState
+from server.noise_model import NoiseModel
+from server.rules.fda_rules import TRANSITION_TABLE
+from server.rules.prerequisite_rules import _HISTORY_PREREQUISITES
+class OutputGenerator:
+    """Produces a noisy TrialObservation from a TrialLatentState.
+    The agent never sees clean hidden values — all experimental results are
+    perturbed by measurement noise and site variability before being returned.
+    Args:
+        noise_model: Seeded NoiseModel used to draw observation noise.
+    """
+    def __init__(self, noise_model: NoiseModel) -> None:
+        self._noise_model = noise_model
+    def generate(
+        self,
+        latent: TrialLatentState,
+        trial_state: TrialState,
+        *,
+        steps_taken: int,
+        max_steps: int,
+        rule_violations: list[str],
+        done: bool,
+        reward: float,
+        scenario_description: str,
+        hint: str = "",
+    ) -> TrialObservation:
+        """Generate a noisy TrialObservation from the current latent state.
+        The observation is assembled from three derived pieces — phase_data,
+        resource_status and available_actions — each built from *latent* by a
+        private helper. Every keyword argument is copied into the observation
+        unchanged.
+        Args:
+            latent: Updated hidden state from TransitionEngine.
+            trial_state: Episode metadata (difficulty, curriculum tier, etc.).
+                Accepted for interface completeness; this method does not
+                currently read it.
+            steps_taken: Number of steps taken so far in the episode.
+            max_steps: Maximum steps allowed in the episode.
+            rule_violations: List of rule violation strings from this step.
+            done: Whether the episode is finished.
+            reward: Reward signal for this step.
+            scenario_description: Human-readable scenario description.
+            hint: Optional hint string, passed through as-is. The caller
+                decides when to populate it (presumably only at junior
+                difficulty — not enforced here; confirm against the caller).
+        Returns:
+            A TrialObservation with noisy phase_data, resource_status, and
+            available_actions.
+        """
+        # Observation noise is drawn from the NoiseModel's generator rather
+        # than a fresh one created here.
+        rng = self._noise_model.rng
+        phase_data = self._build_phase_data(latent, rng)
+        resource_status = self._build_resource_status(latent)
+        available_actions = self._build_available_actions(latent)
+        return TrialObservation(
+            scenario_description=scenario_description,
+            phase_data=phase_data,
+            resource_status=resource_status,
+            rule_violations=rule_violations,
+            available_actions=available_actions,
+            steps_taken=steps_taken,
+            max_steps=max_steps,
+            hint=hint,
+            done=done,
+            reward=reward,
+        )
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+    def _build_phase_data(
+        self,
+        latent: TrialLatentState,
+        rng: "np.random.Generator",
+    ) -> dict:
+        """Build noisy phase_data dict — never exposes raw hidden values.
+        Measurement noise (latent.measurement_noise) is applied to effect-size
+        estimates. Site variability (latent.site_variability) is applied to
+        adverse-event-rate estimates.
+        """
+        import numpy as np  # local import to keep module-level deps minimal
+        noise_std = max(latent.measurement_noise, 1e-6)
+        site_std = max(latent.site_variability, 1e-6)
+        phase_data: dict = {
+            "current_phase": latent.episode_phase,
+            "patients_enrolled": latent.patients_enrolled,
+            # Milestones — these are observable flags, not hidden values
+            "phase_i_complete": latent.phase_i_complete,
+            "mtd_identified": latent.mtd_identified,
+            "effect_estimated": latent.effect_estimated,
+            "protocol_submitted": latent.protocol_submitted,
+            "interim_complete": latent.interim_complete,
+            "trial_complete": latent.trial_complete,
+        }
+        # Noisy effect-size estimate — only available after ESTIMATE_EFFECT_SIZE
+        if latent.effect_estimated:
+            noisy_effect = float(latent.true_effect_size + rng.normal(0.0, noise_std))
+            phase_data["observed_effect_size"] = round(noisy_effect, 4)
+            # Noisy confidence interval width (derived from noise level)
+            ci_half_width = float(rng.normal(noise_std * 2, noise_std * 0.5))
+            ci_half_width = max(ci_half_width, 0.01)
+            phase_data["effect_size_ci"] = (
+                round(noisy_effect - ci_half_width, 4),
+                round(noisy_effect + ci_half_width, 4),
+            )
+        # Noisy adverse-event rate — only available after OBSERVE_SAFETY_SIGNAL
+        # or RUN_DOSE_ESCALATION
+        if (
+            latent.phase_i_complete
+            or ActionType.OBSERVE_SAFETY_SIGNAL.value in latent.action_history
+        ):
+            noisy_ae_rate = float(
+                latent.true_side_effect_rate + rng.normal(0.0, site_std)
+            )
+            noisy_ae_rate = float(np.clip(noisy_ae_rate, 0.0, 1.0))
+            phase_data["observed_adverse_event_rate"] = round(noisy_ae_rate, 4)
+        # Noisy placebo response — only available after interim or primary analysis
+        if latent.interim_complete or latent.trial_complete:
+            noisy_placebo = float(
+                latent.placebo_response_rate + rng.normal(0.0, noise_std)
+            )
+            noisy_placebo = float(np.clip(noisy_placebo, 0.0, 1.0))
+            phase_data["observed_placebo_response"] = round(noisy_placebo, 4)
+        # Noisy dose-response curve — only available after Phase I
+        if latent.phase_i_complete and latent.true_dose_response:
+            noisy_dose_response: dict[str, float] = {}
+            for dose, response in latent.true_dose_response.items():
+                noisy_resp = float(response + rng.normal(0.0, noise_std))
+                noisy_resp = float(np.clip(noisy_resp, 0.0, 1.0))
+                noisy_dose_response[str(dose)] = round(noisy_resp, 4)
+            phase_data["observed_dose_response"] = noisy_dose_response
+        # Dropout rate estimate — noisy, only after enrollment begins
+        if latent.patients_enrolled > 0:
+            noisy_dropout = float(
+                latent.dropout_rate + rng.normal(0.0, noise_std * 0.5)
+            )
+            noisy_dropout = float(np.clip(noisy_dropout, 0.0, 1.0))
+            phase_data["observed_dropout_rate"] = round(noisy_dropout, 4)
+        # Responder population hint — only after biomarker stratification
+        if ActionType.ADD_BIOMARKER_STRATIFICATION.value in latent.action_history:
+            # Reveal population label but NOT the true criteria (hidden)
+            phase_data["responder_population_hint"] = latent.true_responder_population
+        return phase_data
+    def _build_resource_status(self, latent: TrialLatentState) -> dict:
+        """Build resource_status from latent state resource fields."""
+        return {
+            "budget_remaining": latent.budget_remaining,
+            "time_remaining_days": latent.time_remaining_days,
+            "patients_enrolled": latent.patients_enrolled,
+        }
+    def _build_available_actions(self, latent: TrialLatentState) -> list[str]:
+        """Return the list of valid action strings given current milestone flags.
+        Filters the phase-permitted actions through prerequisite checks so the
+        agent only sees actions it can actually take right now.
+        """
+        phase_permitted: set[ActionType] = TRANSITION_TABLE.get(
+            latent.episode_phase, set()
+        )
+        available: list[str] = []
+        for action_type in sorted(phase_permitted, key=lambda a: a.value):
+            if self._prerequisites_met(action_type, latent):
+                available.append(action_type.value)
+        return available
+    def _prerequisites_met(
+        self, action_type: ActionType, latent: TrialLatentState
+    ) -> bool:
+        """Return True if all prerequisites for *action_type* are satisfied."""
+        # History-based prerequisites
+        required_actions = _HISTORY_PREREQUISITES.get(action_type, [])
+        for required in required_actions:
+            if required.value not in latent.action_history:
+                return False
+        # State-flag prerequisites (mirrors prerequisite_rules.py logic)
+        if action_type == ActionType.REQUEST_PROTOCOL_AMENDMENT:
+            if not latent.protocol_submitted:
+                return False
+        if action_type == ActionType.SUBMIT_TO_FDA_REVIEW:
+            if not latent.protocol_submitted or not latent.phase_i_complete:
+                return False
+        if action_type == ActionType.RUN_PRIMARY_ANALYSIS:
+            if not latent.interim_complete:
+                return False
+        if action_type == ActionType.RUN_INTERIM_ANALYSIS:
+            if latent.patients_enrolled <= 0:
+                return False
+        if action_type == ActionType.MODIFY_SAMPLE_SIZE:
+            if ActionType.SET_SAMPLE_SIZE.value not in latent.action_history:
+                return False
+        if action_type == ActionType.SYNTHESIZE_CONCLUSION:
+            if not latent.trial_complete:
+                return False
+        return True

server/simulator/transition_engine.py ADDED Viewed

	@@ -0,0 +1,167 @@

+"""
+TransitionEngine — mutates TrialLatentState per action.
+Follows the Bio Experiment pattern: TransitionEngine updates hidden state,
+OutputGenerator produces noisy observations from it. Agent never sees clean
+hidden values.
+Key responsibilities:
+  - Enroll patients (ENROLL_PATIENTS)
+  - Spend budget and advance time
+  - Record adverse events
+  - Set milestone flags (phase_i_complete, mtd_identified, effect_estimated,
+    protocol_submitted, interim_complete, trial_complete)
+  - Degrade data quality on soft violations
+"""
+from __future__ import annotations
+import random
+from models import ActionType, TrialAction, TrialLatentState
+class TransitionEngine:
+    """Mutates TrialLatentState in response to agent actions.
+    All state transitions are deterministic given the same seed and action
+    sequence (reproducibility requirement 9.2).
+    """
+    # Cost and time constants (per action type)
+    _ACTION_COSTS: dict[ActionType, float] = {
+        ActionType.SET_PRIMARY_ENDPOINT: 5_000.0,
+        ActionType.SET_SAMPLE_SIZE: 2_000.0,
+        ActionType.SET_INCLUSION_CRITERIA: 3_000.0,
+        ActionType.SET_EXCLUSION_CRITERIA: 3_000.0,
+        ActionType.SET_DOSING_SCHEDULE: 10_000.0,
+        ActionType.SET_CONTROL_ARM: 5_000.0,
+        ActionType.SET_RANDOMIZATION_RATIO: 2_000.0,
+        ActionType.SET_BLINDING: 4_000.0,
+        ActionType.RUN_DOSE_ESCALATION: 50_000.0,
+        ActionType.OBSERVE_SAFETY_SIGNAL: 15_000.0,
+        ActionType.ESTIMATE_EFFECT_SIZE: 20_000.0,
+        ActionType.RUN_INTERIM_ANALYSIS: 30_000.0,
+        ActionType.MODIFY_SAMPLE_SIZE: 5_000.0,
+        ActionType.ADD_BIOMARKER_STRATIFICATION: 25_000.0,
+        ActionType.SUBMIT_TO_FDA_REVIEW: 100_000.0,
+        ActionType.REQUEST_PROTOCOL_AMENDMENT: 15_000.0,
+        ActionType.RUN_PRIMARY_ANALYSIS: 50_000.0,
+        ActionType.SYNTHESIZE_CONCLUSION: 10_000.0,
+        ActionType.ENROLL_PATIENTS: 0.0,  # cost computed per patient
+    }
+    _ACTION_TIME_DAYS: dict[ActionType, int] = {
+        ActionType.SET_PRIMARY_ENDPOINT: 7,
+        ActionType.SET_SAMPLE_SIZE: 3,
+        ActionType.SET_INCLUSION_CRITERIA: 5,
+        ActionType.SET_EXCLUSION_CRITERIA: 5,
+        ActionType.SET_DOSING_SCHEDULE: 14,
+        ActionType.SET_CONTROL_ARM: 7,
+        ActionType.SET_RANDOMIZATION_RATIO: 3,
+        ActionType.SET_BLINDING: 5,
+        ActionType.RUN_DOSE_ESCALATION: 90,
+        ActionType.OBSERVE_SAFETY_SIGNAL: 30,
+        ActionType.ESTIMATE_EFFECT_SIZE: 45,
+        ActionType.RUN_INTERIM_ANALYSIS: 60,
+        ActionType.MODIFY_SAMPLE_SIZE: 7,
+        ActionType.ADD_BIOMARKER_STRATIFICATION: 30,
+        ActionType.SUBMIT_TO_FDA_REVIEW: 180,
+        ActionType.REQUEST_PROTOCOL_AMENDMENT: 30,
+        ActionType.RUN_PRIMARY_ANALYSIS: 90,
+        ActionType.SYNTHESIZE_CONCLUSION: 14,
+        ActionType.ENROLL_PATIENTS: 0,  # time computed per patient
+    }
+    # Flat per-patient enrollment cost and duration (single class-level constants; not varied by disease area here)
+    _COST_PER_PATIENT: float = 10_000.0
+    _DAYS_PER_PATIENT: float = 2.0
+    def __init__(self) -> None:
+        """Initialize the TransitionEngine."""
+        pass
+    def apply_transition(
+        self, latent: TrialLatentState, action: TrialAction
+    ) -> TrialLatentState:
+        """Apply *action* to *latent* and return the updated state.
+        Does NOT mutate the input latent state — returns a new copy with
+        updated fields.
+        Args:
+            latent: Current hidden state.
+            action: Agent action to apply.
+        Returns:
+            Updated TrialLatentState with mutated fields.
+        """
+        # Create a mutable copy
+        updated = latent.model_copy(deep=True)
+        # Update action history
+        updated.action_history.append(action.action_type.value)
+        # Compute step-specific RNG
+        step_index = len(updated.action_history)
+        rng = random.Random(latent.seed ^ step_index)
+        # --- Budget and time consumption ---
+        base_cost = self._ACTION_COSTS.get(action.action_type, 0.0)
+        base_time = self._ACTION_TIME_DAYS.get(action.action_type, 0)
+        if action.action_type == ActionType.ENROLL_PATIENTS:
+            n_patients = action.parameters.get("n_patients", 0)
+            base_cost = n_patients * self._COST_PER_PATIENT
+            base_time = int(n_patients * self._DAYS_PER_PATIENT)
+            updated.patients_enrolled += n_patients
+        updated.budget_remaining -= base_cost
+        updated.time_remaining_days -= base_time
+        # --- Milestone flag updates ---
+        if action.action_type == ActionType.RUN_DOSE_ESCALATION:
+            updated.phase_i_complete = True
+            updated.mtd_identified = True
+        if action.action_type == ActionType.ESTIMATE_EFFECT_SIZE:
+            updated.effect_estimated = True
+        if action.action_type == ActionType.SUBMIT_TO_FDA_REVIEW:
+            updated.protocol_submitted = True
+        if action.action_type == ActionType.RUN_INTERIM_ANALYSIS:
+            updated.interim_complete = True
+        if action.action_type == ActionType.RUN_PRIMARY_ANALYSIS:
+            updated.trial_complete = True
+        # --- Soft violation: degrade data quality ---
+        # If action confidence is low (< 0.5), increase measurement noise
+        if action.confidence < 0.5:
+            degradation_factor = 1.0 + (0.5 - action.confidence)
+            updated.measurement_noise = min(
+                updated.measurement_noise * degradation_factor, 0.5
+            )
+        # If budget is negative (soft violation), degrade site variability
+        if updated.budget_remaining < 0:
+            updated.site_variability = min(updated.site_variability * 1.2, 0.5)
+        # If time is negative (soft violation), increase dropout rate
+        if updated.time_remaining_days < 0:
+            updated.dropout_rate = min(updated.dropout_rate * 1.15, 0.8)
+        # --- Adverse event recording (stochastic) ---
+        # On certain actions, record adverse events based on true_side_effect_rate
+        if action.action_type in {
+            ActionType.ENROLL_PATIENTS,
+            ActionType.OBSERVE_SAFETY_SIGNAL,
+            ActionType.RUN_DOSE_ESCALATION,
+        }:
+            # Adverse events increase site variability slightly
+            if rng.random() < updated.true_side_effect_rate:
+                updated.adverse_events += 1
+                updated.site_variability = min(updated.site_variability + 0.02, 0.5)
+        return updated

server/simulator/trial_simulator.py CHANGED Viewed

@@ -84,6 +84,7 @@ def simulate_trial(
         se = 1.0 / math.sqrt(n_per_arm)
         z_stat = observed_effect / se if se > 0 else 0.0
         from scipy.stats import norm
         p_value = float(2.0 * norm.sf(abs(z_stat)))
     else:
         p_value = 1.0
@@ -93,6 +94,7 @@ def simulate_trial(
     if n_per_arm > 0:
         from scipy.stats import norm as _norm
         z_95 = _norm.ppf(0.975)
         se = 1.0 / math.sqrt(n_per_arm)
         ci_low = observed_effect - z_95 * se
@@ -104,9 +106,7 @@ def simulate_trial(
         0.0,
         latent.site_variability if latent.site_variability > 0 else 0.01,
     )
-    adverse_event_rate = min(
-        max(latent.true_side_effect_rate + ae_noise, 0.0), 1.0
-    )
     return TrialResult(
         p_value=p_value,

         se = 1.0 / math.sqrt(n_per_arm)
         z_stat = observed_effect / se if se > 0 else 0.0
         from scipy.stats import norm
         p_value = float(2.0 * norm.sf(abs(z_stat)))
     else:
         p_value = 1.0
     if n_per_arm > 0:
         from scipy.stats import norm as _norm
         z_95 = _norm.ppf(0.975)
         se = 1.0 / math.sqrt(n_per_arm)
         ci_low = observed_effect - z_95 * se
         0.0,
         latent.site_variability if latent.site_variability > 0 else 0.01,
     )
+    adverse_event_rate = min(max(latent.true_side_effect_rate + ae_noise, 0.0), 1.0)
     return TrialResult(
         p_value=p_value,

tests/test_curriculum_controller.py ADDED Viewed

	@@ -0,0 +1,171 @@

+"""
+Tests for server/curriculum/controller.py
+Verifies:
+  - advance_curriculum mastery logic (70% → graduate, 90% → fast-track)
+  - select_scenario tier mapping
+  - Edge cases (empty history, max tier, clamping)
+"""
+import numpy as np
+from server.curriculum.controller import (
+    MAX_TIER,
+    EpisodeMetrics,
+    advance_curriculum,
+    select_scenario,
+)
+from server.curriculum.scenarios import (
+    AUTOIMMUNE_BIOLOGIC,
+    CNS_DEPRESSION,
+    RARE_DISEASE_ORPHAN,
+    SOLID_TUMOR_CHEMO,
+    WARMUP,
+)
+# ── advance_curriculum tests ──────────────────────────────────────────────────
+def test_advance_curriculum_empty_history():
+    """Empty history → stay at current tier."""
+    metrics = EpisodeMetrics(success=True, episode_history=[])
+    assert advance_curriculum(0, metrics) == 0
+    assert advance_curriculum(2, metrics) == 2
+def test_advance_curriculum_no_mastery():
+    """Below 70% success → stay at current tier."""
+    # 5/10 = 50% → no graduation
+    history = [True, False, True, False, True, False, True, False, True, False]
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    assert advance_curriculum(1, metrics) == 1
+def test_advance_curriculum_normal_graduation():
+    """70%+ rolling success → advance one tier."""
+    # 7/10 = 70% → graduate
+    history = [True, True, True, True, True, True, True, False, False, False]
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    assert advance_curriculum(0, metrics) == 1
+    assert advance_curriculum(2, metrics) == 3
+def test_advance_curriculum_fast_track():
+    """90%+ success after ≥3 episodes → skip one tier (advance by 2)."""
+    # 9/10 = 90% → fast-track
+    history = [True, True, True, True, True, True, True, True, True, False]
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    assert advance_curriculum(0, metrics) == 2  # skip tier 1
+    assert advance_curriculum(1, metrics) == 3  # skip tier 2
+def test_advance_curriculum_fast_track_requires_min_episodes():
+    """Fast-track requires at least 3 episodes."""
+    # 2 episodes, 100% success → not enough for fast-track
+    history = [True, True]
+    metrics = EpisodeMetrics(success=True, episode_history=history)
+    # Should not fast-track (only 2 episodes), but 100% ≥ 70% → normal graduate
+    assert advance_curriculum(0, metrics) == 1
+    # 3 episodes, 100% success → fast-track
+    history = [True, True, True]
+    metrics = EpisodeMetrics(success=True, episode_history=history)
+    assert advance_curriculum(0, metrics) == 2
+def test_advance_curriculum_max_tier_clamp():
+    """Cannot advance beyond MAX_TIER (4)."""
+    history = [True] * 10  # 100% success
+    metrics = EpisodeMetrics(success=True, episode_history=history)
+    assert advance_curriculum(MAX_TIER, metrics) == MAX_TIER
+    assert advance_curriculum(MAX_TIER - 1, metrics) == MAX_TIER  # fast-track clamped
+def test_advance_curriculum_rolling_window():
+    """Only the most recent 10 episodes count for rolling rate."""
+    # 20 episodes: first 10 are all False, last 10 are 9 True + 1 False
+    # Rolling window (last 10) = 9/10 = 90% → fast-track
+    history = [False] * 10 + [True] * 9 + [False]
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    assert advance_curriculum(0, metrics) == 2
+def test_advance_curriculum_exactly_70_percent():
+    """Exactly 70% success → should graduate."""
+    history = [True] * 7 + [False] * 3
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    assert advance_curriculum(1, metrics) == 2
+def test_advance_curriculum_exactly_90_percent():
+    """Exactly 90% success after ≥3 episodes → fast-track."""
+    history = [True] * 9 + [False]
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    assert advance_curriculum(0, metrics) == 2
+# ── select_scenario tests ─────────────────────────────────────────────────────
+def test_select_scenario_tier_mapping():
+    """Each tier maps to the correct ScenarioConfig."""
+    rng = np.random.default_rng(42)
+    assert select_scenario(0, rng) == WARMUP
+    assert select_scenario(1, rng) == SOLID_TUMOR_CHEMO
+    assert select_scenario(2, rng) == AUTOIMMUNE_BIOLOGIC
+    assert select_scenario(3, rng) == CNS_DEPRESSION
+    assert select_scenario(4, rng) == RARE_DISEASE_ORPHAN
+def test_select_scenario_clamping():
+    """Out-of-range tiers are clamped to [MIN_TIER, MAX_TIER]."""
+    rng = np.random.default_rng(42)
+    # Below MIN_TIER → clamp to 0
+    assert select_scenario(-1, rng) == WARMUP
+    assert select_scenario(-100, rng) == WARMUP
+    # Above MAX_TIER → clamp to 4
+    assert select_scenario(5, rng) == RARE_DISEASE_ORPHAN
+    assert select_scenario(100, rng) == RARE_DISEASE_ORPHAN
+def test_select_scenario_deterministic():
+    """Same tier + rng seed → same scenario (currently deterministic anyway)."""
+    rng1 = np.random.default_rng(42)
+    rng2 = np.random.default_rng(42)
+    assert select_scenario(2, rng1) == select_scenario(2, rng2)
+# ── Integration test: full curriculum progression ─────────────────────────────
+def test_full_curriculum_progression():
+    """Simulate a full curriculum progression from tier 0 → 4."""
+    tier = 0
+    history: list[bool] = []
+    # Tier 0 → 1 (normal graduation at 70%)
+    for _ in range(7):
+        history.append(True)
+    for _ in range(3):
+        history.append(False)
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    tier = advance_curriculum(tier, metrics)
+    assert tier == 1
+    # Tier 1 → 3 (fast-track at 90%)
+    history = [True] * 9 + [False]
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    tier = advance_curriculum(tier, metrics)
+    assert tier == 3
+    # Tier 3 → 4 (normal graduation)
+    history = [True] * 7 + [False] * 3
+    metrics = EpisodeMetrics(success=False, episode_history=history)
+    tier = advance_curriculum(tier, metrics)
+    assert tier == 4
+    # Tier 4 → 4 (max tier, cannot advance)
+    history = [True] * 10
+    metrics = EpisodeMetrics(success=True, episode_history=history)
+    tier = advance_curriculum(tier, metrics)
+    assert tier == 4

tests/test_episode_logger_wiring.py CHANGED Viewed

@@ -38,9 +38,7 @@ class TestLoggerCreatedOnReset:
     def test_logger_exists_after_reset(self, manager: EpisodeManager) -> None:
         assert manager._logger is not None
-    def test_logger_replaced_on_second_reset(
-        self, manager: EpisodeManager
-    ) -> None:
         first_id = manager._logger.episode_id
         manager.reset()
         second_id = manager._logger.episode_id
@@ -53,9 +51,7 @@ class TestLoggerCreatedOnReset:
 class TestLogStepCalledOnStep:
     """Requirement 7.1: log_step() is called for every step."""
-    def test_log_step_called_for_invalid_action(
-        self, manager: EpisodeManager
-    ) -> None:
         mock_logger = MagicMock()
         manager._logger = mock_logger

     def test_logger_exists_after_reset(self, manager: EpisodeManager) -> None:
         assert manager._logger is not None
+    def test_logger_replaced_on_second_reset(self, manager: EpisodeManager) -> None:
         first_id = manager._logger.episode_id
         manager.reset()
         second_id = manager._logger.episode_id
 class TestLogStepCalledOnStep:
     """Requirement 7.1: log_step() is called for every step."""
+    def test_log_step_called_for_invalid_action(self, manager: EpisodeManager) -> None:
         mock_logger = MagicMock()
         manager._logger = mock_logger

tests/test_episode_manager_compliance.py CHANGED Viewed

@@ -33,17 +33,13 @@ def manager() -> EpisodeManager:
 class TestInvalidActionReturnsNegativeRValidity:
     """Requirement 10.1: invalid actions → negative r_validity, latent unchanged."""
-    def test_invalid_action_r_validity_negative(
-        self, manager: EpisodeManager
-    ) -> None:
         # SUBMIT_TO_FDA_REVIEW not permitted in literature_review phase
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         _, reward, _, _ = manager.step(action)
         assert reward.r_validity < 0, "r_validity must be negative for invalid action"
-    def test_invalid_action_state_unchanged(
-        self, manager: EpisodeManager
-    ) -> None:
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         history_before = list(manager._latent.action_history)
         step_before = len(history_before)
@@ -62,9 +58,7 @@ class TestInvalidActionReturnsNegativeRValidity:
         assert len(obs.rule_violations) > 0
         assert len(info["violations"]) > 0
-    def test_invalid_action_done_is_false(
-        self, manager: EpisodeManager
-    ) -> None:
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         _, _, done, _ = manager.step(action)
         assert done is False
@@ -108,9 +102,7 @@ class TestNoUnhandledExceptions:
         with pytest.raises(RuntimeError, match="No active episode"):
             em.step(action)
-    def test_multiple_invalid_steps_do_not_raise(
-        self, manager: EpisodeManager
-    ) -> None:
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         for _ in range(5):
             _, reward, _, _ = manager.step(action)

 class TestInvalidActionReturnsNegativeRValidity:
     """Requirement 10.1: invalid actions → negative r_validity, latent unchanged."""
+    def test_invalid_action_r_validity_negative(self, manager: EpisodeManager) -> None:
         # SUBMIT_TO_FDA_REVIEW not permitted in literature_review phase
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         _, reward, _, _ = manager.step(action)
         assert reward.r_validity < 0, "r_validity must be negative for invalid action"
+    def test_invalid_action_state_unchanged(self, manager: EpisodeManager) -> None:
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         history_before = list(manager._latent.action_history)
         step_before = len(history_before)
         assert len(obs.rule_violations) > 0
         assert len(info["violations"]) > 0
+    def test_invalid_action_done_is_false(self, manager: EpisodeManager) -> None:
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         _, _, done, _ = manager.step(action)
         assert done is False
         with pytest.raises(RuntimeError, match="No active episode"):
             em.step(action)
+    def test_multiple_invalid_steps_do_not_raise(self, manager: EpisodeManager) -> None:
         action = _make_action(ActionType.SUBMIT_TO_FDA_REVIEW)
         for _ in range(5):
             _, reward, _, _ = manager.step(action)

tests/test_judge.py ADDED Viewed

	@@ -0,0 +1,350 @@

+"""
+Tests for server/judge.py — TrialJudge multi-layer verification.
+Covers:
+  - Layer 1 programmatic checks (power, p-value, FDA compliance, budget)
+  - Layer 2 persona selection (junior/senior/principal)
+  - Overconfidence penalty
+  - Hint generation for junior persona
+  - No unhandled exceptions on any valid input (req 10.4)
+"""
+from __future__ import annotations
+import pytest
+from models import ActionType, TrialAction, TrialLatentState, TrialState
+from server.judge import JudgeResult, TrialJudge, _select_persona
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+def _make_latent(**overrides) -> TrialLatentState:
+    defaults = dict(
+        true_effect_size=0.8,
+        true_side_effect_rate=0.05,
+        true_responder_population="all",
+        true_responder_criteria=[],
+        true_dose_response={},
+        true_mechanism="unknown",
+        placebo_response_rate=0.1,
+        dropout_rate=0.05,
+        site_variability=0.0,
+        measurement_noise=0.0,
+        budget_remaining=500_000.0,
+        time_remaining_days=300,
+        patients_enrolled=200,
+        phase_i_complete=True,
+        mtd_identified=True,
+        effect_estimated=True,
+        protocol_submitted=True,
+        interim_complete=True,
+        trial_complete=True,
+        adverse_events=0,
+        episode_phase="analysis",
+        action_history=["run_primary_analysis"],
+        seed=42,
+    )
+    defaults.update(overrides)
+    return TrialLatentState(**defaults)
+def _make_state(difficulty: float = 0.3) -> TrialState:
+    return TrialState(
+        episode_id="test-ep",
+        step_count=5,
+        difficulty=difficulty,
+        scenario_id="solid_tumor_chemo",
+        curriculum_tier="0",
+        curriculum_stats={},
+        action_diversity=0.8,
+        phase_compliance_rate=1.0,
+        is_resolved=False,
+    )
+def _make_action(
+    action_type: ActionType = ActionType.RUN_PRIMARY_ANALYSIS,
+    confidence: float = 0.5,
+    **params,
+) -> TrialAction:
+    return TrialAction(
+        action_type=action_type,
+        parameters=params,
+        justification="test",
+        confidence=confidence,
+    )
+# ---------------------------------------------------------------------------
+# Persona selection
+# ---------------------------------------------------------------------------
+def test_persona_junior():
+    assert _select_persona(0.0) == "junior"
+    assert _select_persona(0.39) == "junior"
+def test_persona_senior():
+    assert _select_persona(0.4) == "senior"
+    assert _select_persona(0.7) == "senior"
+def test_persona_principal():
+    assert _select_persona(0.71) == "principal"
+    assert _select_persona(1.0) == "principal"
+# ---------------------------------------------------------------------------
+# Layer 1: budget check
+# ---------------------------------------------------------------------------
+def test_budget_exhausted_fails():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not result.passed
+    assert any("budget" in v.lower() for v in result.violations)
+def test_budget_negative_fails():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=-100.0)
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not result.passed
+    assert any("budget" in v.lower() for v in result.violations)
+def test_budget_positive_passes_budget_check():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=1.0)
+    # Other checks may still fail, but budget violation should not be present
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not any("budget" in v.lower() for v in result.violations)
+# ---------------------------------------------------------------------------
+# Layer 1: power check
+# ---------------------------------------------------------------------------
+def test_low_power_fails():
+    judge = TrialJudge()
+    # Very small effect + few patients → low power
+    latent = _make_latent(true_effect_size=0.01, patients_enrolled=10)
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not result.passed
+    assert any("power" in v.lower() for v in result.violations)
+def test_sufficient_power_no_power_violation():
+    judge = TrialJudge()
+    # Large effect + many patients → high power
+    latent = _make_latent(true_effect_size=1.5, patients_enrolled=500)
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not any("power" in v.lower() for v in result.violations)
+# ---------------------------------------------------------------------------
+# Layer 1: p-value check
+# ---------------------------------------------------------------------------
+def test_nonsignificant_pvalue_fails():
+    judge = TrialJudge()
+    # Zero effect → p-value = 1.0
+    latent = _make_latent(true_effect_size=0.0, patients_enrolled=100)
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not result.passed
+    assert any("p-value" in v.lower() for v in result.violations)
+def test_significant_pvalue_no_pvalue_violation():
+    judge = TrialJudge()
+    # Large effect + many patients → very small p-value
+    latent = _make_latent(true_effect_size=2.0, patients_enrolled=1000)
+    result = judge.verify(_make_action(), _make_state(), latent)
+    assert not any("p-value" in v.lower() for v in result.violations)
+# ---------------------------------------------------------------------------
+# Layer 1: FDA compliance
+# ---------------------------------------------------------------------------
+def test_fda_violation_propagated():
+    judge = TrialJudge()
+    # Action not permitted in current phase
+    latent = _make_latent(episode_phase="literature_review")
+    action = _make_action(action_type=ActionType.SUBMIT_TO_FDA_REVIEW)
+    result = judge.verify(action, _make_state(), latent)
+    assert not result.passed
+    assert len(result.violations) > 0
+# ---------------------------------------------------------------------------
+# Overconfidence penalty
+# ---------------------------------------------------------------------------
+def test_overconfidence_penalty_applied_when_high_confidence_and_wrong():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)  # guaranteed violation
+    action = _make_action(confidence=0.9)
+    result = judge.verify(action, _make_state(), latent)
+    assert not result.passed
+    assert result.overconfidence_penalty < 0.0
+def test_no_overconfidence_penalty_when_low_confidence():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)  # violation present
+    action = _make_action(confidence=0.5)
+    result = judge.verify(action, _make_state(), latent)
+    assert result.overconfidence_penalty == 0.0
+def test_no_overconfidence_penalty_when_passed():
+    judge = TrialJudge()
+    # Use large effect + many patients to pass power/p-value, valid phase/action
+    latent = _make_latent(
+        true_effect_size=2.0,
+        patients_enrolled=1000,
+        budget_remaining=500_000.0,
+        episode_phase="analysis",
+        interim_complete=True,
+        trial_complete=True,
+    )
+    action = _make_action(action_type=ActionType.RUN_PRIMARY_ANALYSIS, confidence=0.95)
+    result = judge.verify(action, _make_state(), latent)
+    if result.passed:
+        assert result.overconfidence_penalty == 0.0
+def test_overconfidence_penalty_scales_with_violation_count():
+    judge = TrialJudge()
+    # Multiple violations: budget + low power + non-significant p-value
+    latent = _make_latent(
+        budget_remaining=0.0,
+        true_effect_size=0.0,
+        patients_enrolled=1,
+    )
+    action = _make_action(confidence=0.9)
+    result = judge.verify(action, _make_state(), latent)
+    assert result.overconfidence_penalty <= -1.0  # at least 2 violations × -0.5
+# ---------------------------------------------------------------------------
+# Layer 2: persona in result
+# ---------------------------------------------------------------------------
+def test_junior_persona_in_result():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(difficulty=0.2), latent)
+    assert result.persona == "junior"
+def test_senior_persona_in_result():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(difficulty=0.5), latent)
+    assert result.persona == "senior"
+def test_principal_persona_in_result():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(difficulty=0.9), latent)
+    assert result.persona == "principal"
+# ---------------------------------------------------------------------------
+# Layer 2: hints
+# ---------------------------------------------------------------------------
+def test_junior_gets_hint_on_failure():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(difficulty=0.2), latent)
+    assert not result.passed
+    assert result.hint is not None and len(result.hint) > 0
+def test_senior_no_hint_on_failure():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(difficulty=0.5), latent)
+    assert result.hint is None
+def test_principal_no_hint_on_failure():
+    judge = TrialJudge()
+    latent = _make_latent(budget_remaining=0.0)
+    result = judge.verify(_make_action(), _make_state(difficulty=0.9), latent)
+    assert result.hint is None
+def test_junior_gets_hint_on_pass():
+    judge = TrialJudge()
+    latent = _make_latent(
+        true_effect_size=2.0,
+        patients_enrolled=1000,
+        budget_remaining=500_000.0,
+        episode_phase="analysis",
+        interim_complete=True,
+        trial_complete=True,
+    )
+    action = _make_action(action_type=ActionType.RUN_PRIMARY_ANALYSIS)
+    result = judge.verify(action, _make_state(difficulty=0.2), latent)
+    if result.passed:
+        assert result.hint is not None
+# ---------------------------------------------------------------------------
+# JudgeResult model
+# ---------------------------------------------------------------------------
+def test_judge_result_is_pydantic_model():
+    result = JudgeResult(
+        passed=True,
+        violations=[],
+        feedback="ok",
+        hint=None,
+        overconfidence_penalty=0.0,
+        persona="senior",
+    )
+    assert result.passed is True
+    assert result.persona == "senior"
+# ---------------------------------------------------------------------------
+# Req 10.4: no unhandled exceptions
+# ---------------------------------------------------------------------------
+@pytest.mark.parametrize(
+    "action_type",
+    list(ActionType),
+)
+def test_no_exception_for_any_action_type(action_type):
+    """TrialJudge.verify must never raise for any valid action type (req 10.4)."""
+    judge = TrialJudge()
+    latent = _make_latent()
+    state = _make_state()
+    action = TrialAction(
+        action_type=action_type,
+        parameters={},
+        justification="test",
+        confidence=0.5,
+    )
+    # Must not raise
+    result = judge.verify(action, state, latent)
+    assert isinstance(result, JudgeResult)

tests/test_noise_model.py CHANGED Viewed

@@ -44,16 +44,12 @@ class TestNoiseModelIdempotence:
         r2 = NoiseModel(seed=42).randomize(base_scenario)
         assert r1.time_budget_days == r2.time_budget_days
-    def test_same_seed_same_dropout_range(
-        self, base_scenario: ScenarioConfig
-    ) -> None:
         r1 = NoiseModel(seed=42).randomize(base_scenario)
         r2 = NoiseModel(seed=42).randomize(base_scenario)
         assert r1.dropout_rate_range == r2.dropout_rate_range
-    def test_same_seed_same_placebo_range(
-        self, base_scenario: ScenarioConfig
-    ) -> None:
         r1 = NoiseModel(seed=42).randomize(base_scenario)
         r2 = NoiseModel(seed=42).randomize(base_scenario)
         assert r1.placebo_response_range == r2.placebo_response_range
@@ -114,9 +110,7 @@ class TestNoiseModelRanges:
         assert result.side_effect_rate_range == base_scenario.side_effect_rate_range
         assert result.min_sample_size == base_scenario.min_sample_size
-    def test_time_budget_at_least_one_day(
-        self, base_scenario: ScenarioConfig
-    ) -> None:
         for seed in range(50):
             result = NoiseModel(seed=seed).randomize(base_scenario)
             assert result.time_budget_days >= 1

         r2 = NoiseModel(seed=42).randomize(base_scenario)
         assert r1.time_budget_days == r2.time_budget_days
+    def test_same_seed_same_dropout_range(self, base_scenario: ScenarioConfig) -> None:
         r1 = NoiseModel(seed=42).randomize(base_scenario)
         r2 = NoiseModel(seed=42).randomize(base_scenario)
         assert r1.dropout_rate_range == r2.dropout_rate_range
+    def test_same_seed_same_placebo_range(self, base_scenario: ScenarioConfig) -> None:
         r1 = NoiseModel(seed=42).randomize(base_scenario)
         r2 = NoiseModel(seed=42).randomize(base_scenario)
         assert r1.placebo_response_range == r2.placebo_response_range
         assert result.side_effect_rate_range == base_scenario.side_effect_rate_range
         assert result.min_sample_size == base_scenario.min_sample_size
+    def test_time_budget_at_least_one_day(self, base_scenario: ScenarioConfig) -> None:
         for seed in range(50):
             result = NoiseModel(seed=seed).randomize(base_scenario)
             assert result.time_budget_days >= 1

tests/test_output_generator.py ADDED Viewed

	@@ -0,0 +1,479 @@

+"""
+Tests for OutputGenerator — noisy TrialObservation generation (Task 15).
+Requirements 9.1, 9.2, 9.3, 9.4:
+  - OutputGenerator produces a TrialObservation from a TrialLatentState
+  - Agent never sees raw hidden values (noise is always injected)
+  - phase_data, resource_status, available_actions are correctly populated
+  - Measurement noise and site variability are applied via NoiseModel
+"""
+from __future__ import annotations
+import pytest
+from models import ActionType, TrialLatentState, TrialState
+from server.noise_model import NoiseModel
+from server.simulator.output_generator import OutputGenerator
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+@pytest.fixture()
+def base_latent() -> TrialLatentState:
+    """A minimal TrialLatentState for testing."""
+    return TrialLatentState(
+        true_effect_size=0.5,
+        true_side_effect_rate=0.10,
+        true_responder_population="BRCA1+",
+        true_responder_criteria=["BRCA1+", "age < 65"],
+        true_dose_response={10.0: 0.2, 20.0: 0.4, 40.0: 0.7},
+        true_mechanism="PARP inhibition",
+        placebo_response_rate=0.15,
+        dropout_rate=0.08,
+        site_variability=0.05,
+        measurement_noise=0.05,
+        budget_remaining=500_000.0,
+        time_remaining_days=200,
+        patients_enrolled=0,
+        phase_i_complete=False,
+        mtd_identified=False,
+        effect_estimated=False,
+        protocol_submitted=False,
+        interim_complete=False,
+        trial_complete=False,
+        adverse_events=0,
+        episode_phase="design",
+        action_history=[],
+        seed=42,
+    )
+@pytest.fixture()
+def trial_state() -> TrialState:
+    return TrialState(
+        episode_id="ep-001",
+        step_count=1,
+        difficulty=0.5,
+        scenario_id="solid_tumor_chemo",
+        curriculum_tier="tier_0",
+        curriculum_stats={},
+        action_diversity=0.0,
+        phase_compliance_rate=1.0,
+        is_resolved=False,
+    )
+@pytest.fixture()
+def generator() -> OutputGenerator:
+    return OutputGenerator(noise_model=NoiseModel(seed=42))
+def _make_obs(generator, latent, trial_state, **kwargs):
+    defaults = dict(
+        steps_taken=1,
+        max_steps=20,
+        rule_violations=[],
+        done=False,
+        reward=0.0,
+        scenario_description="Test scenario",
+        hint="",
+    )
+    defaults.update(kwargs)
+    return generator.generate(latent, trial_state, **defaults)
+# ---------------------------------------------------------------------------
+# Basic structure tests
+# ---------------------------------------------------------------------------
+class TestObservationStructure:
+    """TrialObservation has all required fields populated."""
+    def test_returns_trial_observation(self, generator, base_latent, trial_state):
+        from models import TrialObservation
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert isinstance(obs, TrialObservation)
+    def test_scenario_description_passed_through(
+        self, generator, base_latent, trial_state
+    ):
+        obs = _make_obs(
+            generator, base_latent, trial_state, scenario_description="My scenario"
+        )
+        assert obs.scenario_description == "My scenario"
+    def test_steps_taken_and_max_steps(self, generator, base_latent, trial_state):
+        obs = _make_obs(
+            generator, base_latent, trial_state, steps_taken=5, max_steps=30
+        )
+        assert obs.steps_taken == 5
+        assert obs.max_steps == 30
+    def test_done_and_reward_passed_through(self, generator, base_latent, trial_state):
+        obs = _make_obs(generator, base_latent, trial_state, done=True, reward=1.5)
+        assert obs.done is True
+        assert obs.reward == 1.5
+    def test_rule_violations_passed_through(self, generator, base_latent, trial_state):
+        violations = ["violation A", "violation B"]
+        obs = _make_obs(generator, base_latent, trial_state, rule_violations=violations)
+        assert obs.rule_violations == violations
+    def test_hint_passed_through(self, generator, base_latent, trial_state):
+        obs = _make_obs(generator, base_latent, trial_state, hint="Try Phase I first")
+        assert obs.hint == "Try Phase I first"
+# ---------------------------------------------------------------------------
+# resource_status tests
+# ---------------------------------------------------------------------------
+class TestResourceStatus:
+    """resource_status reflects latent state resource fields."""
+    def test_budget_remaining(self, generator, base_latent, trial_state):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert obs.resource_status["budget_remaining"] == base_latent.budget_remaining
+    def test_time_remaining_days(self, generator, base_latent, trial_state):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert (
+            obs.resource_status["time_remaining_days"]
+            == base_latent.time_remaining_days
+        )
+    def test_patients_enrolled(self, generator, base_latent, trial_state):
+        latent = base_latent.model_copy(update={"patients_enrolled": 50})
+        obs = _make_obs(generator, latent, trial_state)
+        assert obs.resource_status["patients_enrolled"] == 50
+    def test_resource_status_has_three_keys(self, generator, base_latent, trial_state):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert set(obs.resource_status.keys()) == {
+            "budget_remaining",
+            "time_remaining_days",
+            "patients_enrolled",
+        }
+# ---------------------------------------------------------------------------
+# phase_data tests — noise injection
+# ---------------------------------------------------------------------------
+class TestPhaseDataNoiseInjection:
+    """Agent never sees raw hidden values — noise is always injected."""
+    def test_true_effect_size_not_in_phase_data(
+        self, generator, base_latent, trial_state
+    ):
+        """Raw true_effect_size must never appear directly in phase_data."""
+        latent = base_latent.model_copy(update={"effect_estimated": True})
+        obs = _make_obs(generator, latent, trial_state)
+        # observed_effect_size should differ from true value (noise injected)
+        # We can't guarantee they differ by chance, but the key should be present
+        assert "observed_effect_size" in obs.phase_data
+    def test_effect_size_not_exposed_before_estimation(
+        self, generator, base_latent, trial_state
+    ):
+        """observed_effect_size should not appear before ESTIMATE_EFFECT_SIZE."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "observed_effect_size" not in obs.phase_data
+    def test_effect_size_exposed_after_estimation(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(update={"effect_estimated": True})
+        obs = _make_obs(generator, latent, trial_state)
+        assert "observed_effect_size" in obs.phase_data
+        assert "effect_size_ci" in obs.phase_data
+    def test_ae_rate_not_exposed_before_phase_i(
+        self, generator, base_latent, trial_state
+    ):
+        """Adverse event rate should not appear before Phase I or safety signal."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "observed_adverse_event_rate" not in obs.phase_data
+    def test_ae_rate_exposed_after_phase_i(self, generator, base_latent, trial_state):
+        latent = base_latent.model_copy(update={"phase_i_complete": True})
+        obs = _make_obs(generator, latent, trial_state)
+        assert "observed_adverse_event_rate" in obs.phase_data
+    def test_ae_rate_exposed_after_safety_signal(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"action_history": [ActionType.OBSERVE_SAFETY_SIGNAL.value]}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert "observed_adverse_event_rate" in obs.phase_data
+    def test_ae_rate_is_clipped_to_0_1(self, generator, base_latent, trial_state):
+        latent = base_latent.model_copy(
+            update={"phase_i_complete": True, "true_side_effect_rate": 0.99}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        rate = obs.phase_data["observed_adverse_event_rate"]
+        assert 0.0 <= rate <= 1.0
+    def test_placebo_response_not_exposed_before_interim(
+        self, generator, base_latent, trial_state
+    ):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "observed_placebo_response" not in obs.phase_data
+    def test_placebo_response_exposed_after_interim(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(update={"interim_complete": True})
+        obs = _make_obs(generator, latent, trial_state)
+        assert "observed_placebo_response" in obs.phase_data
+    def test_dose_response_not_exposed_before_phase_i(
+        self, generator, base_latent, trial_state
+    ):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "observed_dose_response" not in obs.phase_data
+    def test_dose_response_exposed_after_phase_i(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(update={"phase_i_complete": True})
+        obs = _make_obs(generator, latent, trial_state)
+        assert "observed_dose_response" in obs.phase_data
+        # All dose-response values should be clipped to [0, 1]
+        for v in obs.phase_data["observed_dose_response"].values():
+            assert 0.0 <= v <= 1.0
+    def test_dropout_rate_not_exposed_before_enrollment(
+        self, generator, base_latent, trial_state
+    ):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "observed_dropout_rate" not in obs.phase_data
+    def test_dropout_rate_exposed_after_enrollment(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(update={"patients_enrolled": 10})
+        obs = _make_obs(generator, latent, trial_state)
+        assert "observed_dropout_rate" in obs.phase_data
+    def test_responder_population_hint_not_exposed_without_biomarker(
+        self, generator, base_latent, trial_state
+    ):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "responder_population_hint" not in obs.phase_data
+    def test_responder_population_hint_exposed_after_biomarker(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"action_history": [ActionType.ADD_BIOMARKER_STRATIFICATION.value]}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert "responder_population_hint" in obs.phase_data
+        # Population label is revealed but NOT the true criteria
+        assert obs.phase_data["responder_population_hint"] == "BRCA1+"
+        assert "true_responder_criteria" not in obs.phase_data
+    def test_milestone_flags_in_phase_data(self, generator, base_latent, trial_state):
+        """Milestone flags are observable (not hidden values)."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "phase_i_complete" in obs.phase_data
+        assert "mtd_identified" in obs.phase_data
+        assert "effect_estimated" in obs.phase_data
+        assert "protocol_submitted" in obs.phase_data
+        assert "interim_complete" in obs.phase_data
+        assert "trial_complete" in obs.phase_data
+    def test_true_mechanism_not_in_phase_data(
+        self, generator, base_latent, trial_state
+    ):
+        """true_mechanism is a hidden value and must never appear in phase_data."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "true_mechanism" not in obs.phase_data
+    def test_true_responder_criteria_not_in_phase_data(
+        self, generator, base_latent, trial_state
+    ):
+        """true_responder_criteria is hidden and must never appear in phase_data."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert "true_responder_criteria" not in obs.phase_data
+# ---------------------------------------------------------------------------
+# available_actions tests
+# ---------------------------------------------------------------------------
+class TestAvailableActions:
+    """available_actions reflects phase-permitted actions filtered by prerequisites."""
+    def test_available_actions_is_list_of_strings(
+        self, generator, base_latent, trial_state
+    ):
+        obs = _make_obs(generator, base_latent, trial_state)
+        assert isinstance(obs.available_actions, list)
+        assert all(isinstance(a, str) for a in obs.available_actions)
+    def test_design_phase_actions(self, generator, base_latent, trial_state):
+        """In design phase with empty history, basic design actions are available."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        # SET_SAMPLE_SIZE, SET_INCLUSION_CRITERIA, SET_EXCLUSION_CRITERIA should be available
+        assert ActionType.SET_SAMPLE_SIZE.value in obs.available_actions
+        assert ActionType.SET_INCLUSION_CRITERIA.value in obs.available_actions
+        assert ActionType.SET_EXCLUSION_CRITERIA.value in obs.available_actions
+    def test_dosing_schedule_requires_primary_endpoint(
+        self, generator, base_latent, trial_state
+    ):
+        """SET_DOSING_SCHEDULE requires SET_PRIMARY_ENDPOINT in history."""
+        obs = _make_obs(generator, base_latent, trial_state)
+        # Without SET_PRIMARY_ENDPOINT in history, SET_DOSING_SCHEDULE should not be available
+        assert ActionType.SET_DOSING_SCHEDULE.value not in obs.available_actions
+    def test_dosing_schedule_available_after_primary_endpoint(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"action_history": [ActionType.SET_PRIMARY_ENDPOINT.value]}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert ActionType.SET_DOSING_SCHEDULE.value in obs.available_actions
+    def test_synthesize_conclusion_requires_trial_complete(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"episode_phase": "submission", "trial_complete": False}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert ActionType.SYNTHESIZE_CONCLUSION.value not in obs.available_actions
+    def test_synthesize_conclusion_available_when_trial_complete(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"episode_phase": "submission", "trial_complete": True}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert ActionType.SYNTHESIZE_CONCLUSION.value in obs.available_actions
+    def test_run_interim_analysis_requires_patients(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"episode_phase": "monitoring", "patients_enrolled": 0}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert ActionType.RUN_INTERIM_ANALYSIS.value not in obs.available_actions
+    def test_run_interim_analysis_available_with_patients(
+        self, generator, base_latent, trial_state
+    ):
+        latent = base_latent.model_copy(
+            update={"episode_phase": "monitoring", "patients_enrolled": 50}
+        )
+        obs = _make_obs(generator, latent, trial_state)
+        assert ActionType.RUN_INTERIM_ANALYSIS.value in obs.available_actions
+    def test_run_primary_analysis_requires_interim_complete(
+        self, generator, base_latent, trial_state
+    ):
+        """Analysis phase with interim_complete=False must not list RUN_PRIMARY_ANALYSIS."""
+        overrides = {"episode_phase": "analysis", "interim_complete": False}
+        interim_pending = base_latent.model_copy(update=overrides)
+        observation = _make_obs(generator, interim_pending, trial_state)
+        offered = observation.available_actions
+        assert ActionType.RUN_PRIMARY_ANALYSIS.value not in offered
+    def test_run_primary_analysis_available_after_interim(
+        self, generator, base_latent, trial_state
+    ):
+        """Analysis phase with interim_complete=True must list RUN_PRIMARY_ANALYSIS."""
+        overrides = {"episode_phase": "analysis", "interim_complete": True}
+        interim_done = base_latent.model_copy(update=overrides)
+        observation = _make_obs(generator, interim_done, trial_state)
+        offered = observation.available_actions
+        assert ActionType.RUN_PRIMARY_ANALYSIS.value in offered
+    def test_unknown_phase_returns_empty_actions(
+        self, generator, base_latent, trial_state
+    ):
+        """An episode phase of "unknown_phase" must produce an empty available-actions list."""
+        overrides = {"episode_phase": "unknown_phase"}
+        bogus_phase = base_latent.model_copy(update=overrides)
+        observation = _make_obs(generator, bogus_phase, trial_state)
+        assert observation.available_actions == []
+# ---------------------------------------------------------------------------
+# Determinism tests
+# ---------------------------------------------------------------------------
+class TestDeterminism:
+    """Seeded-noise reproducibility of the generated observation (requirement 9.2)."""
+    @staticmethod
+    def _observe(seed, latent, trial_state):
+        """Generate one observation from a fresh OutputGenerator whose NoiseModel uses *seed*."""
+        seeded_generator = OutputGenerator(NoiseModel(seed=seed))
+        return seeded_generator.generate(
+            latent,
+            trial_state,
+            steps_taken=1,
+            max_steps=20,
+            rule_violations=[],
+            done=False,
+            reward=0.0,
+            scenario_description="S",
+            hint="",
+        )
+    def test_same_seed_same_observed_effect(self, base_latent, trial_state):
+        """Two generators seeded with 99 report the same observed effect size."""
+        latent = base_latent.model_copy(update={"effect_estimated": True})
+        first = self._observe(99, latent, trial_state)
+        second = self._observe(99, latent, trial_state)
+        effect_first = first.phase_data["observed_effect_size"]
+        effect_second = second.phase_data["observed_effect_size"]
+        assert effect_first == effect_second
+    def test_different_seeds_different_observed_effect(self, base_latent, trial_state):
+        """Generators seeded with 1 and 2 report different observed effect sizes."""
+        latent = base_latent.model_copy(update={"effect_estimated": True})
+        first = self._observe(1, latent, trial_state)
+        second = self._observe(2, latent, trial_state)
+        effect_first = first.phase_data["observed_effect_size"]
+        effect_second = second.phase_data["observed_effect_size"]
+        # Distinct seeds are expected (with near certainty) to draw distinct noise
+        assert effect_first != effect_second

tests/test_phase_detector.py ADDED Viewed

	@@ -0,0 +1,207 @@

+"""
+Tests for server/phase_detector.py
+Validates Requirements 8.5 and 9.4:
+  - detect_phase classifies actions into correct clinical workflow phases
+  - phase_order_correct is True for valid transitions, False for regressions/skips
+  - compute_phase_ordering_reward returns correct bonus/penalty values
+"""
+from __future__ import annotations
+import pytest
+from models import ActionType, TrialAction
+from server.phase_detector import (
+    PHASE_BONUS,
+    PHASE_ORDER,
+    PHASE_SKIP_PENALTY,
+    compute_phase_ordering_reward,
+    detect_phase,
+)
+def _action(action_type: ActionType) -> TrialAction:
+    """Build a TrialAction of the given type with fixed placeholder values for the other fields."""
+    placeholder_fields = {
+        "parameters": {},
+        "justification": "test",
+        "confidence": 0.5,
+    }
+    return TrialAction(action_type=action_type, **placeholder_fields)
+# ---------------------------------------------------------------------------
+# Phase mapping tests
+# ---------------------------------------------------------------------------
+class TestPhaseMapping:
+    """detect_phase labels each action type with the expected workflow phase (empty history)."""
+    def test_hypothesis_actions(self):
+        """Effect-size estimation and biomarker stratification map to "hypothesis"."""
+        hypothesis_actions = (
+            ActionType.ESTIMATE_EFFECT_SIZE,
+            ActionType.ADD_BIOMARKER_STRATIFICATION,
+        )
+        for at in hypothesis_actions:
+            detected, _ordered = detect_phase(_action(at), [])
+            assert detected == "hypothesis", f"{at} should map to hypothesis"
+    def test_design_actions(self):
+        """Protocol-definition actions and amendment requests map to "design"."""
+        expected_design = (
+            ActionType.SET_PRIMARY_ENDPOINT,
+            ActionType.SET_SAMPLE_SIZE,
+            ActionType.SET_INCLUSION_CRITERIA,
+            ActionType.SET_EXCLUSION_CRITERIA,
+            ActionType.SET_DOSING_SCHEDULE,
+            ActionType.SET_CONTROL_ARM,
+            ActionType.SET_RANDOMIZATION_RATIO,
+            ActionType.SET_BLINDING,
+            ActionType.REQUEST_PROTOCOL_AMENDMENT,
+        )
+        for at in expected_design:
+            detected, _ordered = detect_phase(_action(at), [])
+            assert detected == "design", f"{at} should map to design"
+    def test_enrollment_action(self):
+        """ENROLL_PATIENTS maps to "enrollment"."""
+        enroll = _action(ActionType.ENROLL_PATIENTS)
+        detected, _ordered = detect_phase(enroll, [])
+        assert detected == "enrollment"
+    def test_monitoring_actions(self):
+        """Dose escalation, safety observation, interim analysis and resizing map to "monitoring"."""
+        expected_monitoring = (
+            ActionType.RUN_DOSE_ESCALATION,
+            ActionType.OBSERVE_SAFETY_SIGNAL,
+            ActionType.RUN_INTERIM_ANALYSIS,
+            ActionType.MODIFY_SAMPLE_SIZE,
+        )
+        for at in expected_monitoring:
+            detected, _ordered = detect_phase(_action(at), [])
+            assert detected == "monitoring", f"{at} should map to monitoring"
+    def test_analysis_actions(self):
+        """Primary analysis and conclusion synthesis map to "analysis"."""
+        expected_analysis = (
+            ActionType.RUN_PRIMARY_ANALYSIS,
+            ActionType.SYNTHESIZE_CONCLUSION,
+        )
+        for at in expected_analysis:
+            detected, _ordered = detect_phase(_action(at), [])
+            assert detected == "analysis", f"{at} should map to analysis"
+    def test_submission_action(self):
+        """SUBMIT_TO_FDA_REVIEW maps to "submission"."""
+        submit = _action(ActionType.SUBMIT_TO_FDA_REVIEW)
+        detected, _ordered = detect_phase(submit, [])
+        assert detected == "submission"
+# ---------------------------------------------------------------------------
+# Phase order correctness tests
+# ---------------------------------------------------------------------------
+class TestPhaseOrderCorrectness:
+    """The second value returned by detect_phase flags whether the transition is in order."""
+    def test_empty_history_always_correct(self):
+        """With no prior phases, every action type is reported as correctly ordered."""
+        for at in ActionType:
+            _phase, in_order = detect_phase(_action(at), [])
+            assert in_order is True, f"Empty history should always be correct for {at}"
+    def test_same_phase_is_correct(self):
+        """A design action following a design phase is in order."""
+        prior = ["design"]
+        _phase, in_order = detect_phase(_action(ActionType.SET_SAMPLE_SIZE), prior)
+        assert in_order is True
+    def test_advance_one_phase_is_correct(self):
+        """Moving from design to enrollment is in order."""
+        prior = ["design"]
+        _phase, in_order = detect_phase(_action(ActionType.ENROLL_PATIENTS), prior)
+        assert in_order is True
+    def test_regression_is_incorrect(self):
+        """Stepping back from enrollment to a design action is out of order."""
+        prior = ["enrollment"]
+        _phase, in_order = detect_phase(_action(ActionType.SET_SAMPLE_SIZE), prior)
+        assert in_order is False
+    def test_skip_one_phase_is_incorrect(self):
+        """Hypothesis straight to enrollment (design skipped) is out of order."""
+        prior = ["hypothesis"]
+        _phase, in_order = detect_phase(_action(ActionType.ENROLL_PATIENTS), prior)
+        assert in_order is False
+    def test_skip_multiple_phases_is_incorrect(self):
+        """Design straight to analysis (enrollment and monitoring skipped) is out of order."""
+        prior = ["design"]
+        _phase, in_order = detect_phase(
+            _action(ActionType.RUN_PRIMARY_ANALYSIS), prior
+        )
+        assert in_order is False
+    def test_valid_full_sequence(self):
+        """Step through one action per phase and check each transition is in order."""
+        # One representative action for each phase, listed in workflow order
+        sequence = (
+            ActionType.ESTIMATE_EFFECT_SIZE,  # hypothesis
+            ActionType.SET_PRIMARY_ENDPOINT,  # design
+            ActionType.ENROLL_PATIENTS,  # enrollment
+            ActionType.RUN_DOSE_ESCALATION,  # monitoring
+            ActionType.RUN_PRIMARY_ANALYSIS,  # analysis
+            ActionType.SUBMIT_TO_FDA_REVIEW,  # submission
+        )
+        history: list[str] = []
+        for at in sequence:
+            phase, correct = detect_phase(_action(at), history)
+            assert correct is True, (
+                f"Expected correct order for {at} with history {history}"
+            )
+            # Record the detected phase so the next step is judged against it
+            history.append(phase)
+# ---------------------------------------------------------------------------
+# PHASE_ORDER constant
+# ---------------------------------------------------------------------------
+class TestPhaseOrderConstant:
+    """Pins the length and exact contents of the PHASE_ORDER constant."""
+    def test_phase_order_has_seven_phases(self):
+        """PHASE_ORDER holds exactly seven entries."""
+        phase_count = len(PHASE_ORDER)
+        assert phase_count == 7
+    def test_phase_order_sequence(self):
+        """PHASE_ORDER lists the phases from literature review through submission."""
+        expected_order = [
+            "literature_review",
+            "hypothesis",
+            "design",
+            "enrollment",
+            "monitoring",
+            "analysis",
+            "submission",
+        ]
+        assert PHASE_ORDER == expected_order
+# ---------------------------------------------------------------------------
+# compute_phase_ordering_reward tests
+# ---------------------------------------------------------------------------
+class TestComputePhaseOrderingReward:
+    """compute_phase_ordering_reward: bonus when in order, zero on regression, penalty per skip."""
+    def test_empty_history_returns_bonus(self):
+        """An action taken with no phase history earns PHASE_BONUS."""
+        action = _action(ActionType.SET_SAMPLE_SIZE)
+        reward = compute_phase_ordering_reward(action, [])
+        assert reward == PHASE_BONUS
+    def test_correct_advance_returns_bonus(self):
+        """Advancing from design to enrollment earns PHASE_BONUS."""
+        action = _action(ActionType.ENROLL_PATIENTS)
+        reward = compute_phase_ordering_reward(action, ["design"])
+        assert reward == PHASE_BONUS
+    def test_same_phase_returns_bonus(self):
+        """Staying within the design phase earns PHASE_BONUS."""
+        action = _action(ActionType.SET_SAMPLE_SIZE)
+        reward = compute_phase_ordering_reward(action, ["design"])
+        assert reward == PHASE_BONUS
+    def test_regression_returns_zero(self):
+        """Going back from enrollment to a design action earns exactly 0.0."""
+        action = _action(ActionType.SET_SAMPLE_SIZE)
+        reward = compute_phase_ordering_reward(action, ["enrollment"])
+        assert reward == 0.0
+    def test_skip_one_phase_returns_single_penalty(self):
+        """hypothesis → enrollment skips design, so one PHASE_SKIP_PENALTY applies."""
+        action = _action(ActionType.ENROLL_PATIENTS)
+        reward = compute_phase_ordering_reward(action, ["hypothesis"])
+        assert reward == pytest.approx(PHASE_SKIP_PENALTY * 1)
+    def test_skip_two_phases_returns_double_penalty(self):
+        """design → analysis skips two phases, so the penalty is doubled."""
+        # The skipped phases are enrollment and monitoring (2 skips); by contrast,
+        # design → monitoring would skip only enrollment (1 skip).
+        action = _action(ActionType.RUN_PRIMARY_ANALYSIS)
+        reward = compute_phase_ordering_reward(action, ["design"])
+        assert reward == pytest.approx(PHASE_SKIP_PENALTY * 2)
+    def test_constants_values(self):
+        """Pin the numeric values of the bonus and skip-penalty constants."""
+        assert PHASE_BONUS == 0.2
+        assert PHASE_SKIP_PENALTY == -0.3