| import os |
| from uuid import uuid4 |
|
|
| import numpy as np |
| from openenv.core.env_server.interfaces import Environment |
| from openenv.core.env_server.types import State |
|
|
| try: |
| from ..core.b_cell import BCellAgent |
| from ..core.reward import compute_reward |
| from ..core.t_cell import TCellAgent |
| from ..core.tumor import Tumor |
| from ..data.sampler import get_random_episode_params, sample_tumor_params |
| from ..models import B_CELL_ACTIONS, T_CELL_ACTIONS, TICEAction, TICEObservation |
| except ImportError: |
| from core.b_cell import BCellAgent |
| from core.reward import compute_reward |
| from core.t_cell import TCellAgent |
| from core.tumor import Tumor |
| from data.sampler import get_random_episode_params, sample_tumor_params |
| from models import B_CELL_ACTIONS, T_CELL_ACTIONS, TICEAction, TICEObservation |
|
|
|
|
# Trend reporting: the tumor only counts as growing or shrinking when its size
# moved by more than this amount relative to the previous size.
TREND_THRESHOLD = 0.02

# Standard deviation of the Gaussian noise added to the detection signal.
DETECTION_NOISE_STD = 0.03

# Timestep boundaries (exclusive) that end the "early" and "mid" episode phases;
# everything from MID_PHASE_END onwards is reported as "late".
EARLY_PHASE_END, MID_PHASE_END = 15, 35

# Strict lower bounds for the "high" / "medium" T-cell effectiveness labels.
EFFECTIVENESS_HIGH_THRESHOLD, EFFECTIVENESS_MEDIUM_THRESHOLD = 0.6, 0.3

# Strict lower bounds (on mean agent energy) for "abundant" / "moderate".
RESOURCE_ABUNDANT_THRESHOLD, RESOURCE_MODERATE_THRESHOLD = 0.6, 0.3

# Strict lower bounds (on last effective damage) for "strong_response" /
# "weak_response".
STRONG_RESPONSE_THRESHOLD, WEAK_RESPONSE_THRESHOLD = 0.08, 0.03

# Nominal starting tumor size, used to seed the previous-size baseline for
# trend reporting.
INITIAL_TUMOR_SIZE = 0.3

# Template for the "last T-cell result" before any attack has happened.
# Consumers copy it (``dict(DEFAULT_LAST_T_RESULT)``) rather than sharing it.
DEFAULT_LAST_T_RESULT = dict.fromkeys(
    ("effective_damage", "tissue_damage", "base_damage"),
    0.0,
)
|
|
|
|
class TICEEnvironment(Environment):
    """Environment that pits a B-cell agent and a T-cell agent against a tumor.

    Each episode owns one ``Tumor``, one ``BCellAgent`` and one ``TCellAgent``.
    ``reset`` samples tumor parameters and re-initialises all three;
    ``step`` advances the tumor, applies both agents' actions, and returns a
    ``TICEObservation`` carrying coarse, partially noisy signals together with
    the reward and the ``done`` flag.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(
        self,
        archetype: str | None = None,
        difficulty: str | None = None,
        max_steps: int = 50,
    ):
        """Create the environment.

        Args:
            archetype: Default tumor archetype for new episodes. Falls back to
                the ``TICE_ARCHETYPE`` environment variable when falsy.
            difficulty: Default difficulty for new episodes. Falls back to the
                ``TICE_DIFFICULTY`` environment variable when falsy.
            max_steps: Tumor timestep at which an episode is cut off.
        """
        # Run the base initialiser first so that any attributes it defines
        # exist; everything set below takes precedence.
        # NOTE(review): assumes the base __init__ needs no arguments - confirm.
        super().__init__()
        self.archetype = archetype or os.getenv("TICE_ARCHETYPE")
        self.difficulty = difficulty or os.getenv("TICE_DIFFICULTY")
        self.max_steps = int(max_steps)
        self.tumor = Tumor()
        self.b_cell = BCellAgent()
        self.t_cell = TCellAgent()
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._prev_tumor_size = INITIAL_TUMOR_SIZE
        self._last_t_result = dict(DEFAULT_LAST_T_RESULT)
        self._current_archetype = "immune_hot"
        self._current_difficulty = "medium"

    def reset(
        self,
        archetype: str | None = None,
        difficulty: str | None = None,
        **_: object,
    ) -> TICEObservation:
        """Start a new episode and return its first observation.

        Args:
            archetype: Per-episode override of the constructor default.
            difficulty: Per-episode override of the constructor default.
            **_: Any further keyword arguments are accepted and ignored.

        Returns:
            The initial observation, with ``reward=0.0`` and ``done=False``.

        Note:
            Parameters are sampled for a specific archetype/difficulty only
            when *both* are available; if either is missing, a fully random
            episode is drawn and the one that was supplied is not used.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        selected_archetype = archetype or self.archetype
        selected_difficulty = difficulty or self.difficulty

        if selected_archetype and selected_difficulty:
            params = sample_tumor_params(selected_archetype, selected_difficulty)
        else:
            params = get_random_episode_params()

        self._current_archetype = params["archetype"]
        self._current_difficulty = params["difficulty"]
        self.tumor.reset(params)
        self.b_cell.reset()
        self.t_cell.reset()
        # No step has happened yet, so the trend baseline is the tumor's own
        # starting size. Using a fixed constant here would report a spurious
        # trend whenever the freshly reset tumor does not start at that value.
        self._prev_tumor_size = float(self.tumor.tumor_size)
        self._last_t_result = dict(DEFAULT_LAST_T_RESULT)

        return self._make_observation(
            reward=0.0,
            feedback="Episode started.",
            done=False,
        )

    def step(self, action: TICEAction) -> TICEObservation:
        """Advance the episode by one step.

        Order of operations: the tumor evolves under the current immune
        pressure, the T cell acts (using the B cell's detection level from
        *before* this step's B-cell action), the B cell acts, and finally the
        T cell's effective damage is subtracted from the tumor size.

        Args:
            action: The joint action. Unrecognised B-cell / T-cell action
                names are replaced by ``"MAINTAIN"`` / ``"ATTACK_LOW"``.

        Returns:
            The observation after the step, including reward and ``done``.
        """
        self._state.step_count += 1

        b_action = action.b_cell_action
        t_action = action.t_cell_action

        # Fall back to conservative defaults instead of rejecting bad input.
        if b_action not in B_CELL_ACTIONS:
            b_action = "MAINTAIN"
        if t_action not in T_CELL_ACTIONS:
            t_action = "ATTACK_LOW"

        prev_size = float(self.tumor.tumor_size)
        t_cell_pressure = float(
            np.clip(
                (self.t_cell.fatigue * 0.3) + (self.b_cell.detection_level * 0.3),
                0.0,
                1.0,
            )
        )

        self.tumor.step(t_cell_pressure)

        t_result = self.t_cell.step(
            t_action,
            self.b_cell.detection_level,
            self.tumor.pdl1_suppression,
            self.tumor.resistance,
        )
        self.b_cell.step(b_action)
        self._last_t_result = t_result

        # Apply the attack and keep the size inside [0, 1].
        self.tumor.tumor_size = float(
            np.clip(self.tumor.tumor_size - t_result["effective_damage"], 0.0, 1.0)
        )
        self._prev_tumor_size = prev_size

        is_eradicated = self.tumor.is_eradicated()
        is_escaped = self.tumor.is_escaped()
        is_timeout = self.tumor.timestep >= self.max_steps
        done = bool(is_eradicated or is_escaped or is_timeout)

        reward = compute_reward(
            prev_size,
            self.tumor.tumor_size,
            t_result,
            self.b_cell.get_state(),
            self.t_cell.get_state(),
            is_eradicated,
            is_escaped,
        )

        if is_eradicated:
            feedback = "VICTORY: Tumor eradicated."
        elif is_escaped:
            feedback = "DEFEAT: Tumor escaped."
        elif is_timeout:
            feedback = "TIMEOUT: Episode limit reached."
        else:
            feedback = (
                f"Tumor: {self.tumor.tumor_size:.3f} | "
                f"Damage: {t_result['effective_damage']:.3f} | "
                f"Reward: {reward:+.3f}"
            )

        return self._make_observation(reward=reward, feedback=feedback, done=done)

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state

    @staticmethod
    def _tier(
        value: float,
        upper: float,
        lower: float,
        labels: tuple[str, str, str],
    ) -> str:
        """Return labels[0] if value > upper, labels[1] if value > lower, else labels[2]."""
        top, middle, bottom = labels
        if value > upper:
            return top
        if value > lower:
            return middle
        return bottom

    def _make_observation(
        self,
        reward: float,
        feedback: str,
        done: bool,
    ) -> TICEObservation:
        """Build the observation from the current simulation state.

        Most internal quantities are exposed only as coarse labels. The
        detection signal is the B cell's detection level scaled by
        ``1 - stealth_level``, perturbed with Gaussian noise, clipped to
        [0, 1] and rounded to two decimals.

        Args:
            reward: Reward to attach to the observation.
            feedback: Human-readable status line.
            done: Whether the episode has ended.

        Returns:
            The populated ``TICEObservation``.
        """
        delta = self.tumor.tumor_size - self._prev_tumor_size
        if delta > TREND_THRESHOLD:
            trend = "increasing"
        elif delta < -TREND_THRESHOLD:
            trend = "decreasing"
        else:
            trend = "stable"

        raw_detection = self.b_cell.detection_level * (1.0 - self.tumor.stealth_level)
        # Noise comes from NumPy's global RNG; this class does not seed it.
        detection = float(
            np.clip(raw_detection + np.random.normal(0.0, DETECTION_NOISE_STD), 0.0, 1.0)
        )

        effectiveness = self._tier(
            (1.0 - self.tumor.pdl1_suppression) * (1.0 - self.t_cell.fatigue),
            EFFECTIVENESS_HIGH_THRESHOLD,
            EFFECTIVENESS_MEDIUM_THRESHOLD,
            ("high", "medium", "low"),
        )
        resource_level = self._tier(
            (self.b_cell.energy + self.t_cell.energy) / 2.0,
            RESOURCE_ABUNDANT_THRESHOLD,
            RESOURCE_MODERATE_THRESHOLD,
            ("abundant", "moderate", "scarce"),
        )
        recent_outcome = self._tier(
            self._last_t_result["effective_damage"],
            STRONG_RESPONSE_THRESHOLD,
            WEAK_RESPONSE_THRESHOLD,
            ("strong_response", "weak_response", "no_effect"),
        )

        timestep = int(self.tumor.timestep)
        if timestep < EARLY_PHASE_END:
            episode_phase = "early"
        elif timestep < MID_PHASE_END:
            episode_phase = "mid"
        else:
            episode_phase = "late"

        return TICEObservation(
            tumor_trend=trend,
            detection_signal=round(detection, 2),
            t_cell_effectiveness=effectiveness,
            resource_level=resource_level,
            b_cell_fatigue=round(self.b_cell.fatigue, 2),
            t_cell_fatigue=round(self.t_cell.fatigue, 2),
            recent_outcome=recent_outcome,
            timestep=timestep,
            episode_phase=episode_phase,
            archetype=self._current_archetype,
            difficulty=self._current_difficulty,
            feedback=feedback,
            done=done,
            reward=reward,
        )


# Module-level alias exposing the same class under a mixed-case spelling.
TiceEnvironment = TICEEnvironment
|
|