import os
from uuid import uuid4

import numpy as np
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..core.b_cell import BCellAgent
    from ..core.reward import compute_reward
    from ..core.t_cell import TCellAgent
    from ..core.tumor import Tumor
    from ..data.sampler import get_random_episode_params, sample_tumor_params
    from ..models import B_CELL_ACTIONS, T_CELL_ACTIONS, TICEAction, TICEObservation
except ImportError:
    from core.b_cell import BCellAgent
    from core.reward import compute_reward
    from core.t_cell import TCellAgent
    from core.tumor import Tumor
    from data.sampler import get_random_episode_params, sample_tumor_params
    from models import B_CELL_ACTIONS, T_CELL_ACTIONS, TICEAction, TICEObservation


# --- Observation: tumor trend and detection signal -------------------------
# The tumor-size change must exceed this magnitude (in either direction) for
# the trend to be reported as "increasing"/"decreasing" instead of "stable".
TREND_THRESHOLD = 0.02
# Standard deviation of the Gaussian noise added to the detection signal.
DETECTION_NOISE_STD = 0.03

# --- Observation: episode phase ---------------------------------------------
# Timesteps below EARLY_PHASE_END are "early", below MID_PHASE_END are "mid",
# and everything else is "late".
EARLY_PHASE_END = 15
MID_PHASE_END = 35

# --- Observation: categorical buckets ---------------------------------------
# A score strictly above the first threshold gets the top label, strictly
# above the second gets the middle label, otherwise the lowest label.
EFFECTIVENESS_HIGH_THRESHOLD = 0.6
EFFECTIVENESS_MEDIUM_THRESHOLD = 0.3
RESOURCE_ABUNDANT_THRESHOLD = 0.6
RESOURCE_MODERATE_THRESHOLD = 0.3
STRONG_RESPONSE_THRESHOLD = 0.08
WEAK_RESPONSE_THRESHOLD = 0.03

# --- Defaults used before any step has been taken ---------------------------
# Placeholder for the "previous tumor size" before a real one is available.
INITIAL_TUMOR_SIZE = 0.3
# Template for the last T-cell result; callers copy it rather than share it.
DEFAULT_LAST_T_RESULT = dict(
    effective_damage=0.0,
    tissue_damage=0.0,
    base_damage=0.0,
)


class TICEEnvironment(Environment):
    """Environment coupling a tumor model with a B-cell and a T-cell agent.

    Each :meth:`step` advances the tumor, applies one T-cell and one B-cell
    action, computes a reward, and returns a ``TICEObservation`` built from
    coarse (and, for the detection signal, noisy) summaries of internal state.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(
        self,
        archetype: str | None = None,
        difficulty: str | None = None,
        max_steps: int = 50,
    ):
        """Create the environment and its tumor / immune-cell components.

        Args:
            archetype: Archetype to use on reset. Falls back to the
                ``TICE_ARCHETYPE`` environment variable when falsy.
            difficulty: Difficulty to use on reset. Falls back to the
                ``TICE_DIFFICULTY`` environment variable when falsy.
            max_steps: Tumor timestep at which an episode is flagged as timed
                out; coerced with ``int()``.
        """
        self.archetype = archetype or os.getenv("TICE_ARCHETYPE")
        self.difficulty = difficulty or os.getenv("TICE_DIFFICULTY")
        self.max_steps = int(max_steps)
        self.tumor = Tumor()
        self.b_cell = BCellAgent()
        self.t_cell = TCellAgent()
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._prev_tumor_size = INITIAL_TUMOR_SIZE
        # Copy so later mutation of the per-instance result never touches the template.
        self._last_t_result = dict(DEFAULT_LAST_T_RESULT)
        self._current_archetype = "immune_hot"
        self._current_difficulty = "medium"

    def reset(
        self,
        archetype: str | None = None,
        difficulty: str | None = None,
        **_: object,
    ) -> TICEObservation:
        """Start a new episode and return its first observation.

        Args:
            archetype: Per-episode override of the constructor/env archetype.
            difficulty: Per-episode override of the constructor/env difficulty.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            The initial observation, with ``reward=0.0`` and ``done=False``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        selected_archetype = archetype or self.archetype
        selected_difficulty = difficulty or self.difficulty

        if selected_archetype and selected_difficulty:
            params = sample_tumor_params(selected_archetype, selected_difficulty)
        else:
            params = get_random_episode_params()
            if selected_archetype or selected_difficulty:
                # Only one of the two was pinned: honour it instead of silently
                # discarding it, and take the missing half from the random draw.
                params = sample_tumor_params(
                    selected_archetype or params["archetype"],
                    selected_difficulty or params["difficulty"],
                )

        self._current_archetype = params["archetype"]
        self._current_difficulty = params["difficulty"]
        self.tumor.reset(params)
        self.b_cell.reset()
        self.t_cell.reset()
        # Baseline comes from the freshly reset tumor so the first observation
        # reports no change, whatever size the sampled parameters start it at.
        self._prev_tumor_size = float(self.tumor.tumor_size)
        self._last_t_result = dict(DEFAULT_LAST_T_RESULT)

        return self._make_observation(
            reward=0.0,
            feedback="Episode started.",
            done=False,
        )

    def step(self, action: TICEAction) -> TICEObservation:  # type: ignore[override]
        """Advance the simulation by one step.

        Args:
            action: Carries ``b_cell_action`` and ``t_cell_action``. Values not
                found in ``B_CELL_ACTIONS`` / ``T_CELL_ACTIONS`` are replaced by
                ``"MAINTAIN"`` / ``"ATTACK_LOW"`` respectively.

        Returns:
            The observation after the step, including the reward, a feedback
            string, and ``done`` (true when the tumor reports eradication or
            escape, or when its timestep has reached ``max_steps``).
        """
        self._state.step_count += 1

        b_action = action.b_cell_action
        t_action = action.t_cell_action

        # The server is the source of truth; invalid client actions are coerced to safe defaults
        # so training/eval can continue without crashing on formatting errors.
        if b_action not in B_CELL_ACTIONS:
            b_action = "MAINTAIN"
        if t_action not in T_CELL_ACTIONS:
            t_action = "ATTACK_LOW"

        # Captured before any update so reward and trend compare against the pre-step size.
        prev_size = float(self.tumor.tumor_size)
        t_cell_pressure = float(
            np.clip(
                (self.t_cell.fatigue * 0.3) + (self.b_cell.detection_level * 0.3),
                0.0,
                1.0,
            )
        )

        # Update order matters for partial observability: the tumor advances first, then the
        # immune subsystems act, so the agent is always reacting with a one-step lag.
        self.tumor.step(t_cell_pressure)

        # The T cell acts on the B cell's detection level from before this step's B-cell update.
        t_result = self.t_cell.step(
            t_action,
            self.b_cell.detection_level,
            self.tumor.pdl1_suppression,
            self.tumor.resistance,
        )
        self.b_cell.step(b_action)
        self._last_t_result = t_result

        # Apply the T-cell damage on top of the tumor's own update, keeping size in [0, 1].
        self.tumor.tumor_size = float(
            np.clip(self.tumor.tumor_size - t_result["effective_damage"], 0.0, 1.0)
        )
        self._prev_tumor_size = prev_size

        is_eradicated = self.tumor.is_eradicated()
        is_escaped = self.tumor.is_escaped()
        is_timeout = self.tumor.timestep >= self.max_steps
        done = bool(is_eradicated or is_escaped or is_timeout)

        reward = compute_reward(
            prev_size,
            self.tumor.tumor_size,
            t_result,
            self.b_cell.get_state(),
            self.t_cell.get_state(),
            is_eradicated,
            is_escaped,
        )

        # Terminal messages take precedence in this order: victory, defeat, timeout.
        if is_eradicated:
            feedback = "VICTORY: Tumor eradicated."
        elif is_escaped:
            feedback = "DEFEAT: Tumor escaped."
        elif is_timeout:
            feedback = "TIMEOUT: Episode limit reached."
        else:
            feedback = (
                f"Tumor: {self.tumor.tumor_size:.3f} | "
                f"Damage: {t_result['effective_damage']:.3f} | "
                f"Reward: {reward:+.3f}"
            )

        return self._make_observation(reward=reward, feedback=feedback, done=done)

    @property
    def state(self) -> State:
        """Return the current episode ``State`` (episode id and step count)."""
        return self._state

    def _make_observation(
        self,
        reward: float,
        feedback: str,
        done: bool,
    ) -> TICEObservation:
        """Build the observation from the current internal state.

        Continuous internals are reduced to categorical labels using the
        module-level thresholds; the detection signal is the only field that
        receives random noise.

        Args:
            reward: Reward to attach to the observation.
            feedback: Human-readable status message to attach.
            done: Whether the episode has ended.

        Returns:
            A populated ``TICEObservation``.
        """
        delta = self.tumor.tumor_size - self._prev_tumor_size
        if delta > TREND_THRESHOLD:
            trend = "increasing"
        elif delta < -TREND_THRESHOLD:
            trend = "decreasing"
        else:
            trend = "stable"

        # Detection signal is intentionally noisy/ambiguous: a drop could mean weaker B cells
        # or increased tumor stealth. This forces inference instead of direct state access.
        raw_detection = self.b_cell.detection_level * (1.0 - self.tumor.stealth_level)
        detection = float(
            np.clip(raw_detection + np.random.normal(0.0, DETECTION_NOISE_STD), 0.0, 1.0)
        )

        effectiveness_score = (1.0 - self.tumor.pdl1_suppression) * (1.0 - self.t_cell.fatigue)
        if effectiveness_score > EFFECTIVENESS_HIGH_THRESHOLD:
            effectiveness = "high"
        elif effectiveness_score > EFFECTIVENESS_MEDIUM_THRESHOLD:
            effectiveness = "medium"
        else:
            effectiveness = "low"

        average_energy = (self.b_cell.energy + self.t_cell.energy) / 2.0
        if average_energy > RESOURCE_ABUNDANT_THRESHOLD:
            resource_level = "abundant"
        elif average_energy > RESOURCE_MODERATE_THRESHOLD:
            resource_level = "moderate"
        else:
            resource_level = "scarce"

        last_damage = self._last_t_result["effective_damage"]
        if last_damage > STRONG_RESPONSE_THRESHOLD:
            recent_outcome = "strong_response"
        elif last_damage > WEAK_RESPONSE_THRESHOLD:
            recent_outcome = "weak_response"
        else:
            recent_outcome = "no_effect"

        timestep = int(self.tumor.timestep)
        if timestep < EARLY_PHASE_END:
            episode_phase = "early"
        elif timestep < MID_PHASE_END:
            episode_phase = "mid"
        else:
            episode_phase = "late"

        return TICEObservation(
            tumor_trend=trend,
            detection_signal=round(detection, 2),
            t_cell_effectiveness=effectiveness,
            resource_level=resource_level,
            b_cell_fatigue=round(self.b_cell.fatigue, 2),
            t_cell_fatigue=round(self.t_cell.fatigue, 2),
            recent_outcome=recent_outcome,
            timestep=timestep,
            episode_phase=episode_phase,
            archetype=self._current_archetype,
            difficulty=self._current_difficulty,
            feedback=feedback,
            done=done,
            reward=reward,
        )


# Second name bound to the same class, so it can be imported as either
# `TICEEnvironment` or `TiceEnvironment`.
TiceEnvironment = TICEEnvironment