"""ADHD Task Initiation Coaching Evaluation Environment.

Evaluates ADHD coaching responses by scoring tool calling and response quality.
V2: Multiple scenarios, state tracking, rubric-based scoring.
"""

import random
from typing import Optional
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

from models import ADHDAction, ADHDObservation
from reward import score_rubric


# ADHD task initiation scenarios.
# Each entry is a first-person user prompt about trouble getting started on a
# task. ADHDEnvironment.reset() picks one at random for its ADHD branch and
# pairs it with the expected tool "adhd_coach_tool".
ADHD_SCENARIOS = [
    "I can't start writing the email to my manager",
    "I've been staring at this blank document for 30 minutes",
    "I need to make a phone call but I keep putting it off",
    "I'm stuck on starting this presentation",
    "I've been avoiding this report all day",
    "I don't know how to begin this project proposal",
    "I keep switching tabs instead of starting my work",
    "I'm overwhelmed by this task list",
    "I can't focus on writing this code review",
    "I've been procrastinating on this assignment for hours",
]

# Non-ADHD scenarios: (prompt, expected_tool or None).
# In ADHDEnvironment.reset(), element 0 becomes the episode's scenario text and
# element 1 becomes its expected tool; None marks prompts with no expected tool.
NON_ADHD_SCENARIOS = [
    ("What's the weather like today?", "web_search_tool"),
    ("What is the latest revenue for IBM?", "web_search_tool"),
    ("What is the capital of France?", "web_search_tool"),
    ("Write me a poem about cats", None),
    ("Translate this sentence to Spanish", None),
]


def generate_user_state(rng: Optional[random.Random] = None) -> dict:
    """Generate a randomized user state (the 'knobs').

    Args:
        rng: Optional ``random.Random`` instance to draw from. When omitted,
            the module-level ``random`` functions are used, which is the
            original behavior. Passing a seeded instance makes the generated
            state reproducible without touching global random state.

    Returns:
        A dict with three keys:

        - ``"time_of_day"``: zero-padded ``"HH:MM"`` string, hour in 06-22
          and minute in 00-59 (both bounds inclusive).
        - ``"position_in_chair"``: one of ``"normal"``, ``"slouching"``,
          ``"standing"``.
        - ``"minutes_since_last_stood"``: int in 0-240 inclusive.
    """
    # The module itself exposes randint/choice, so it can stand in for a
    # Random instance when the caller did not supply one.
    source = random if rng is None else rng

    # Draw order (hour, minute, posture, idle minutes) is kept fixed so a
    # given seed always yields the same state.
    hour = source.randint(6, 22)
    minute = source.randint(0, 59)
    posture = source.choice(["normal", "slouching", "standing"])
    idle_minutes = source.randint(0, 240)

    return {
        "time_of_day": f"{hour:02d}:{minute:02d}",
        "position_in_chair": posture,
        "minutes_since_last_stood": idle_minutes,
    }


class ADHDEnvironment(Environment):
    """ADHD Task Initiation Coaching Evaluation Environment.

    Evaluates coaching responses for ADHD task initiation paralysis.
    Innovation: state tracking + tool calling evaluation.

    V2: Multiple scenarios, state tracking, rubric-based scoring.
    - 10 ADHD scenarios + 5 non-ADHD scenarios
    - 3 state variables (time_of_day, position_in_chair, minutes_since_last_stood)
    - Rubric with tool calling + state awareness scoring

    Single-turn: reset() -> step() -> done=True
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Probability that reset() draws from ADHD_SCENARIOS; the remainder goes
    # to NON_ADHD_SCENARIOS.
    # NOTE(review): the inline comment in reset() used to say 80% / 20% while
    # the code compared against 0.7. Runtime behavior (70/30) is preserved
    # here; confirm which split was actually intended.
    ADHD_SCENARIO_PROBABILITY: float = 0.7

    def __init__(self):
        """Create an environment with a fresh episode id and empty episode data.

        Scenario, user state and expected tool are placeholders until
        reset() populates them.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_scenario: str = ""
        self.current_user_state: dict = {}
        self.is_adhd_scenario: bool = True
        self.expected_tool: Optional[str] = None

    def reset(self) -> ADHDObservation:
        """Generate a new episode with a randomized scenario and user state.

        Returns:
            An observation with ``done=False`` and ``reward=0.0`` carrying the
            chosen scenario, the generated user state, and a ``scoring`` dict
            that lists the scoring version and the available tool names.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_user_state = generate_user_state()

        # Pick ADHD 70% / non-ADHD 30% (see ADHD_SCENARIO_PROBABILITY).
        if random.random() < self.ADHD_SCENARIO_PROBABILITY:
            self.current_scenario = random.choice(ADHD_SCENARIOS)
            self.is_adhd_scenario = True
            self.expected_tool = "adhd_coach_tool"
        else:
            # Entries are (prompt, expected_tool or None) pairs.
            prompt, tool = random.choice(NON_ADHD_SCENARIOS)
            self.current_scenario = prompt
            self.is_adhd_scenario = False
            self.expected_tool = tool

        return ADHDObservation(
            scenario=self.current_scenario,
            state=self.current_user_state,
            done=False,
            reward=0.0,
            scoring={
                "version": "v2.1",
                "available_tools": [
                    "adhd_coach_tool",
                    "web_search_tool",
                ],
            },
        )

    def step(self, action: ADHDAction) -> ADHDObservation:  # type: ignore[override]
        """Score a coaching response.

        Single-turn: returns done=True after scoring.

        Args:
            action: The response to score; its ``tool_calls`` and ``message``
                are echoed back under ``scoring["action"]``.

        Returns:
            An observation with ``done=True`` whose ``reward`` is the
            ``"total_score"`` entry of the dict returned by ``score_rubric``
            and whose ``scoring`` is that dict plus the echoed action.
        """
        self._state.step_count += 1

        scoring = score_rubric(
            action,
            self.current_scenario,
            self.current_user_state,
            self.is_adhd_scenario,
            self.expected_tool,
        )
        # Echo what was scored so consumers of the observation can inspect
        # the action alongside its rubric breakdown.
        scoring["action"] = {
            "tool_calls": action.tool_calls,
            "message": action.message,
        }

        return ADHDObservation(
            scenario=self.current_scenario,
            state=self.current_user_state,
            done=True,
            reward=scoring["total_score"],
            scoring=scoring,
        )

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state