"""Pydantic models for the ODSE Sandbox Environment.

Defines the two actions (RunCode, Submit), observations, step results,
and supporting types for a code-execution sandbox where agents write
and execute Python code to solve data-science tasks.

Architecture
------------
Instead of a fixed DSL with enumerated action types, the sandbox
exposes only two actions:

* ``RunCodeAction`` : execute arbitrary Python in a persistent namespace.
* ``SubmitAction``  : submit predictions and terminate the episode.

The observation gives the agent execution feedback (stdout/stderr),
workspace state (variables, shapes), scoring context, and dataset
metadata so it can plan its next code cell.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated


# ============================================================================
# Enums
# ============================================================================

class ProblemType(str, Enum):
    """Type of ML problem the agent must solve."""

    CLASSIFICATION = "classification"
    REGRESSION = "regression"


class Difficulty(str, Enum):
    """Difficulty Level - controls dataset noise, nulls, and step budget."""

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"


class ExecutionStatus(str, Enum):
    """Outcome of a single code execution."""

    SUCCESS = "success"
    ERROR = "error"
    TIMEOUT = "timeout"


# ============================================================================
# Actions
# ============================================================================

class RunCodeAction(BaseModel):
    """Execute Python code in the sandbox.

    The code runs in a persistent namespace pre-loaded with:

    * ``train_df``      : Training DataFrame (features **+** target)
    * ``val_features``  : Validation features (target hidden)
    * ``test_features`` : Test features (target hidden)
    * ``target_column`` : Name of the target column (str)
    * ``pd``, ``np``    : pandas and numpy
    * ``evaluate(preds)`` : Score predictions against hidden **validation** labels

    Variables persist across ``RunCode`` calls (notebook-style kernel).
    Assign your **test-set** predictions to the variable ``predictions``
    before calling ``SubmitAction``.
    """

    action_type: Literal["run_code"] = Field(default="run_code", frozen=True)
    code: str = Field(description="Python code to execute in the sandbox")


class SubmitAction(BaseModel):
    """Submit predictions and terminate the episode.

    Reads the ``predictions`` variable from the sandbox namespace and
    scores it against the **hidden test labels**. The variable must be
    an array-like whose length matches ``test_features``.
    """

    action_type: Literal["submit"] = Field(default="submit", frozen=True)


Action = Annotated[
    Union[RunCodeAction, SubmitAction],
    Field(discriminator="action_type"),
]


# ============================================================================
# Dataset / Column metadata
# ============================================================================

class ColumnSchema(BaseModel):
    """Schema information for a single column in the dataset."""

    name: str
    dtype: str
    null_count: int = Field(ge=0)
    is_numeric: bool
    unique_count: int = Field(ge=0)
    sample_values: List[Any] = Field(default_factory=list, max_length=5)


class DatasetInfo(BaseModel):
    """Metadata about the dataset, provided to the agent on every observation."""

    train_shape: Tuple[int, int]
    val_shape: Tuple[int, int]
    test_shape: Tuple[int, int]
    target_column: str
    problem_description: str = ""
    problem_type: str  # "classification" or "regression"
    metric: str  # primary metric name (e.g. "accuracy", "r2")
    columns: List[ColumnSchema]
    target_classes: Optional[List[Any]] = None  # classification only
    target_stats: Optional[Dict[str, float]] = None  # regression only


# ============================================================================
# Namespace summary
# ============================================================================

class VariableInfo(BaseModel):
    """Summary of one variable in the agent's sandbox namespace."""

    name: str
    type_name: str
    shape: Optional[Tuple[int, ...]] = None
    preview: str = Field(default="", max_length=500)


# ============================================================================
# Observation
# ============================================================================

class Observation(BaseModel):
    """Observation returned after every ``reset()`` or ``step()``."""

    # - Execution result (empty on reset) ------------------------------------
    stdout: str = Field(
        default="",
        description="Captured stdout from last code execution",
    )
    stderr: str = Field(
        default="",
        description="Captured stderr / traceback from last execution",
    )
    execution_status: Optional[ExecutionStatus] = Field(
        default=None,
        description="Status of the last code execution (None on reset)",
    )
    execution_time_ms: float = Field(
        default=0.0,
        ge=0.0,
        description="Wall-clock time of last execution in milliseconds",
    )

    # - Workspace state ------------------------------------------------------
    namespace_summary: List[VariableInfo] = Field(
        default_factory=list,
        description="User-visible variables in the sandbox namespace",
    )

    # - Scoring --------------------------------------------------------------
    validation_score: Optional[float] = Field(
        default=None,
        description="Latest validation score (from evaluate() or auto-detected)",
    )
    best_validation_score: Optional[float] = Field(
        default=None,
        description="Best validation score achieved this episode",
    )

    test_score: Optional[float] = Field(
        default=None,
        description="Test-set score (populated only after submit)",
    )
    test_report: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Full test evaluation report (populated only after submit)",
    )


    # - Episode context ------------------------------------------------------
    step_count: int = Field(ge=0, description="Steps taken so far")
    max_steps: int = Field(ge=1, description="Step budget for this episode")
    dataset_info: DatasetInfo
    task_description: str = Field(
        description="Human-readable description of the agent's objective",
    )
    done: bool = Field(default=False, description="Whether the episode has ended")


# ============================================================================
# Step result
# ============================================================================

class StepResult(BaseModel):
    """Result of a single ``env.step()`` call."""

    observation: Observation
    reward: float = Field(description="Scalar reward for this step")
    done: bool = Field(description="Whether the episode has terminated")
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional diagnostics (scores, timing, breakdown, ...)",
    )