# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the Toxic Royale environment.

The toxic_royale_env environment is a text-based Clash Royale-inspired simulator
where an agent learns to deploy cards and optionally use emotes (BM) that affect
an opponent "tilt" state.
"""

from __future__ import annotations

from typing import Any, Literal

from openenv.core.env_server.types import Action, Observation
from pydantic import Field


class ToxicRoyaleAction(Action):
    """
    One simulator action.

    Notes:
    - This is a single-step action schema for the OpenEnv server.
    - For LLM training with TRL/OpenEnv tool-calling, we will typically wrap this
      environment with tool methods (e.g. `play(...)`, `wait(...)`) in a trainer-side
      environment_factory wrapper that calls into the OpenEnv client.
    """

    # NOTE(review): the docstring above is deliberately left word-for-word.
    # Assuming `Action` is a pydantic model, the class docstring is surfaced as
    # the schema description, so editing it would change emitted output.

    # Selector for the action variant. This is the only field declared without
    # a default, so callers must always provide it.
    kind: Literal["play", "wait"] = Field(
        ..., description="Action type. 'play' deploys a card; 'wait' takes no deployment action."
    )

    # Payload for kind='play'. Both fields default to None, and this class
    # declares no validator that ties them to `kind`; any checking of the
    # card/zone values has to happen elsewhere.
    card: str | None = Field(
        None, description="Card name when kind='play'. Must be in your current hand."
    )
    zone: str | None = Field(
        None,
        description="Placement zone when kind='play'. Example: 'bridge_left', 'back_right'.",
    )

    # Emote (BM) channel. Declared independently of `kind`; None means no emote.
    emote: str | None = Field(
        None,
        description="Optional emote. Example: 'laugh', 'yawn', 'cry', 'thanks', 'chicken', 'wp'.",
    )

    # Free-form rationale text, retained for demo/story purposes; it is not
    # meant to feed into the reward directly.
    reasoning: str | None = Field(
        None, description="Optional reasoning text (may include <think>...</think>)."
    )


class ToxicRoyaleObservation(Observation):
    """Observation returned after each step/reset."""

    # NOTE(review): docstring kept word-for-word; assuming `Observation` is a
    # pydantic model, it doubles as the schema description.

    # Textual rendering of the current state; an empty string when not set.
    state_text: str = Field(
        "", description="Human/LLM-readable state snapshot (the main observation)."
    )

    # Structured counterpart of `state_text`. `default_factory` is used so the
    # default is a freshly built dict rather than a shared literal.
    state: dict[str, Any] = Field(
        description="Structured state payload for programmatic inspection.",
        default_factory=dict,
    )

    # Mapping of reward-component name -> value; empty dict by default.
    reward_breakdown: dict[str, float] = Field(
        description="Named reward components (for debugging/training plots).",
        default_factory=dict,
    )