# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the Toxic Royale Env Environment.

The toxic_royale_env environment is a text-based Clash Royale-inspired
simulator where an agent learns to deploy cards and optionally use emotes (BM)
that affect an opponent "tilt" state.
"""

from __future__ import annotations

from typing import Any, Literal

from openenv.core.env_server.types import Action, Observation
from pydantic import Field


class ToxicRoyaleAction(Action):
    """
    A single action submitted to the simulator.

    Notes:
        - The schema describes exactly one step for the OpenEnv server.
        - When training an LLM with TRL/OpenEnv tool-calling, this environment
          is typically wrapped on the trainer side by an environment_factory
          wrapper exposing tool methods (e.g. `play(...)`, `wait(...)`) that
          call into the OpenEnv client.
    """

    # Required discriminator: the only accepted values are "play" and "wait".
    kind: Literal["play", "wait"] = Field(
        ...,
        description="Action type. 'play' deploys a card; 'wait' takes no deployment action.",
    )

    # Fields that describe a deployment (see each description for when they apply).
    card: str | None = Field(
        None,
        description="Card name when kind='play'. Must be in your current hand.",
    )
    zone: str | None = Field(
        None,
        description="Placement zone when kind='play'. Example: 'bridge_left', 'back_right'.",
    )

    # Emote (BM) channel; optional.
    emote: str | None = Field(
        None,
        description="Optional emote. Example: 'laugh', 'yawn', 'cry', 'thanks', 'chicken', 'wp'.",
    )

    # Free-form reasoning, kept for demo/story purposes; it is not used for
    # reward directly.
    # NOTE(review): the "(may include ...)" in the description below looks
    # truncated - confirm the intended wording.
    reasoning: str | None = Field(
        None,
        description="Optional reasoning text (may include ...).",
    )


class ToxicRoyaleObservation(Observation):
    """Observation handed back after every step/reset."""

    # Text rendering of the state; defaults to an empty string.
    state_text: str = Field(
        "",
        description="Human/LLM-readable state snapshot (the main observation).",
    )

    # Structured counterpart of the state; a fresh empty dict per instance.
    state: dict[str, Any] = Field(
        default_factory=dict,
        description="Structured state payload for programmatic inspection.",
    )

    # Reward split into named float components; a fresh empty dict per instance.
    reward_breakdown: dict[str, float] = Field(
        default_factory=dict,
        description="Named reward components (for debugging/training plots).",
    )