Spaces:
Sleeping
Sleeping
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for the Toxic Royale environment.

The toxic_royale_env environment is a text-based Clash Royale-inspired simulator
where an agent learns to deploy cards and optionally use emotes (BM) that affect
an opponent "tilt" state.
"""
| from __future__ import annotations | |
| from typing import Any, Literal | |
| from openenv.core.env_server.types import Action, Observation | |
| from pydantic import Field | |
class ToxicRoyaleAction(Action):
    """
    One simulator action.

    Notes:
    - This is a single-step action schema for the OpenEnv server.
    - For LLM training with TRL/OpenEnv tool-calling, we will typically wrap this
      environment with tool methods (e.g. `play(...)`, `wait(...)`) in a trainer-side
      environment_factory wrapper that calls into the OpenEnv client.
    """

    # Discriminator for the action. It has no default (`...`), so it must always
    # be supplied, and only the two literal values below are accepted.
    kind: Literal["play", "wait"] = Field(
        ...,
        description="Action type. 'play' deploys a card; 'wait' takes no deployment action.",
    )

    # --- Fields that accompany kind='play' ---
    # Both default to None. This schema does not itself require them when
    # kind='play'; NOTE(review): presumably the environment validates hand
    # membership and zone names at step time -- confirm against the server.
    card: str | None = Field(
        default=None,
        description="Card name when kind='play'. Must be in your current hand.",
    )
    zone: str | None = Field(
        default=None,
        description="Placement zone when kind='play'. Example: 'bridge_left', 'back_right'.",
    )

    # --- Optional emote (BM) channel ---
    # Declared as a free-form string; the names in the description are examples.
    emote: str | None = Field(
        default=None,
        description="Optional emote. Example: 'laugh', 'yawn', 'cry', 'thanks', 'chicken', 'wp'.",
    )

    # Free-form rationale carried with the action for demo/story purposes; it is
    # not used for reward directly.
    reasoning: str | None = Field(
        default=None,
        description="Optional reasoning text (may include <think>...</think>).",
    )
class ToxicRoyaleObservation(Observation):
    """Observation returned after each step/reset."""

    # Primary observation: a text rendering of the state, readable by a human
    # or an LLM. Defaults to the empty string.
    state_text: str = Field(
        default="",
        description="Human/LLM-readable state snapshot (the main observation).",
    )

    # Structured payload for programmatic inspection. `default_factory` gives
    # every observation its own fresh dict instead of a shared default.
    state: dict[str, Any] = Field(
        default_factory=dict,
        description="Structured state payload for programmatic inspection.",
    )

    # Reward split into named float components, for debugging and training
    # plots. Also built per instance via `default_factory`.
    reward_breakdown: dict[str, float] = Field(
        default_factory=dict,
        description="Named reward components (for debugging/training plots).",
    )