# toxic-royale-env / models.py -- uploaded by omm7 ("Upload folder using huggingface_hub", commit 05a09dc, verified)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for the Toxic Royale environment.
The toxic_royale_env environment is a text-based Clash Royale-inspired simulator
where an agent learns to deploy cards and optionally use emotes (BM) that affect
an opponent "tilt" state.
"""
from __future__ import annotations
from typing import Any, Literal
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
class ToxicRoyaleAction(Action):
    """
    A single action submitted to the simulator.

    Notes:
        - The schema describes exactly one step for the OpenEnv server.
        - For LLM training with TRL/OpenEnv tool-calling, this environment is
          typically wrapped on the trainer side: an environment_factory wrapper
          exposes tool methods (e.g. `play(...)`, `wait(...)`) and forwards
          them to the OpenEnv client.
    """

    # Required discriminator: the only field without a default.
    kind: Literal["play", "wait"] = Field(
        ...,
        description="Action type. 'play' deploys a card; 'wait' takes no deployment action.",
    )

    # Deployment details, meaningful for kind='play'; both default to None.
    # NOTE(review): this model does not itself enforce that `card`/`zone` are
    # set when kind='play' -- presumably the environment checks that; confirm.
    card: str | None = Field(
        description="Card name when kind='play'. Must be in your current hand.",
        default=None,
    )
    zone: str | None = Field(
        description="Placement zone when kind='play'. Example: 'bridge_left', 'back_right'.",
        default=None,
    )

    # Emote (BM) channel; may be left unset.
    emote: str | None = Field(
        description="Optional emote. Example: 'laugh', 'yawn', 'cry', 'thanks', 'chicken', 'wp'.",
        default=None,
    )

    # Free-form reasoning text, retained for demo/story purposes; it is not
    # used for reward directly.
    reasoning: str | None = Field(
        description="Optional reasoning text (may include <think>...</think>).",
        default=None,
    )
class ToxicRoyaleObservation(Observation):
    """Observation handed back after every reset and every step."""

    # Primary observation: a text snapshot readable by humans and LLMs.
    # Defaults to the empty string.
    state_text: str = Field(
        description="Human/LLM-readable state snapshot (the main observation).",
        default="",
    )

    # Structured counterpart of the state for programmatic inspection;
    # each instance gets its own fresh dict.
    state: dict[str, Any] = Field(
        description="Structured state payload for programmatic inspection.",
        default_factory=dict,
    )

    # Reward split into named float components, for debugging and training
    # plots; each instance gets its own fresh dict.
    reward_breakdown: dict[str, float] = Field(
        description="Named reward components (for debugging/training plots).",
        default_factory=dict,
    )