Spaces:

omm7
/

toxic-royale-env

Sleeping

App Files Files Community

toxic-royale-env / models.py

omm7

Upload folder using huggingface_hub

05a09dc verified about 1 month ago

raw

history blame contribute delete

2.46 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	Data models for the Toxic Royale environment.

	The toxic_royale_env environment is a text-based Clash Royale-inspired simulator
	where an agent learns to deploy cards and optionally use emotes (BM) that affect
	an opponent "tilt" state.
	"""

	from __future__ import annotations

	from typing import Any, Literal

	from openenv.core.env_server.types import Action, Observation
	from pydantic import Field


	class ToxicRoyaleAction(Action):
	    """
	    One simulator action.

	    Notes:
	    - This is a single-step action schema for the OpenEnv server.
	    - For LLM training with TRL/OpenEnv tool-calling, we will typically wrap
	      this environment with tool methods (e.g. `play(...)`, `wait(...)`) in a
	      trainer-side environment_factory wrapper that calls into the OpenEnv
	      client.
	    - Only `kind` is required; every other field defaults to None.
	    - NOTE(review): no validator in this class enforces that `card` and
	      `zone` are set when kind='play'. Presumably the environment checks
	      this when it applies the action -- confirm.

	    Attributes:
	        kind: Action type, either "play" (deploy a card) or "wait".
	        card: Card name to deploy when kind='play'.
	        zone: Placement zone to deploy into when kind='play'.
	        emote: Optional emote sent alongside the action.
	        reasoning: Optional free-form reasoning text.
	    """

	    # Required discriminator: `...` marks the field as having no default.
	    kind: Literal["play", "wait"] = Field(
	        ...,
	        description="Action type. 'play' deploys a card; 'wait' takes no deployment action.",
	    )

	    # --- play fields ---
	    card: str | None = Field(
	        default=None,
	        description="Card name when kind='play'. Must be in your current hand.",
	    )
	    zone: str | None = Field(
	        default=None,
	        description="Placement zone when kind='play'. Example: 'bridge_left', 'back_right'.",
	    )

	    # --- optional emote channel (BM) ---
	    emote: str | None = Field(
	        default=None,
	        description="Optional emote. Example: 'laugh', 'yawn', 'cry', 'thanks', 'chicken', 'wp'.",
	    )

	    # Optional free-form reasoning (kept for demo/story; not used for reward directly).
	    reasoning: str | None = Field(
	        default=None,
	        description="Optional reasoning text (may include <think>...</think>).",
	    )


	class ToxicRoyaleObservation(Observation):
	    """
	    Observation returned after each step/reset.

	    Every field declared here has a default, so none of them has to be
	    supplied explicitly.

	    Attributes:
	        state_text: Human/LLM-readable state snapshot; defaults to "".
	        state: Structured state payload; defaults to an empty dict.
	        reward_breakdown: Named reward components; defaults to an empty dict.
	    """

	    # Text rendering of the state -- the main observation.
	    state_text: str = Field(
	        default="",
	        description="Human/LLM-readable state snapshot (the main observation).",
	    )

	    # default_factory gives each instance its own fresh dict.
	    state: dict[str, Any] = Field(
	        default_factory=dict,
	        description="Structured state payload for programmatic inspection.",
	    )

	    # Reward split into named float components.
	    reward_breakdown: dict[str, float] = Field(
	        default_factory=dict,
	        description="Named reward components (for debugging/training plots).",
	    )