# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. """ Data models for the Unity ML-Agents Environment. The Unity environment wraps Unity ML-Agents environments (PushBlock, 3DBall, GridWorld, etc.) providing a unified interface for reinforcement learning. """ from typing import Any, Dict, List, Optional from pydantic import Field # Support both in-repo and standalone imports try: # In-repo imports (when running from OpenEnv repository) from openenv.core.env_server.types import Action, Observation, State except ImportError: # Standalone imports (when environment is standalone with openenv from pip) from openenv.core.env_server.types import Action, Observation, State class UnityAction(Action): """ Action for Unity ML-Agents environments. Supports both discrete and continuous action spaces. Unity environments may use either or both types of actions: - Discrete actions: Integer indices for categorical choices (e.g., movement direction: 0=forward, 1=backward, 2=left, 3=right) - Continuous actions: Float values typically in [-1, 1] range (e.g., joint rotations, force magnitudes) Example (PushBlock - discrete): >>> action = UnityAction(discrete_actions=[3]) # Rotate left Example (Walker - continuous): >>> action = UnityAction(continuous_actions=[0.5, -0.3, 0.0, ...]) Attributes: discrete_actions: List of discrete action indices for each action branch. For PushBlock: [0-6] where 0=noop, 1=forward, 2=backward, 3=rotate_left, 4=rotate_right, 5=strafe_left, 6=strafe_right continuous_actions: List of continuous action values, typically in [-1, 1]. metadata: Additional action parameters. """ discrete_actions: Optional[List[int]] = Field( default=None, description="Discrete action indices for each action branch", ) continuous_actions: Optional[List[float]] = Field( default=None, description="Continuous action values, typically in [-1, 1] range", ) class UnityObservation(Observation): """ Observation from Unity ML-Agents environments. Contains vector observations (sensor readings) and optionally visual observations (rendered images). Most Unity environments provide vector observations; visual observations are optional and must be requested. Attributes: vector_observations: Flattened array of all vector observations. Size and meaning depends on the specific environment. For PushBlock: 70 values from 14 ray-casts detecting walls/goals/blocks. visual_observations: Optional list of base64-encoded images (PNG format). Only included when include_visual=True in reset/step. behavior_name: Name of the Unity behavior (agent type). action_spec_info: Information about the action space for this environment. observation_spec_info: Information about the observation space. """ vector_observations: List[float] = Field( default_factory=list, description="Flattened vector observations from the environment", ) visual_observations: Optional[List[str]] = Field( default=None, description="Base64-encoded PNG images (when include_visual=True)", ) behavior_name: str = Field( default="", description="Name of the Unity behavior/agent type", ) action_spec_info: Dict[str, Any] = Field( default_factory=dict, description="Information about the action space", ) observation_spec_info: Dict[str, Any] = Field( default_factory=dict, description="Information about the observation space", ) class UnityState(State): """ Extended state for Unity ML-Agents environments. Provides additional metadata about the currently loaded environment, including action and observation space specifications. Attributes: episode_id: Unique identifier for the current episode. step_count: Number of steps taken in the current episode. env_id: Identifier of the currently loaded Unity environment. behavior_name: Name of the Unity behavior (agent type). action_spec: Detailed specification of the action space. observation_spec: Detailed specification of the observation space. available_envs: List of available environment identifiers. """ env_id: str = Field( default="PushBlock", description="Identifier of the loaded Unity environment", ) behavior_name: str = Field( default="", description="Name of the Unity behavior/agent type", ) action_spec: Dict[str, Any] = Field( default_factory=dict, description="Specification of the action space", ) observation_spec: Dict[str, Any] = Field( default_factory=dict, description="Specification of the observation space", ) available_envs: List[str] = Field( default_factory=list, description="List of available Unity environments", ) # Available Unity environments from the ML-Agents registry # These are pre-built environments that can be downloaded automatically AVAILABLE_UNITY_ENVIRONMENTS = [ "PushBlock", "3DBall", "3DBallHard", "GridWorld", "Basic", "VisualPushBlock", # Note: More environments may be available in newer versions of ML-Agents ] # Action descriptions for PushBlock (most commonly used example) PUSHBLOCK_ACTIONS = { 0: "noop", 1: "forward", 2: "backward", 3: "rotate_left", 4: "rotate_right", 5: "strafe_left", 6: "strafe_right", }