#!/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. """ Unity ML-Agents Environment Example Usage This script demonstrates how to use the Unity ML-Agents environment through the OpenEnv interface, with support for direct mode, server mode, and Docker-based deployment. ============================================================================= USAGE EXAMPLES (run from the OpenEnv repository root) ============================================================================= 1. DIRECT MODE (Recommended for quick testing - no server required) ---------------------------------------------------------------- Runs the Unity environment directly in-process. This is the simplest way to get started. # Run with graphics (default: 1280x720 window) python examples/unity_simple.py --direct # Run with custom window size python examples/unity_simple.py --direct --width 1920 --height 1080 # Run headless (no graphics, faster for training) python examples/unity_simple.py --direct --no-graphics --time-scale 20 # Run 3DBall environment for 5 episodes python examples/unity_simple.py --direct --env 3DBall --episodes 5 # Run alternating between PushBlock and 3DBall python examples/unity_simple.py --direct --env both --episodes 6 2. SERVER MODE (For client-server architecture) --------------------------------------------- First, start the server in one terminal, then connect with this script. Step 1: Start the server (in Terminal 1): cd envs/unity_env uvicorn server.app:app --host 0.0.0.0 --port 8000 Or with environment variables for custom settings: UNITY_WIDTH=1920 UNITY_HEIGHT=1080 uvicorn server.app:app --port 8000 UNITY_NO_GRAPHICS=1 UNITY_TIME_SCALE=20 uvicorn server.app:app --port 8000 Step 2: Run this script (in Terminal 2, from repo root): python examples/unity_simple.py --url http://localhost:8000 python examples/unity_simple.py --url http://localhost:8000 --env 3DBall --episodes 5 3. DOCKER MODE (For containerized deployment) ------------------------------------------- Automatically starts a Docker container and connects to it. First, build the Docker image: cd envs/unity_env docker build -f server/Dockerfile -t unity-env:latest . Then run (from repo root): python examples/unity_simple.py --docker python examples/unity_simple.py --docker --width 1280 --height 720 python examples/unity_simple.py --docker --no-graphics --time-scale 20 python examples/unity_simple.py --docker --env 3DBall --episodes 10 ============================================================================= The first run will download Unity environment binaries (~500MB). Subsequent runs use cached binaries from ~/.mlagents-cache/ """ import argparse import random import sys import time from pathlib import Path from typing import Optional # Add paths for imports sys.path.insert(0, str(Path(__file__).parent.parent / "src")) sys.path.insert(0, str(Path(__file__).parent.parent)) from envs.unity_env.client import UnityEnv from envs.unity_env.models import UnityAction def run_pushblock_episode( client: UnityEnv, max_steps: int = 1000, verbose: bool = True, ) -> dict: """ Run a single episode of PushBlock with random actions. Args: client: Connected UnityEnv client max_steps: Maximum steps per episode verbose: Print progress information Returns: Dictionary with episode statistics """ # Reset to PushBlock environment result = client.reset(env_id="PushBlock") if verbose: print(f"Environment: PushBlock") print(f"Behavior: {result.observation.behavior_name}") print(f"Vector obs dims: {len(result.observation.vector_observations)}") action_spec = result.observation.action_spec_info print(f"Action spec: {action_spec}") print() episode_reward = 0.0 step_count = 0 while not result.done and step_count < max_steps: # PushBlock has 7 discrete actions: # 0=noop, 1=forward, 2=backward, 3=rotate_left, # 4=rotate_right, 5=strafe_left, 6=strafe_right action_idx = random.randint(0, 6) action = UnityAction(discrete_actions=[action_idx]) result = client.step(action) episode_reward += result.reward or 0.0 step_count += 1 if verbose and step_count % 100 == 0: print(f" Step {step_count}: cumulative reward = {episode_reward:.2f}") return { "steps": step_count, "reward": episode_reward, "done": result.done, } def run_3dball_episode( client: UnityEnv, max_steps: int = 500, verbose: bool = True, ) -> dict: """ Run a single episode of 3DBall with random actions. Args: client: Connected UnityEnv client max_steps: Maximum steps per episode verbose: Print progress information Returns: Dictionary with episode statistics """ # Reset to 3DBall environment result = client.reset(env_id="3DBall") if verbose: print(f"Environment: 3DBall") print(f"Behavior: {result.observation.behavior_name}") print(f"Vector obs dims: {len(result.observation.vector_observations)}") action_spec = result.observation.action_spec_info print(f"Action spec: {action_spec}") print() episode_reward = 0.0 step_count = 0 while not result.done and step_count < max_steps: # 3DBall has 2 continuous actions for X and Z rotation action = UnityAction( continuous_actions=[ random.uniform(-1, 1), # X rotation random.uniform(-1, 1), # Z rotation ] ) result = client.step(action) episode_reward += result.reward or 0.0 step_count += 1 if verbose and step_count % 100 == 0: print(f" Step {step_count}: cumulative reward = {episode_reward:.2f}") return { "steps": step_count, "reward": episode_reward, "done": result.done, } def run_episodes( client: UnityEnv, env_name: str, episodes: int, max_steps: int, verbose: bool, ) -> list: """Run multiple episodes and collect results.""" all_results = [] for episode in range(episodes): print(f"\n--- Episode {episode + 1}/{episodes} ---") if env_name == "PushBlock": result = run_pushblock_episode( client, max_steps=max_steps, verbose=verbose, ) elif env_name == "3DBall": result = run_3dball_episode( client, max_steps=max_steps, verbose=verbose, ) else: # both if episode % 2 == 0: result = run_pushblock_episode( client, max_steps=max_steps, verbose=verbose, ) else: result = run_3dball_episode( client, max_steps=max_steps, verbose=verbose, ) all_results.append(result) print( f"Episode {episode + 1}: {result['steps']} steps, " f"reward: {result['reward']:.2f}" ) return all_results def print_summary(all_results: list) -> None: """Print summary statistics.""" print("\n" + "=" * 60) print("Summary") print("=" * 60) total_steps = sum(r["steps"] for r in all_results) avg_reward = sum(r["reward"] for r in all_results) / len(all_results) max_reward = max(r["reward"] for r in all_results) min_reward = min(r["reward"] for r in all_results) print(f"Total episodes: {len(all_results)}") print(f"Total steps: {total_steps}") print(f"Average reward: {avg_reward:.2f}") print(f"Max reward: {max_reward:.2f}") print(f"Min reward: {min_reward:.2f}") print("=" * 60) def run_with_server(args) -> None: """Run using a connection to an existing server.""" print("=" * 60) print("Unity ML-Agents Environment - Server Mode") print("=" * 60) print(f"\nConnecting to: {args.url}") print(f"Environment: {args.env}") print(f"Episodes: {args.episodes}") print(f"Max steps: {args.max_steps}") print() # Connect to the environment server with UnityEnv(base_url=args.url) as client: all_results = run_episodes( client, env_name=args.env, episodes=args.episodes, max_steps=args.max_steps, verbose=not args.quiet, ) print_summary(all_results) def run_with_docker(args) -> None: """Run using Docker (automatically starts container).""" print("=" * 60) print("Unity ML-Agents Environment - Docker Mode") print("=" * 60) print(f"\nDocker image: {args.docker_image}") print(f"Environment: {args.env}") print(f"Episodes: {args.episodes}") print(f"Max steps: {args.max_steps}") print(f"Window size: {args.width}x{args.height}") print(f"Graphics: {'Disabled (headless)' if args.no_graphics else 'Enabled'}") print() # Build environment variables for Docker env_vars = { "UNITY_NO_GRAPHICS": "1" if args.no_graphics else "0", "UNITY_WIDTH": str(args.width), "UNITY_HEIGHT": str(args.height), "UNITY_TIME_SCALE": str(args.time_scale), "UNITY_QUALITY_LEVEL": str(args.quality_level), } print("Starting Docker container...") print(f" Environment variables: {env_vars}") print() try: # Use from_docker_image to automatically start and connect client = UnityEnv.from_docker_image( args.docker_image, environment=env_vars, ) try: all_results = run_episodes( client, env_name=args.env, episodes=args.episodes, max_steps=args.max_steps, verbose=not args.quiet, ) print_summary(all_results) finally: print("\nClosing Docker container...") client.close() except Exception as e: print(f"\nError running with Docker: {e}") print("\nTroubleshooting:") print(" 1. Ensure Docker is running") print(" 2. Build the image first:") print(f" docker build -f server/Dockerfile -t {args.docker_image} .") print(" 3. Or use server mode instead:") print(" python examples/unity_simple.py --url http://localhost:8000") sys.exit(1) def run_direct(args) -> None: """ Run Unity environment in direct mode (local server started automatically). This mode starts an embedded local server and connects to it, providing the convenience of direct execution while maintaining client-server separation. Useful for quick testing and debugging. For production, use server or Docker mode. """ print("=" * 60) print("Unity ML-Agents Environment - Direct Mode") print("=" * 60) print(f"\nEnvironment: {args.env}") print(f"Episodes: {args.episodes}") print(f"Max steps: {args.max_steps}") print(f"Window size: {args.width}x{args.height}") print(f"Graphics: {'Disabled (headless)' if args.no_graphics else 'Enabled'}") print(f"Time scale: {args.time_scale}x") print() print("Starting local Unity server...") print("(First run will download binaries - this may take a few minutes)") print() # Use from_direct() to start an embedded server and get a client client = UnityEnv.from_direct( env_id=args.env if args.env != "both" else "PushBlock", no_graphics=args.no_graphics, time_scale=args.time_scale, width=args.width, height=args.height, quality_level=args.quality_level, ) try: all_results = [] for episode in range(args.episodes): print(f"\n--- Episode {episode + 1}/{args.episodes} ---") # Determine which environment to use if args.env == "both": current_env = "PushBlock" if episode % 2 == 0 else "3DBall" else: current_env = args.env # Reset environment result = client.reset(env_id=current_env) if not args.quiet: print(f"Environment: {current_env}") print(f"Behavior: {result.observation.behavior_name}") print(f"Vector obs dims: {len(result.observation.vector_observations)}") print(f"Action spec: {result.observation.action_spec_info}") print() episode_reward = 0.0 step_count = 0 while not result.done and step_count < args.max_steps: # Generate action based on environment type if current_env == "3DBall": action = UnityAction( continuous_actions=[ random.uniform(-1, 1), random.uniform(-1, 1), ] ) else: action = UnityAction(discrete_actions=[random.randint(0, 6)]) result = client.step(action) episode_reward += result.reward or 0.0 step_count += 1 if not args.quiet and step_count % 100 == 0: print( f" Step {step_count}: cumulative reward = {episode_reward:.2f}" ) episode_result = { "steps": step_count, "reward": episode_reward, "done": result.done, } all_results.append(episode_result) print( f"Episode {episode + 1}: {episode_result['steps']} steps, " f"reward: {episode_result['reward']:.2f}" ) print_summary(all_results) finally: print("\nClosing Unity environment...") client.close() def main(): parser = argparse.ArgumentParser( description="Run Unity ML-Agents environment examples", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Connect to running server (default) %(prog)s --url http://localhost:8000 # Run via Docker %(prog)s --docker # Run directly without server (for testing) %(prog)s --direct # With graphics window (800x600 default) %(prog)s --direct --width 1280 --height 720 # Headless mode (faster training) %(prog)s --direct --no-graphics --time-scale 20 # Run 3DBall environment %(prog)s --direct --env 3DBall --episodes 5 """, ) # Mode selection mode_group = parser.add_mutually_exclusive_group() mode_group.add_argument( "--docker", action="store_true", help="Run via Docker (automatically starts container)", ) mode_group.add_argument( "--direct", action="store_true", help="Run Unity environment directly without server", ) # Connection settings parser.add_argument( "--url", default="http://localhost:8000", help="Base URL of the Unity environment server (default: http://localhost:8000)", ) parser.add_argument( "--docker-image", default="unity-env:latest", help="Docker image to use (default: unity-env:latest)", ) # Environment settings parser.add_argument( "--env", choices=["PushBlock", "3DBall", "both"], default="PushBlock", help="Which environment to run (default: PushBlock)", ) parser.add_argument( "--episodes", type=int, default=3, help="Number of episodes to run (default: 3)", ) parser.add_argument( "--max-steps", type=int, default=500, help="Maximum steps per episode (default: 500)", ) # Graphics settings parser.add_argument( "--width", type=int, default=1280, help="Window width in pixels (default: 800)", ) parser.add_argument( "--height", type=int, default=720, help="Window height in pixels (default: 600)", ) parser.add_argument( "--no-graphics", action="store_true", help="Run in headless mode without graphics (faster training)", ) parser.add_argument( "--time-scale", type=float, default=1.0, help="Simulation speed multiplier (default: 1.0, use 20.0 for fast training)", ) parser.add_argument( "--quality-level", type=int, default=5, choices=[0, 1, 2, 3, 4, 5], help="Graphics quality level 0-5 (default: 5)", ) # Output settings parser.add_argument( "--quiet", action="store_true", help="Reduce output verbosity", ) args = parser.parse_args() # Run in appropriate mode if args.docker: run_with_docker(args) elif args.direct: run_direct(args) else: run_with_server(args) if __name__ == "__main__": main()