gridworld-env / OpenEnv /examples /unity_simple.py
Abhilasha Kakoty
Initial deploy
7078f4d
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Unity ML-Agents Environment Example Usage
This script demonstrates how to use the Unity ML-Agents environment
through the OpenEnv interface, with support for direct mode, server mode,
and Docker-based deployment.
=============================================================================
USAGE EXAMPLES (run from the OpenEnv repository root)
=============================================================================
1. DIRECT MODE (Recommended for quick testing - no server required)
----------------------------------------------------------------
Starts an embedded local Unity server automatically and connects to it,
so no separately started server process is needed.
This is the simplest way to get started.
# Run with graphics (default: 1280x720 window)
python examples/unity_simple.py --direct
# Run with custom window size
python examples/unity_simple.py --direct --width 1920 --height 1080
# Run headless (no graphics, faster for training)
python examples/unity_simple.py --direct --no-graphics --time-scale 20
# Run 3DBall environment for 5 episodes
python examples/unity_simple.py --direct --env 3DBall --episodes 5
# Run alternating between PushBlock and 3DBall
python examples/unity_simple.py --direct --env both --episodes 6
2. SERVER MODE (For client-server architecture)
---------------------------------------------
First, start the server in one terminal, then connect with this script.
Step 1: Start the server (in Terminal 1):
cd envs/unity_env
uvicorn server.app:app --host 0.0.0.0 --port 8000
Or with environment variables for custom settings:
UNITY_WIDTH=1920 UNITY_HEIGHT=1080 uvicorn server.app:app --port 8000
UNITY_NO_GRAPHICS=1 UNITY_TIME_SCALE=20 uvicorn server.app:app --port 8000
Step 2: Run this script (in Terminal 2, from repo root):
python examples/unity_simple.py --url http://localhost:8000
python examples/unity_simple.py --url http://localhost:8000 --env 3DBall --episodes 5
3. DOCKER MODE (For containerized deployment)
-------------------------------------------
Automatically starts a Docker container and connects to it.
First, build the Docker image:
cd envs/unity_env
docker build -f server/Dockerfile -t unity-env:latest .
Then run (from repo root):
python examples/unity_simple.py --docker
python examples/unity_simple.py --docker --width 1280 --height 720
python examples/unity_simple.py --docker --no-graphics --time-scale 20
python examples/unity_simple.py --docker --env 3DBall --episodes 10
=============================================================================
The first run will download Unity environment binaries (~500MB).
Subsequent runs use cached binaries from ~/.mlagents-cache/
"""
import argparse
import random
import sys
import time
from pathlib import Path
from typing import Optional
# Add paths for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
sys.path.insert(0, str(Path(__file__).parent.parent))
from envs.unity_env.client import UnityEnv
from envs.unity_env.models import UnityAction
def run_pushblock_episode(
    client: UnityEnv,
    max_steps: int = 1000,
    verbose: bool = True,
) -> dict:
    """
    Play one PushBlock episode, picking a random discrete action every step.

    The episode stops as soon as the environment reports ``done`` or once
    ``max_steps`` steps have been taken, whichever comes first.

    Args:
        client: Connected UnityEnv client
        max_steps: Upper bound on the number of steps to take
        verbose: When true, print environment details and progress every
            100 steps

    Returns:
        Dictionary with the keys "steps" (steps taken), "reward"
        (cumulative reward) and "done" (final done flag)
    """
    # Switch the server to the PushBlock environment and start a new episode.
    result = client.reset(env_id="PushBlock")

    if verbose:
        first_obs = result.observation
        print("Environment: PushBlock")
        print(f"Behavior: {first_obs.behavior_name}")
        print(f"Vector obs dims: {len(first_obs.vector_observations)}")
        print(f"Action spec: {first_obs.action_spec_info}")
        print()

    total_reward = 0.0
    steps_taken = 0
    while steps_taken < max_steps and not result.done:
        # PushBlock exposes 7 discrete actions:
        #   0=noop, 1=forward, 2=backward, 3=rotate_left,
        #   4=rotate_right, 5=strafe_left, 6=strafe_right
        choice = random.randint(0, 6)
        result = client.step(UnityAction(discrete_actions=[choice]))

        # A missing (None) reward is counted as zero.
        total_reward += result.reward or 0.0
        steps_taken += 1

        if verbose and steps_taken % 100 == 0:
            print(f" Step {steps_taken}: cumulative reward = {total_reward:.2f}")

    return {"steps": steps_taken, "reward": total_reward, "done": result.done}
def run_3dball_episode(
    client: UnityEnv,
    max_steps: int = 500,
    verbose: bool = True,
) -> dict:
    """
    Play one 3DBall episode, sending random continuous actions every step.

    The episode stops as soon as the environment reports ``done`` or once
    ``max_steps`` steps have been taken, whichever comes first.

    Args:
        client: Connected UnityEnv client
        max_steps: Upper bound on the number of steps to take
        verbose: When true, print environment details and progress every
            100 steps

    Returns:
        Dictionary with the keys "steps" (steps taken), "reward"
        (cumulative reward) and "done" (final done flag)
    """
    # Switch the server to the 3DBall environment and start a new episode.
    result = client.reset(env_id="3DBall")

    if verbose:
        first_obs = result.observation
        print("Environment: 3DBall")
        print(f"Behavior: {first_obs.behavior_name}")
        print(f"Vector obs dims: {len(first_obs.vector_observations)}")
        print(f"Action spec: {first_obs.action_spec_info}")
        print()

    total_reward = 0.0
    steps_taken = 0
    while steps_taken < max_steps and not result.done:
        # 3DBall takes 2 continuous actions in [-1, 1]:
        # first the X rotation, then the Z rotation.
        rotations = [random.uniform(-1, 1) for _ in range(2)]
        result = client.step(UnityAction(continuous_actions=rotations))

        # A missing (None) reward is counted as zero.
        total_reward += result.reward or 0.0
        steps_taken += 1

        if verbose and steps_taken % 100 == 0:
            print(f" Step {steps_taken}: cumulative reward = {total_reward:.2f}")

    return {"steps": steps_taken, "reward": total_reward, "done": result.done}
def run_episodes(
    client: UnityEnv,
    env_name: str,
    episodes: int,
    max_steps: int,
    verbose: bool,
) -> list:
    """
    Run ``episodes`` episodes back to back and return their statistics.

    ``env_name`` selects the episode runner: "PushBlock" and "3DBall" always
    use their own runner; any other value (in practice "both") alternates,
    starting with PushBlock on the first episode.

    Returns:
        List with one statistics dictionary per episode, in run order
    """
    single_env_runners = {
        "PushBlock": run_pushblock_episode,
        "3DBall": run_3dball_episode,
    }
    # Used when env_name is not a single environment: even index -> PushBlock,
    # odd index -> 3DBall.
    alternating_runners = (run_pushblock_episode, run_3dball_episode)

    collected = []
    for index in range(episodes):
        number = index + 1
        print(f"\n--- Episode {number}/{episodes} ---")

        runner = single_env_runners.get(env_name)
        if runner is None:
            runner = alternating_runners[index % 2]

        stats = runner(client, max_steps=max_steps, verbose=verbose)
        collected.append(stats)

        print(
            f"Episode {number}: {stats['steps']} steps, "
            f"reward: {stats['reward']:.2f}"
        )

    return collected
def print_summary(all_results: list) -> None:
    """
    Print aggregate statistics for a list of episode results.

    Args:
        all_results: Episode dictionaries, each with at least the keys
            "steps" and "reward". May be empty (e.g. when zero episodes
            were requested), in which case only the totals are printed.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Summary")
    print(divider)

    total_steps = sum(r["steps"] for r in all_results)
    print(f"Total episodes: {len(all_results)}")
    print(f"Total steps: {total_steps}")

    if all_results:
        # Collect the rewards once instead of re-walking the list per statistic.
        rewards = [r["reward"] for r in all_results]
        print(f"Average reward: {sum(rewards) / len(rewards):.2f}")
        print(f"Max reward: {max(rewards):.2f}")
        print(f"Min reward: {min(rewards):.2f}")
    else:
        # Average/max/min are undefined without episodes; computing them
        # would raise ZeroDivisionError / ValueError.
        print("No episodes were run; reward statistics unavailable.")

    print(divider)
def run_with_server(args) -> None:
    """
    Run episodes against an already-running environment server.

    Reads ``url``, ``env``, ``episodes``, ``max_steps`` and ``quiet`` from
    ``args``, prints a banner, runs the episodes through a ``UnityEnv``
    client used as a context manager, then prints the summary.
    """
    rule = "=" * 60
    banner = (
        rule,
        "Unity ML-Agents Environment - Server Mode",
        rule,
        f"\nConnecting to: {args.url}",
        f"Environment: {args.env}",
        f"Episodes: {args.episodes}",
        f"Max steps: {args.max_steps}",
        "",
    )
    for line in banner:
        print(line)

    # The context manager closes the client connection on exit.
    with UnityEnv(base_url=args.url) as client:
        episode_stats = run_episodes(
            client,
            env_name=args.env,
            episodes=args.episodes,
            max_steps=args.max_steps,
            verbose=not args.quiet,
        )

    print_summary(episode_stats)
def run_with_docker(args) -> None:
    """
    Run episodes against a Docker container started for this run.

    Graphics and simulation settings from ``args`` are passed to the
    container as ``UNITY_*`` environment variables. The client is always
    closed after the episodes. Any exception is reported together with
    troubleshooting hints, and the process then exits with status 1.
    """
    rule = "=" * 60
    headless = args.no_graphics
    graphics_label = "Disabled (headless)" if headless else "Enabled"

    print(rule)
    print("Unity ML-Agents Environment - Docker Mode")
    print(rule)
    print(f"\nDocker image: {args.docker_image}")
    print(f"Environment: {args.env}")
    print(f"Episodes: {args.episodes}")
    print(f"Max steps: {args.max_steps}")
    print(f"Window size: {args.width}x{args.height}")
    print(f"Graphics: {graphics_label}")
    print()

    # Settings handed to the container; every value must be a string.
    container_env = {"UNITY_NO_GRAPHICS": "1" if headless else "0"}
    container_env.update(
        (name, str(value))
        for name, value in (
            ("UNITY_WIDTH", args.width),
            ("UNITY_HEIGHT", args.height),
            ("UNITY_TIME_SCALE", args.time_scale),
            ("UNITY_QUALITY_LEVEL", args.quality_level),
        )
    )

    print("Starting Docker container...")
    print(f" Environment variables: {container_env}")
    print()

    try:
        # from_docker_image starts the container and returns a connected client.
        client = UnityEnv.from_docker_image(
            args.docker_image,
            environment=container_env,
        )
        try:
            episode_stats = run_episodes(
                client,
                env_name=args.env,
                episodes=args.episodes,
                max_steps=args.max_steps,
                verbose=not args.quiet,
            )
            print_summary(episode_stats)
        finally:
            print("\nClosing Docker container...")
            client.close()
    except Exception as e:
        hints = (
            f"\nError running with Docker: {e}",
            "\nTroubleshooting:",
            " 1. Ensure Docker is running",
            " 2. Build the image first:",
            f" docker build -f server/Dockerfile -t {args.docker_image} .",
            " 3. Or use server mode instead:",
            " python examples/unity_simple.py --url http://localhost:8000",
        )
        for line in hints:
            print(line)
        sys.exit(1)
def run_direct(args) -> None:
    """
    Run Unity environment in direct mode (local server started automatically).

    This mode starts an embedded local server and connects to it, providing
    the convenience of direct execution while maintaining client-server
    separation. Useful for quick testing and debugging. For production, use
    server or Docker mode.

    The episode loop is delegated to ``run_episodes`` so that direct, server
    and Docker modes share a single implementation of episode handling.

    Args:
        args: Parsed command-line arguments; reads ``env``, ``episodes``,
            ``max_steps``, ``width``, ``height``, ``no_graphics``,
            ``time_scale``, ``quality_level`` and ``quiet``.
    """
    print("=" * 60)
    print("Unity ML-Agents Environment - Direct Mode")
    print("=" * 60)
    print(f"\nEnvironment: {args.env}")
    print(f"Episodes: {args.episodes}")
    print(f"Max steps: {args.max_steps}")
    print(f"Window size: {args.width}x{args.height}")
    print(f"Graphics: {'Disabled (headless)' if args.no_graphics else 'Enabled'}")
    print(f"Time scale: {args.time_scale}x")
    print()

    print("Starting local Unity server...")
    print("(First run will download binaries - this may take a few minutes)")
    print()

    # Use from_direct() to start an embedded server and get a client.
    # "both" is not an environment id, so start with PushBlock; each episode
    # resets to the environment it needs.
    client = UnityEnv.from_direct(
        env_id=args.env if args.env != "both" else "PushBlock",
        no_graphics=args.no_graphics,
        time_scale=args.time_scale,
        width=args.width,
        height=args.height,
        quality_level=args.quality_level,
    )

    try:
        # Same per-episode reset, random-action loop and progress output as
        # the other modes.
        all_results = run_episodes(
            client,
            env_name=args.env,
            episodes=args.episodes,
            max_steps=args.max_steps,
            verbose=not args.quiet,
        )
        print_summary(all_results)
    finally:
        # Always shut the environment down, even if an episode failed.
        print("\nClosing Unity environment...")
        client.close()
def main(argv: Optional[list] = None) -> None:
    """
    Parse command-line arguments and run the example in the selected mode.

    ``--docker`` and ``--direct`` are mutually exclusive; when neither is
    given the script connects to an existing server at ``--url``.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the behavior when the
            script is run from the command line.
    """
    parser = argparse.ArgumentParser(
        description="Run Unity ML-Agents environment examples",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Connect to running server (default)
%(prog)s --url http://localhost:8000
# Run via Docker
%(prog)s --docker
# Run directly without server (for testing)
%(prog)s --direct
# With a custom window size (default: 1280x720)
%(prog)s --direct --width 1920 --height 1080
# Headless mode (faster training)
%(prog)s --direct --no-graphics --time-scale 20
# Run 3DBall environment
%(prog)s --direct --env 3DBall --episodes 5
""",
    )

    # Mode selection
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--docker",
        action="store_true",
        help="Run via Docker (automatically starts container)",
    )
    mode_group.add_argument(
        "--direct",
        action="store_true",
        help="Run Unity environment directly without server",
    )

    # Connection settings
    parser.add_argument(
        "--url",
        default="http://localhost:8000",
        help="Base URL of the Unity environment server (default: http://localhost:8000)",
    )
    parser.add_argument(
        "--docker-image",
        default="unity-env:latest",
        help="Docker image to use (default: unity-env:latest)",
    )

    # Environment settings
    parser.add_argument(
        "--env",
        choices=["PushBlock", "3DBall", "both"],
        default="PushBlock",
        help="Which environment to run (default: PushBlock)",
    )
    parser.add_argument(
        "--episodes",
        type=int,
        default=3,
        help="Number of episodes to run (default: 3)",
    )
    parser.add_argument(
        "--max-steps",
        type=int,
        default=500,
        help="Maximum steps per episode (default: 500)",
    )

    # Graphics settings (help text must match the actual defaults below)
    parser.add_argument(
        "--width",
        type=int,
        default=1280,
        help="Window width in pixels (default: 1280)",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=720,
        help="Window height in pixels (default: 720)",
    )
    parser.add_argument(
        "--no-graphics",
        action="store_true",
        help="Run in headless mode without graphics (faster training)",
    )
    parser.add_argument(
        "--time-scale",
        type=float,
        default=1.0,
        help="Simulation speed multiplier (default: 1.0, use 20.0 for fast training)",
    )
    parser.add_argument(
        "--quality-level",
        type=int,
        default=5,
        choices=[0, 1, 2, 3, 4, 5],
        help="Graphics quality level 0-5 (default: 5)",
    )

    # Output settings
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Reduce output verbosity",
    )

    args = parser.parse_args(argv)

    # Run in appropriate mode; server mode is the fallback.
    if args.docker:
        run_with_docker(args)
    elif args.direct:
        run_direct(args)
    else:
        run_with_server(args)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()