Spaces:

kaustubhg73
/

data-cleaning-openenv

Paused

App Files Files Community

data-cleaning-openenv / OpenEnv /docs /source /getting_started /plot_02_using_environments.py

kaustubhg73

Upload folder using huggingface_hub

e5f64b3 verified about 2 months ago

raw

history blame contribute delete

29.4 kB

	"""
	Using Environments
	==================

	Part 2 of 5 in the OpenEnv Getting Started Series

	This notebook covers how to use OpenEnv environments: connecting to them,
	creating AI policies, running evaluations, and working with different games.

	.. note::
	Time: ~15 minutes \| Difficulty: Beginner-Intermediate \| GPU Required: No

	What You'll Learn
	-----------------

	- Connection Methods: Hub, Docker, and direct URL connections
	- Available Environments: OpenSpiel games, coding, browsing, and more
	- Creating Policies: Random, heuristic, and learning-based strategies
	- Running Evaluations: Measuring and comparing policy performance
	"""

	# %%
	# Part 1: Setup
	# -------------
	#
	# Let's set up our environment and imports.

	import random
	import subprocess
	import sys
	from pathlib import Path

	import nest_asyncio
	nest_asyncio.apply()

	# Detect environment
	try:
	import google.colab

	IN_COLAB = True
	except ImportError:
	IN_COLAB = False

	if IN_COLAB:
	print("=" * 70)
	print(" GOOGLE COLAB DETECTED - Installing OpenEnv...")
	print("=" * 70)

	subprocess.run(
	[sys.executable, "-m", "pip", "install", "-q", "openenv-core"],
	capture_output=True,
	)
	print(" OpenEnv installed!")
	print("=" * 70)
	else:
	print("=" * 70)
	print(" RUNNING LOCALLY")
	print("=" * 70)

	# Add src and envs to path for local development
	src_path = Path.cwd().parent.parent.parent / "src"
	if src_path.exists():
	sys.path.insert(0, str(src_path))
	envs_path = Path.cwd().parent.parent.parent / "envs"
	if envs_path.exists():
	sys.path.insert(0, str(envs_path.parent))

	print("=" * 70)

	print()

	# %%
	# Part 2: Available Environments
	# ------------------------------
	#
	# OpenEnv includes a growing collection of environments for different RL tasks.
	#
	# OpenSpiel Games
	# ~~~~~~~~~~~~~~~
	#
	# OpenSpiel (from DeepMind) provides 70+ game environments. OpenEnv wraps
	# several of these:
	#
	# +------------------+-------------+------------------------------------------+
	# \| Game \| Players \| Description \|
	# +==================+=============+==========================================+
	# \| Catch \| 1 \| Catch falling ball with paddle \|
	# +------------------+-------------+------------------------------------------+
	# \| 2048 \| 1 \| Slide tiles to combine numbers \|
	# +------------------+-------------+------------------------------------------+
	# \| Blackjack \| 1 \| Classic card game vs dealer \|
	# +------------------+-------------+------------------------------------------+
	# \| Cliff Walking\| 1 \| Navigate grid, avoid cliffs \|
	# +------------------+-------------+------------------------------------------+
	# \| Tic-Tac-Toe \| 2 \| Classic 3x3 grid game \|
	# +------------------+-------------+------------------------------------------+
	# \| Kuhn Poker \| 2 \| Simplified poker with 3 cards \|
	# +------------------+-------------+------------------------------------------+
	#
	# Other Environment Types
	# ~~~~~~~~~~~~~~~~~~~~~~~
	#
	# +------------------+--------------------------------------------------+
	# \| Environment \| Description \|
	# +==================+==================================================+
	# \| Coding Env \| Execute and evaluate code solutions \|
	# +------------------+--------------------------------------------------+
	# \| BrowserGym \| Web browsing and interaction \|
	# +------------------+--------------------------------------------------+
	# \| TextArena \| Text-based game environments \|
	# +------------------+--------------------------------------------------+
	# \| Atari \| Classic Atari 2600 games \|
	# +------------------+--------------------------------------------------+
	# \| Snake \| Classic snake game \|
	# +------------------+--------------------------------------------------+

	# %%
	# Part 3: Connecting to Environments
	# ----------------------------------
	#
	# OpenEnv provides three ways to connect to environments.

	print("=" * 70)
	print(" CONNECTION METHODS")
	print("=" * 70)

	# Import the environment client
	try:
	from openspiel_env.client import OpenSpielEnv
	from openspiel_env.models import OpenSpielAction, OpenSpielObservation, OpenSpielState

	IMPORTS_OK = True
	print("✓ Imports successful")
	except ImportError as e:
	IMPORTS_OK = False
	print(f"✗ Import error: {e}")

	# %%
	# Method 1: From Hugging Face Hub
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# The easiest way to get started - automatically downloads and runs the container.
	# Let's examine the actual method signature:

	print("\n" + "-" * 70)
	print("METHOD 1: FROM HUGGING FACE HUB")
	print("-" * 70)

	if IMPORTS_OK:
	import inspect

	if hasattr(OpenSpielEnv, "from_hub"):
	sig = inspect.signature(OpenSpielEnv.from_hub)
	print(f"\nSignature: OpenSpielEnv.from_hub{sig}")

	# Show docstring if available
	if OpenSpielEnv.from_hub.__doc__:
	doc_lines = OpenSpielEnv.from_hub.__doc__.strip().split("\n")[:3]
	print(f"Purpose: {doc_lines[0].strip()}")
	else:
	print("\nfrom_hub method not available in this version")

	print("\nUsage:")
	print(" env = OpenSpielEnv.from_hub('openenv/openspiel-env')")
	print("\nWhat happens:")
	print(" 1. Pulls Docker image from HF registry")
	print(" 2. Starts container on available port")
	print(" 3. Connects via WebSocket")
	print(" 4. Cleans up on close()")
	else:
	print("\n(OpenEnv not installed - showing expected signature)")
	print("\nSignature: OpenSpielEnv.from_hub(repo_id, *, use_docker=True, ...)")

	# %%
	# Method 2: From Docker Image
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# Use a locally built or pulled Docker image:

	print("\n" + "-" * 70)
	print("METHOD 2: FROM DOCKER IMAGE")
	print("-" * 70)

	if IMPORTS_OK:
	if hasattr(OpenSpielEnv, "from_docker_image"):
	sig = inspect.signature(OpenSpielEnv.from_docker_image)
	print(f"\nSignature: OpenSpielEnv.from_docker_image{sig}")

	if OpenSpielEnv.from_docker_image.__doc__:
	doc_lines = OpenSpielEnv.from_docker_image.__doc__.strip().split("\n")[:3]
	print(f"Purpose: {doc_lines[0].strip()}")
	else:
	print("\nfrom_docker_image method not available in this version")

	print("\nUsage:")
	print(" # Build image first:")
	print(" # docker build -t openspiel-env:latest ./envs/openspiel_env/server")
	print(" env = OpenSpielEnv.from_docker_image('openspiel-env:latest')")
	else:
	print("\n(OpenEnv not installed - showing expected signature)")
	print("\nSignature: OpenSpielEnv.from_docker_image(image, provider=None, ...)")

	# %%
	# Method 3: Direct URL Connection
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# Connect to an already-running server:

	print("\n" + "-" * 70)
	print("METHOD 3: DIRECT URL CONNECTION")
	print("-" * 70)

	if IMPORTS_OK:
	sig = inspect.signature(OpenSpielEnv.__init__)
	print(f"\nSignature: OpenSpielEnv{sig}")
	print("\nUsage:")
	print(" # Start server first:")
	print(" # docker run -p 8000:8000 openenv/openspiel-env:latest")
	print(" env = OpenSpielEnv(base_url='http://localhost:8000')")
	print("\nNote: Does NOT manage container lifecycle - you control the server")
	else:
	print("\n(OpenEnv not installed - showing expected signature)")
	print("\nSignature: OpenSpielEnv(base_url, connect_timeout_s=10.0, ...)")

	# %%
	# Using Context Managers
	# ~~~~~~~~~~~~~~~~~~~~~~
	#
	# Always use context managers to ensure proper cleanup. Let's verify the
	# client supports the context manager protocol:

	print("\n" + "-" * 70)
	print("CONTEXT MANAGER SUPPORT")
	print("-" * 70)

	if IMPORTS_OK:
	has_enter = hasattr(OpenSpielEnv, "__enter__")
	has_exit = hasattr(OpenSpielEnv, "__exit__")
	print(f"\n__enter__ method: {'✓ Present' if has_enter else '✗ Missing'}")
	print(f"__exit__ method: {'✓ Present' if has_exit else '✗ Missing'}")

	if has_enter and has_exit:
	print("\n✓ Context manager supported! Use with 'with' statement:")
	print(" with OpenSpielEnv(base_url='...') as env:")
	print(" result = env.reset()")
	print(" # ... use env ...")
	print(" # Automatically cleaned up")
	else:
	print("\n(OpenEnv not installed)")
	print("Context managers are supported for automatic cleanup")

	# %%
	# Part 4: The Environment Loop
	# ----------------------------
	#
	# Every OpenEnv interaction follows the same pattern:
	#
	# 1. ``reset()`` - Start a new episode
	# 2. ``step(action)`` - Take action, get observation/reward
	# 3. Repeat until ``done``
	# 4. ``state()`` - Get episode metadata (optional)
	#
	# Let's demonstrate this with an actual episode:

	print("=" * 70)
	print(" THE ENVIRONMENT LOOP - LIVE DEMO")
	print("=" * 70)
	print()

	# Run an actual demo episode
	GRID_HEIGHT = 10
	GRID_WIDTH = 5

	# Create mock observation for demonstration
	class DemoObservation:
	def __init__(self, info_state, legal_actions, done=False):
	self.info_state = info_state
	self.legal_actions = legal_actions
	self.done = done

	class DemoResult:
	def __init__(self, observation, reward=0.0, done=False):
	self.observation = observation
	self.reward = reward
	self.done = done

	# Initialize episode
	ball_col = random.randint(0, GRID_WIDTH - 1)
	paddle_col = GRID_WIDTH // 2

	print(f"Episode Starting:")
	print(f" Ball column: {ball_col}")
	print(f" Paddle column: {paddle_col}")
	print()

	# Simulate the environment loop
	step_count = 0
	total_reward = 0.0

	print("Step \| Ball Row \| Paddle \| Action \| Info State (first 10)")
	print("-" * 65)

	for ball_row in range(GRID_HEIGHT):
	# Build observation (same format as real OpenSpiel Catch)
	info_state = [0.0] * (GRID_HEIGHT * GRID_WIDTH)
	info_state[ball_row * GRID_WIDTH + ball_col] = 1.0 # Ball
	info_state[(GRID_HEIGHT - 1) * GRID_WIDTH + paddle_col] = 1.0 # Paddle

	obs = DemoObservation(info_state=info_state, legal_actions=[0, 1, 2])

	# Choose action (smart policy - move toward ball)
	if paddle_col < ball_col:
	action_id = 2 # RIGHT
	elif paddle_col > ball_col:
	action_id = 0 # LEFT
	else:
	action_id = 1 # STAY

	action_names = {0: "LEFT", 1: "STAY", 2: "RIGHT"}

	# Show state before action
	info_preview = [f"{v:.0f}" for v in info_state[:10]]
	print(f" {step_count:2d} \| {ball_row:2d} \| {paddle_col} \| {action_names[action_id]:<5} \| {info_preview}")

	# Execute action
	if action_id == 0:
	paddle_col = max(0, paddle_col - 1)
	elif action_id == 2:
	paddle_col = min(GRID_WIDTH - 1, paddle_col + 1)

	step_count += 1

	# Calculate final reward
	caught = (paddle_col == ball_col)
	reward = 1.0 if caught else 0.0

	print("-" * 65)
	print()
	print(f"Episode Complete:")
	print(f" Steps: {step_count}")
	print(f" Ball landed at: column {ball_col}")
	print(f" Paddle position: column {paddle_col}")
	print(f" Reward: {reward}")
	print(f" Result: {'CAUGHT! ✓' if caught else 'MISSED! ✗'}")
	print()
	print("This is the exact same loop you'd run with a live server,")
	print("just using local simulation for the game logic.")

	# %%
	# Part 5: Creating AI Policies
	# ----------------------------
	#
	# A policy is a function that chooses actions based on observations.
	# Let's create several policies of increasing sophistication.

	import random
	from typing import List
	from dataclasses import dataclass


	@dataclass
	class PolicyResult:
	"""Result of evaluating a policy."""

	name: str
	episodes: int
	wins: int
	total_reward: float
	avg_steps: float

	@property
	def win_rate(self) -> float:
	return self.wins / self.episodes if self.episodes > 0 else 0.0


	# %%
	# Policy 1: Random Policy
	# ~~~~~~~~~~~~~~~~~~~~~~~
	#
	# The simplest policy - randomly choose from legal actions:


	class RandomPolicy:
	"""
	Random policy - baseline for comparison.

	Always picks a random action from the legal actions.
	Expected win rate for Catch: ~20% (1 in 5 columns)
	"""

	name = "Random"

	def choose_action(self, observation) -> int:
	"""Choose a random legal action."""
	return random.choice(observation.legal_actions)


	# %%
	# Policy 2: Heuristic Policy
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# A hand-coded policy that uses domain knowledge:


	class SmartCatchPolicy:
	"""
	Smart heuristic policy for the Catch game.

	Tracks the ball position and moves paddle toward it.
	Expected win rate: ~100% (optimal for Catch)
	"""

	name = "Smart (Heuristic)"

	def __init__(self, grid_width: int = 5):
	self.grid_width = grid_width

	def choose_action(self, observation) -> int:
	"""Move paddle toward ball position."""
	info_state = observation.info_state
	grid_width = self.grid_width

	# Find ball position (first 1.0 in the grid, excluding last row)
	ball_col = None
	for idx, val in enumerate(info_state[:-grid_width]):
	if abs(val - 1.0) < 0.01:
	ball_col = idx % grid_width
	break

	# Find paddle position (1.0 in last row)
	last_row = info_state[-grid_width:]
	paddle_col = None
	for idx, val in enumerate(last_row):
	if abs(val - 1.0) < 0.01:
	paddle_col = idx
	break

	if ball_col is None or paddle_col is None:
	return 1 # STAY if can't determine positions

	# Move toward ball
	if paddle_col < ball_col:
	return 2 # RIGHT
	elif paddle_col > ball_col:
	return 0 # LEFT
	else:
	return 1 # STAY


	# %%
	# Policy 3: Epsilon-Greedy Policy
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# Combines exploration (random) with exploitation (smart):


	class EpsilonGreedyPolicy:
	"""
	Epsilon-greedy policy - balances exploration and exploitation.

	With probability epsilon, takes random action (explore).
	Otherwise, uses smart policy (exploit).
	Epsilon decays over time to favor exploitation.
	"""

	name = "Epsilon-Greedy"

	def __init__(self, epsilon: float = 0.3, decay: float = 0.99):
	self.epsilon = epsilon
	self.decay = decay
	self.smart_policy = SmartCatchPolicy()
	self.steps = 0

	def choose_action(self, observation) -> int:
	"""Choose action with epsilon-greedy strategy."""
	self.steps += 1

	# Decay epsilon
	current_epsilon = self.epsilon * (self.decay**self.steps)

	if random.random() < current_epsilon:
	# Explore: random action
	return random.choice(observation.legal_actions)
	else:
	# Exploit: use smart policy
	return self.smart_policy.choose_action(observation)


	# %%
	# Policy 4: Always Stay Policy
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# A deliberately bad policy for comparison:


	class AlwaysStayPolicy:
	"""
	Always stay policy - deliberately bad baseline.

	Never moves the paddle. Only wins if ball lands on starting column.
	Expected win rate: ~20% (same as random)
	"""

	name = "Always Stay"

	def choose_action(self, observation) -> int:
	"""Always return STAY action."""
	return 1 # STAY


	# %%
	# Part 6: Running Evaluations
	# ---------------------------
	#
	# Let's evaluate our policies! First, we'll create an evaluation function.


	def evaluate_policy_live(
	policy,
	env,
	num_episodes: int = 50,
	game_name: str = "catch",
	) -> PolicyResult:
	"""
	Evaluate a policy against a live environment.

	Args:
	policy: Policy object with choose_action method
	env: Connected OpenSpielEnv client
	num_episodes: Number of episodes to run
	game_name: Name of the game to play

	Returns:
	PolicyResult with evaluation metrics
	"""
	wins = 0
	total_reward = 0.0
	total_steps = 0

	for _ in range(num_episodes):
	result = env.reset()
	episode_steps = 0

	while not result.done:
	action_id = policy.choose_action(result.observation)
	action = OpenSpielAction(action_id=action_id, game_name=game_name)
	result = env.step(action)
	episode_steps += 1

	total_reward += result.reward if result.reward else 0
	total_steps += episode_steps
	if result.reward and result.reward > 0:
	wins += 1

	return PolicyResult(
	name=policy.name,
	episodes=num_episodes,
	wins=wins,
	total_reward=total_reward,
	avg_steps=total_steps / num_episodes,
	)


	def evaluate_policy_simulated(
	policy,
	num_episodes: int = 50,
	grid_height: int = 10,
	grid_width: int = 5,
	) -> PolicyResult:
	"""
	Evaluate a policy using local simulation (no server needed).

	This simulates the Catch game locally for testing without a server.

	Args:
	policy: Policy object with choose_action method
	num_episodes: Number of episodes to run
	grid_height: Height of the game grid
	grid_width: Width of the game grid

	Returns:
	PolicyResult with evaluation metrics
	"""
	wins = 0
	total_reward = 0.0
	total_steps = 0

	# Create a mock observation class
	class MockObservation:
	def __init__(self, info_state, legal_actions):
	self.info_state = info_state
	self.legal_actions = legal_actions

	for _ in range(num_episodes):
	# Initialize game
	ball_col = random.randint(0, grid_width - 1)
	paddle_col = grid_width // 2 # Start in center

	for step in range(grid_height):
	# Create observation
	info_state = [0.0] * (grid_height * grid_width)
	info_state[step * grid_width + ball_col] = 1.0 # Ball position
	info_state[(grid_height - 1) * grid_width + paddle_col] = 1.0 # Paddle

	observation = MockObservation(
	info_state=info_state, legal_actions=[0, 1, 2]
	)

	# Get action from policy
	action = policy.choose_action(observation)

	# Execute action
	if action == 0: # LEFT
	paddle_col = max(0, paddle_col - 1)
	elif action == 2: # RIGHT
	paddle_col = min(grid_width - 1, paddle_col + 1)
	# action == 1 is STAY, no movement

	total_steps += 1

	# Check if caught
	if paddle_col == ball_col:
	wins += 1
	total_reward += 1.0

	return PolicyResult(
	name=policy.name,
	episodes=num_episodes,
	wins=wins,
	total_reward=total_reward,
	avg_steps=total_steps / num_episodes,
	)


	# %%
	# Part 7: Policy Competition
	# --------------------------
	#
	# Let's run a competition between all our policies!

	# Create policy instances
	policies = [
	RandomPolicy(),
	AlwaysStayPolicy(),
	SmartCatchPolicy(),
	EpsilonGreedyPolicy(epsilon=0.3),
	]

	# Check if we can connect to a live server
	SERVER_URL = "http://localhost:8000"
	USE_LIVE = False

	if IMPORTS_OK:
	try:
	test_env = OpenSpielEnv(base_url=SERVER_URL)
	with test_env.sync() as client:
	pass # Quick test to verify connection
	USE_LIVE = True
	print(f"✓ Connected to server at {SERVER_URL}")
	except Exception as e:
	USE_LIVE = False
	print(f"✗ No server running at {SERVER_URL}: {e}")

	print("=" * 70)
	if USE_LIVE:
	print(" POLICY COMPETITION - LIVE SERVER")
	else:
	print(" POLICY COMPETITION - SIMULATION MODE")
	print("=" * 70)
	print()

	NUM_EPISODES = 50
	print(f"Running {NUM_EPISODES} episodes per policy...\n")

	results = []

	for policy in policies:
	print(f" Evaluating {policy.name}...", end=" ", flush=True)

	if USE_LIVE:
	env = OpenSpielEnv(base_url=SERVER_URL)
	with env.sync() as client:
	result = evaluate_policy_live(policy, client, NUM_EPISODES)
	else:
	result = evaluate_policy_simulated(policy, NUM_EPISODES)

	results.append(result)
	print(f"Win rate: {result.win_rate * 100:.1f}%")

	# %%
	# Display Results
	# ~~~~~~~~~~~~~~~

	print()
	print("=" * 70)
	print(" FINAL RESULTS")
	print("=" * 70)
	print()

	# Sort by win rate (descending)
	results.sort(key=lambda r: r.win_rate, reverse=True)

	# Display leaderboard
	print(f"{'Rank':<6}{'Policy':<20}{'Win Rate':<12}{'Avg Steps':<12}{'Wins'}")
	print("-" * 60)

	for i, result in enumerate(results):
	rank = f"#{i + 1}"
	bar = "█" * int(result.win_rate * 20)
	print(
	f"{rank:<6}{result.name:<20}{result.win_rate * 100:>5.1f}%{'':<5}"
	f"{result.avg_steps:>6.1f}{'':<6}{result.wins}/{result.episodes}"
	)

	print()
	print("-" * 70)
	print()
	print("Key Insights:")
	print(" • Random/AlwaysStay: ~20% (baseline - relies on luck)")
	print(" • Smart Heuristic: ~100% (optimal for Catch)")
	print(" • Epsilon-Greedy: ~85%+ (balances exploration/exploitation)")
	print()

	# %%
	# Part 8: Working with Different Games
	# ------------------------------------
	#
	# OpenSpiel supports multiple games. Let's create actual action instances
	# for different games and examine their structure:

	print("=" * 70)
	print(" SWITCHING GAMES - ACTUAL ACTION INSTANCES")
	print("=" * 70)
	print()

	# Create actual action instances for different games
	if IMPORTS_OK:
	from openspiel_env.models import OpenSpielAction as ActionModel

	# Catch actions
	print("CATCH GAME ACTIONS:")
	print("-" * 40)
	catch_actions = {
	0: "Move LEFT",
	1: "STAY in place",
	2: "Move RIGHT",
	}
	for action_id, description in catch_actions.items():
	action = ActionModel(action_id=action_id, game_name="catch")
	print(f" {action} # {description}")

	print()

	# 2048 actions
	print("2048 GAME ACTIONS:")
	print("-" * 40)
	game_2048_actions = {
	0: "Slide UP",
	1: "Slide RIGHT",
	2: "Slide DOWN",
	3: "Slide LEFT",
	}
	for action_id, description in game_2048_actions.items():
	action = ActionModel(action_id=action_id, game_name="2048")
	print(f" {action} # {description}")

	print()

	# Tic-Tac-Toe actions
	print("TIC-TAC-TOE ACTIONS:")
	print("-" * 40)
	print(" Grid positions 0-8 (left-to-right, top-to-bottom):")
	print(" 0 \| 1 \| 2")
	print(" ---\|---\|---")
	print(" 3 \| 4 \| 5")
	print(" ---\|---\|---")
	print(" 6 \| 7 \| 8")
	print()
	# Show a few examples
	for pos in [0, 4, 8]:
	action = ActionModel(action_id=pos, game_name="tic_tac_toe")
	corner = {0: "top-left", 4: "center", 8: "bottom-right"}[pos]
	print(f" {action} # {corner}")

	print()

	# Blackjack actions
	print("BLACKJACK ACTIONS:")
	print("-" * 40)
	blackjack_actions = {
	0: "STAND (keep current hand)",
	1: "HIT (request another card)",
	}
	for action_id, description in blackjack_actions.items():
	action = ActionModel(action_id=action_id, game_name="blackjack")
	print(f" {action} # {description}")

	else:
	# Fallback using dataclass
	from dataclasses import dataclass

	@dataclass
	class ActionDemo:
	action_id: int
	game_name: str

	print("CATCH GAME ACTIONS:")
	print("-" * 40)
	for action_id, desc in [(0, "LEFT"), (1, "STAY"), (2, "RIGHT")]:
	print(f" ActionDemo(action_id={action_id}, game_name='catch') # {desc}")

	print()
	print("2048 GAME ACTIONS:")
	print("-" * 40)
	for action_id, desc in [(0, "UP"), (1, "RIGHT"), (2, "DOWN"), (3, "LEFT")]:
	print(f" ActionDemo(action_id={action_id}, game_name='2048') # {desc}")

	print()
	print("-" * 70)
	print("Each game has its own action space - check legal_actions in observation!")

	# %%
	# Part 9: Multi-Player Games
	# --------------------------
	#
	# Some games like Tic-Tac-Toe and Kuhn Poker support multiple players.
	# Let's create actual observation instances to understand the structure:

	print("=" * 70)
	print(" MULTI-PLAYER GAMES - OBSERVATION STRUCTURE")
	print("=" * 70)
	print()

	# Create observation instances for multi-player games
	if IMPORTS_OK:
	from openspiel_env.models import OpenSpielObservation as ObsModel

	# Single-player observation (like Catch)
	print("SINGLE-PLAYER OBSERVATION (Catch):")
	print("-" * 50)
	single_player_obs = ObsModel(
	info_state=[0.0, 0.0, 1.0, 0.0, 0.0] + [0.0] * 45,
	legal_actions=[0, 1, 2],
	game_phase="playing",
	current_player_id=0,
	opponent_last_action=None,
	)
	print(f" current_player_id: {single_player_obs.current_player_id} # Always 0 (you)")
	print(f" opponent_last_action: {single_player_obs.opponent_last_action} # None (no opponent)")
	print(f" legal_actions: {single_player_obs.legal_actions}")
	print(f" game_phase: {single_player_obs.game_phase!r}")
	print()

	# Multi-player observation - your turn (like Tic-Tac-Toe)
	print("MULTI-PLAYER OBSERVATION (Tic-Tac-Toe, YOUR turn):")
	print("-" * 50)
	your_turn_obs = ObsModel(
	info_state=[1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0], # X at 0, O at 4
	legal_actions=[1, 2, 3, 5, 6, 7, 8], # Available positions
	game_phase="playing",
	current_player_id=0, # Your turn!
	opponent_last_action=4, # Opponent played center
	)
	print(f" current_player_id: {your_turn_obs.current_player_id} # 0 = YOUR turn")
	print(f" opponent_last_action: {your_turn_obs.opponent_last_action} # Opponent played position 4 (center)")
	print(f" legal_actions: {your_turn_obs.legal_actions}")
	print(f" game_phase: {your_turn_obs.game_phase!r}")
	print()

	# Multi-player observation - opponent's turn
	print("MULTI-PLAYER OBSERVATION (Tic-Tac-Toe, OPPONENT's turn):")
	print("-" * 50)
	opponent_turn_obs = ObsModel(
	info_state=[1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 1.0], # X at 0,8; O at 4
	legal_actions=[], # No actions available when it's opponent's turn
	game_phase="playing",
	current_player_id=1, # Opponent's turn
	opponent_last_action=None, # Will be set after they move
	)
	print(f" current_player_id: {opponent_turn_obs.current_player_id} # 1 = OPPONENT's turn")
	print(f" legal_actions: {opponent_turn_obs.legal_actions} # Empty - wait for opponent")
	print(f" game_phase: {opponent_turn_obs.game_phase!r}")
	print()

	# Terminal state observation
	print("TERMINAL OBSERVATION (Game Over):")
	print("-" * 50)
	terminal_obs = ObsModel(
	info_state=[1.0, 1.0, 1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0], # X wins top row
	legal_actions=[], # No more moves
	game_phase="terminal",
	current_player_id=-1, # No current player
	opponent_last_action=4,
	)
	print(f" current_player_id: {terminal_obs.current_player_id} # -1 = Game over")
	print(f" game_phase: {terminal_obs.game_phase!r}")
	print(f" legal_actions: {terminal_obs.legal_actions} # Empty - game ended")

	else:
	# Fallback demonstration
	from dataclasses import dataclass
	from typing import List, Optional

	@dataclass
	class ObsDemo:
	current_player_id: int
	opponent_last_action: Optional[int]
	legal_actions: List[int]
	game_phase: str

	print("SINGLE-PLAYER (Catch):")
	print(f" current_player_id: 0 # Always your turn")
	print(f" opponent_last_action: None")
	print()

	print("MULTI-PLAYER - YOUR TURN (Tic-Tac-Toe):")
	print(f" current_player_id: 0 # 0 = your turn")
	print(f" opponent_last_action: 4 # What opponent just played")
	print(f" legal_actions: [1, 2, 3, 5, 6, 7, 8] # Available moves")
	print()

	print("MULTI-PLAYER - OPPONENT'S TURN:")
	print(f" current_player_id: 1 # Wait for opponent")
	print(f" legal_actions: [] # Can't move during opponent's turn")

	print()
	print("-" * 70)
	print("KEY INSIGHT: Only act when current_player_id == 0 (your turn)!")
	print("The environment automatically handles opponent moves.")

	# %%
	# Summary
	# -------
	#
	# In this notebook, you learned:
	#
	# Connection Methods:
	#
	# - ``from_hub()`` - Auto-download from Hugging Face
	# - ``from_docker_image()`` - Use local Docker image
	# - Direct URL - Connect to running server
	#
	# Creating Policies:
	#
	# - Random: Baseline comparison
	# - Heuristic: Domain knowledge encoded
	# - Epsilon-Greedy: Balance exploration/exploitation
	#
	# Running Evaluations:
	#
	# - Measure win rates and rewards
	# - Compare policy performance
	# - Run competitions
	#
	# Multi-Game Support:
	#
	# - Switch games via ``game_name`` parameter
	# - Handle multi-player games
	# - Work with different action spaces
	#
	# Next Steps
	# ----------
	#
	# Continue to Notebook 3: Building & Sharing Environments
	#
	# In the next notebook, you'll:
	#
	# - Create your own custom environment
	# - Package it with Docker
	# - Deploy to Hugging Face Hub
	# - Share with the community