Spaces:

ViditOstwal
/

maze_env

Running

App Files Files Community

maze_env / models.py

ViditOstwal

Upload folder using huggingface_hub

dad5fe1 verified about 1 month ago

raw

history blame contribute delete

5.19 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	Data models for the Ice Maze Environment.

	The maze environment loads ice-sliding puzzle levels and exposes them
	to LLM agents via the OpenEnv protocol.
	"""

	from enum import Enum
	from typing import List

	from openenv.core.env_server.types import Action, Observation
	from pydantic import Field, field_validator


	class MazeDirection(str, Enum):
	"""Cardinal direction for a single Ice Maze step (all players move together)."""

	LEFT = "LEFT"
	RIGHT = "RIGHT"
	UP = "UP"
	DOWN = "DOWN"


	class MazeAction(Action):
	"""
	Action for the Ice Maze environment.

	The agent specifies a direction to slide all players simultaneously.
	On ice, players slide until they hit a wall (#) or another player.
	Exit cells (e) do NOT stop sliding — players slide through them.
	"""

	direction: MazeDirection = Field(
	...,
	description="Direction to move all players simultaneously: LEFT, RIGHT, UP, or DOWN.",
	)

	@field_validator("direction", mode="before")
	@classmethod
	def _coerce_direction(cls, v: object) -> object:
	if isinstance(v, MazeDirection):
	return v
	if isinstance(v, str):
	key = v.strip().upper()
	if key in MazeDirection.__members__:
	return MazeDirection[key]
	return v


	class MazeObservation(Observation):
	"""
	Observation from the Ice Maze environment.

	Primary agent-facing fields: current board, step budget, action history,
	and (on reset) the system prompt with rules and layout context.
	Additional fields support tooling and state introspection.

	Inherited from Observation base:
	done (bool) — True when all players are simultaneously on exit cells
	reward (float\|None) — Reward signal for this step
	metadata (dict) — Extra info such as action history (no oracle path)
	"""

	board: str = Field(
	default="",
	description=(
	"Current board rendered as spaced rows joined by newlines. "
	"Symbols: # wall, . ice, a player on non-exit, e unoccupied exit, "
	"b player currently on an exit."
	),
	)
	step_count: int = Field(
	default=0,
	description="Number of steps taken so far in this episode.",
	)
	max_steps: int = Field(
	default=0,
	description="Maximum steps allowed for this episode before a hard limit (set on reset).",
	)
	previous_actions: List[str] = Field(
	default_factory=list,
	description=(
	"Directions applied so far in order, each value one of "
	"LEFT, RIGHT, UP, DOWN (same vocabulary as MazeAction)."
	),
	)
	system_prompt: str = Field(
	default="",
	description=(
	"Instructions for the LLM: maze rules, valid actions, symbols, layout, "
	"step count vs max steps, and previous actions (oldest first). "
	"Refreshed on reset() and on each step()."
	),
	)
	agent_positions: List[List[int]] = Field(
	default_factory=list,
	description=(
	"Current interior coordinates of each player as [[row, col], ...]. "
	"Interior coords are 0-indexed from the top-left non-wall cell "
	"(i.e. board_row - 1, board_col - 1)."
	),
	)
	exit_positions: List[List[int]] = Field(
	default_factory=list,
	description=(
	"Interior coordinates of all exit cells as [[row, col], ...]. "
	"Exit cells are shared — any player can use any exit. "
	"Fixed for the duration of the episode."
	),
	)
	num_players: int = Field(
	default=1,
	description="Number of players in this level.",
	)
	level_index: int = Field(
	default=-1,
	description="Dataset level index for the current episode.",
	)
	message: str = Field(
	default="",
	description=(
	"Human-readable status message describing what just happened, "
	"e.g. 'Moved LEFT. Step 3.', 'Solved! All players reached an exit in 6 steps.', "
	"'Invalid direction. Use: LEFT, RIGHT, UP, DOWN'."
	),
	)

	def __str__(self) -> str:
	parts = []

	parts.append(f"done={self.done} \| reward={self.reward}")
	parts.append(f"step={self.step_count}/{self.max_steps}")
	parts.append(f"level_index={self.level_index}")
	parts.append(f"players={self.agent_positions} exits={self.exit_positions}")
	if self.board:
	parts.append(f"board:\n{self.board}")

	if self.previous_actions:
	parts.append(f"actions={self.previous_actions}")

	if self.message:
	parts.append(f"message={self.message}")

	# 👇 Full system prompt (clearly separated)
	if self.system_prompt:
	parts.append("\n=== SYSTEM PROMPT ===")
	parts.append(self.system_prompt)

	return "\n".join(parts)