from typing import List, Dict, Tuple

from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field

# Data models exchanged with the App environment: the action an agent sends,
# the observation it gets back, and the state the server keeps.
#
# Class docstrings and Field descriptions are kept terse on purpose: they are
# runtime strings (pydantic can surface them in generated schemas), so
# reviewer-facing notes live in `#` comments instead.
#
# Several fields share the type Dict[str, Tuple[int, int, int, bool]].
# NOTE(review): presumably object name -> (x, y, z, flag) on the 3D grid; the
# meaning of the trailing bool is not visible in this file -- confirm against
# the environment implementation.


# Payload sent by the agent for one step. Every field has a default, so an
# action can be constructed with only the parts relevant to the step.
class AppAction(Action):
    """Action for the App environment"""

    placement: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict, description="Placement of the object in a 3D grid"
    )
    isSegmentation: bool = Field(
        default=True, description="Whether the model is segmenting the objects"
    )
    findObjects: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict, description="Dictionary of objects"
    )
    # The default ("", "", 0) has an empty object name and zero amount.
    # NOTE(review): presumably treated as "no adjustment" -- confirm.
    adjust: Tuple[str, str, int] = Field(
        default=("", "", 0),
        description="Adjustment action for moving or rotating objects. Format: (object_name, direction, amount)",
    )


# Result returned to the agent after a step.
# NOTE(review): the Observation base class may already declare reward/done
# style fields; `reward` and `isDone` are declared here regardless -- confirm
# which ones the server actually populates.
class AppObservation(Observation):
    """Observation from the App environment"""

    currentGrid: List[List[List[int]]] = Field(
        default_factory=list,
        description="Current placement of the objects in a 3D grid",
    )
    positions: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Dictionary of objects with their positions in the environment",
    )
    objectsLeft: List[str] = Field(
        default_factory=list,
        description="List of unorganised objects left in the environment",
    )
    objectsFound: List[str] = Field(
        default_factory=list,
        description="List of objects found in the environment",
    )
    reward: float = Field(
        default=0.0, description="Reward received after taking the action"
    )
    isDone: bool = Field(default=False, description="Whether the episode has ended")
    rewardFeedback: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )
    rewardList: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    numberPlaced: int = Field(
        default=0,
        description="Number of objects successfully placed in the environment",
    )
    ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Objects that have been successfully placed in the environment",
    )
    # Reward values / feedback strings kept in separate Segment, Place and
    # Adjust lists. The *List* fields hold floats and the *Feedback* fields
    # hold strings, and the descriptions below say so accordingly.
    rewardListSegment: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    rewardFeedbackSegment: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )
    rewardListPlace: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    rewardFeedbackPlace: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )
    rewardListAdjust: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    rewardFeedbackAdjust: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )


# Server-side state. It repeats most of the observation's fields (but not
# `positions`) and adds `weightedGrid` and `ObjectsPresent`.
class AppState(State):
    """State for the App environment"""

    currentGrid: List[List[List[int]]] = Field(
        default_factory=list,
        description="Initial state of the environment with unorganised objects",
    )
    weightedGrid: List[List[List[float]]] = Field(
        default_factory=list,
        description="Weighted grid used when scoring placements",
    )
    objectsLeft: List[str] = Field(
        default_factory=list,
        description="List of unorganised objects left in the environment",
    )
    objectsFound: List[str] = Field(
        default_factory=list,
        description="List of objects found in the environment",
    )
    reward: float = Field(
        default=0.0, description="Reward received after taking the action"
    )
    isDone: bool = Field(default=False, description="Whether the episode has ended")
    ObjectsPresent: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Placed objects and their current positions in the environment",
    )
    ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Objects that have been successfully placed in the environment",
    )
    rewardFeedback: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )
    rewardList: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    numberPlaced: int = Field(
        default=0,
        description="Number of objects successfully placed in the environment",
    )
    # Same Segment / Place / Adjust split as on the observation: floats in
    # the *List* fields, strings in the *Feedback* fields.
    rewardListSegment: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    rewardFeedbackSegment: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )
    rewardListPlace: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    rewardFeedbackPlace: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )
    rewardListAdjust: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )
    rewardFeedbackAdjust: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )