from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field
from typing import List, Dict, Tuple


class AppAction(Action):
    """Action for the App environment"""

    # The class docstring above is left untouched on purpose: pydantic
    # publishes it as the model description in the generated JSON schema.

    # Maps an object name to a 4-tuple (int, int, int, bool).
    # NOTE(review): presumably three grid coordinates plus a flag; the meaning
    # of the bool is not visible in this file, so confirm with the environment.
    placement: Dict[str, Tuple[int, int, int, bool]] = Field(
        description="Placement of the object in a 3D grid",
        default_factory=dict,
    )

    # Defaults to True, so an action built with no arguments is a
    # segmentation action.
    isSegmentation: bool = Field(
        description="Whether the model is segmenting the objects",
        default=True,
    )

    # Same value shape as `placement`: object name -> (int, int, int, bool).
    findObjects: Dict[str, Tuple[int, int, int, bool]] = Field(
        description="Dictionary of objects",
        default_factory=dict,
    )

    # (object_name, direction, amount). The default ("", "", 0) has an empty
    # object name.
    # NOTE(review): presumably the empty default means "no adjustment" — confirm.
    adjust: Tuple[str, str, int] = Field(
        description="Adjustment action for moving or rotating objects. Format: (object_name, direction, amount)",
        default=("", "", 0),
    )


class AppObservation(Observation):
    """Observation from the App environment"""

    # NOTE(review): `reward` and `isDone` are declared here explicitly; if the
    # `Observation` base class already carries reward/done fields, these may
    # shadow or duplicate them — confirm against the base class.

    # --- Grid and object bookkeeping -------------------------------------

    # Triple-nested list of ints, i.e. a 3D grid of cell values.
    currentGrid: List[List[List[int]]] = Field(
        default_factory=list,
        description="Current placement of the objects in a 3D grid",
    )

    # Object name -> (int, int, int, bool).
    # NOTE(review): presumably grid coordinates plus a flag — confirm.
    positions: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Dictionary of objects with their positions in the environment",
    )

    objectsLeft: List[str] = Field(
        default_factory=list,
        description="List of unorganised objects left in the environment",
    )

    objectsFound: List[str] = Field(
        default_factory=list,
        description="List of objects found in the environment",
    )

    # --- Step outcome ----------------------------------------------------

    reward: float = Field(
        default=0.0, description="Reward received after taking the action"
    )

    isDone: bool = Field(default=False, description="Whether the episode has ended")

    # Human-readable feedback and the matching numeric reward values.
    rewardFeedback: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardList: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    numberPlaced: int = Field(
        default=0,
        description="Number of objects successfully placed in the environment",
    )

    # Same value shape as `positions`: object name -> (int, int, int, bool).
    ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Objects that have been successfully placed in the environment",
    )

    # --- Reward breakdown ------------------------------------------------
    # Each `rewardList*` field holds floats and each `rewardFeedback*` field
    # holds strings. The descriptions below state that distinction; earlier
    # the Place/Adjust float lists were described as "feedback strings".
    # NOTE(review): the Segment / Place / Adjust suffixes presumably map to
    # the segmentation, placement and adjust action kinds — confirm.

    rewardListSegment: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackSegment: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardListPlace: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackPlace: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardListAdjust: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackAdjust: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )


class AppState(State):
    """State for the App environment"""

    # --- Grid and object bookkeeping -------------------------------------

    # Triple-nested list of ints, i.e. a 3D grid of cell values.
    currentGrid: List[List[List[int]]] = Field(
        default_factory=list,
        description="Initial state of the environment with unorganised objects",
    )

    # Same nesting as `currentGrid`, but with float cells.
    weightedGrid: List[List[List[float]]] = Field(
        default_factory=list,
        description="Weighted grid used when scoring placements",
    )

    objectsLeft: List[str] = Field(
        default_factory=list,
        description="List of unorganised objects left in the environment",
    )

    objectsFound: List[str] = Field(
        default_factory=list,
        description="List of objects found in the environment",
    )

    # --- Step outcome ----------------------------------------------------

    reward: float = Field(
        default=0.0, description="Reward received after taking the action"
    )

    isDone: bool = Field(default=False, description="Whether the episode has ended")

    # Both dictionaries map an object name -> (int, int, int, bool).
    # NOTE(review): presumably grid coordinates plus a flag — confirm.
    ObjectsPresent: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Placed objects and their current positions in the environment",
    )

    ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Objects that have been successfully placed in the environment",
    )

    # Human-readable feedback and the matching numeric reward values.
    rewardFeedback: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardList: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    numberPlaced: int = Field(
        default=0,
        description="Number of objects successfully placed in the environment",
    )

    # --- Reward breakdown ------------------------------------------------
    # Each `rewardList*` field holds floats and each `rewardFeedback*` field
    # holds strings. The descriptions below state that distinction; earlier
    # the Place/Adjust float lists were described as "feedback strings".
    # NOTE(review): the Segment / Place / Adjust suffixes presumably map to
    # the segmentation, placement and adjust action kinds — confirm.

    rewardListSegment: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackSegment: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardListPlace: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackPlace: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardListAdjust: List[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackAdjust: List[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )