File size: 6,229 Bytes
395aabe
 
 
 
 
 
 
 
 
 
 
c17891d
395aabe
 
 
c17891d
395aabe
 
 
 
c17891d
 
 
 
 
395aabe
 
 
 
 
 
 
 
c17891d
395aabe
 
 
 
c17891d
395aabe
 
 
 
c17891d
395aabe
 
 
 
c17891d
395aabe
 
 
c17891d
395aabe
 
 
 
 
 
 
 
 
 
 
 
c17891d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395aabe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c17891d
395aabe
 
 
 
c17891d
395aabe
 
 
c17891d
395aabe
 
 
 
 
 
 
c17891d
 
 
 
 
395aabe
 
 
 
 
 
 
 
 
c17891d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field
from typing import List, Dict, Tuple


class AppAction(Action):
    """Action for the App environment"""

    # Payload for a single step. Every field declared here has a default.
    # The class docstring above is left short on purpose: pydantic surfaces
    # it as the model description in the generated JSON schema.
    #
    # NOTE(review): the 4-tuples are presumably (x, y, z, <flag>) -- three
    # grid coordinates plus a boolean whose meaning is not visible in this
    # file. Confirm against the environment implementation.

    # Maps a string key (presumably the object name) to its 4-tuple.
    placement: dict[str, tuple[int, int, int, bool]] = Field(
        description="Placement of the object in a 3D grid",
        default_factory=dict,
    )

    # Boolean switch; True unless the caller overrides it.
    isSegmentation: bool = Field(
        description="Whether the model is segmenting the objects",
        default=True,
    )

    # Same value shape as `placement`; empty dict by default.
    findObjects: dict[str, tuple[int, int, int, bool]] = Field(
        description="Dictionary of objects",
        default_factory=dict,
    )

    # (object_name, direction, amount); the default ("", "", 0) presumably
    # means "no adjustment requested" -- verify against the consumer.
    adjust: tuple[str, str, int] = Field(
        description="Adjustment action for moving or rotating objects. Format: (object_name, direction, amount)",
        default=("", "", 0),
    )


class AppObservation(Observation):
    """Observation from the App environment"""

    # Snapshot handed back after an action. Every field declared here has a
    # default, and the list/dict fields use default_factory so instances never
    # share a mutable default.
    #
    # NOTE(review): the 4-tuples are presumably (x, y, z, <flag>); the meaning
    # of the trailing bool is not visible in this file -- confirm.
    # NOTE(review): the Observation base class may already declare its own
    # reward/done-style fields; confirm `reward` and `isDone` are intended to
    # coexist with (or override) them.

    # Three-level nested list of ints.
    currentGrid: list[list[list[int]]] = Field(
        default_factory=list,
        description="Current placement of the objects in a 3D grid",
    )

    positions: dict[str, tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Dictionary of objects with their positions in the environment",
    )

    objectsLeft: list[str] = Field(
        default_factory=list,
        description="List of unorganised objects left in the environment",
    )

    objectsFound: list[str] = Field(
        default_factory=list,
        description="List of objects found in the environment",
    )

    reward: float = Field(
        default=0.0, description="Reward received after taking the action"
    )

    isDone: bool = Field(default=False, description="Whether the episode has ended")

    rewardFeedback: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardList: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    numberPlaced: int = Field(
        default=0,
        description="Number of objects successfully placed in the environment",
    )

    ObjectsPlaced: dict[str, tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Objects that have been successfully placed in the environment",
    )

    # The six fields below pair a list of floats with a list of strings for
    # each of the Segment / Place / Adjust suffixes.
    # NOTE(review): presumably per-action-type breakdowns of rewardList and
    # rewardFeedback -- verify against the code that fills them.

    rewardListSegment: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackSegment: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    # Description corrected: this field holds floats, not feedback strings.
    rewardListPlace: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackPlace: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    # Description corrected: this field holds floats, not feedback strings.
    rewardListAdjust: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackAdjust: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )


class AppState(State):
    """State for the App environment"""

    # Environment-side state record. Every field declared here has a default,
    # and the list/dict fields use default_factory so instances never share a
    # mutable default.
    #
    # NOTE(review): the 4-tuples are presumably (x, y, z, <flag>); the meaning
    # of the trailing bool is not visible in this file -- confirm.

    # NOTE(review): the field is named "current" but its description says
    # "Initial state" -- confirm which is intended before changing either.
    currentGrid: list[list[list[int]]] = Field(
        default_factory=list,
        description="Initial state of the environment with unorganised objects",
    )

    # Same nesting as currentGrid, but with float cells.
    weightedGrid: list[list[list[float]]] = Field(
        default_factory=list,
        description="Weighted grid used when scoring placements",
    )

    objectsLeft: list[str] = Field(
        default_factory=list,
        description="List of unorganised objects left in the environment",
    )

    objectsFound: list[str] = Field(
        default_factory=list,
        description="List of objects found in the environment",
    )

    reward: float = Field(
        default=0.0, description="Reward received after taking the action"
    )

    isDone: bool = Field(default=False, description="Whether the episode has ended")

    ObjectsPresent: dict[str, tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Placed objects and their current positions in the environment",
    )

    ObjectsPlaced: dict[str, tuple[int, int, int, bool]] = Field(
        default_factory=dict,
        description="Objects that have been successfully placed in the environment",
    )

    rewardFeedback: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    rewardList: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    numberPlaced: int = Field(
        default=0,
        description="Number of objects successfully placed in the environment",
    )

    # The six fields below pair a list of floats with a list of strings for
    # each of the Segment / Place / Adjust suffixes.
    # NOTE(review): presumably per-action-type breakdowns of rewardList and
    # rewardFeedback -- verify against the code that fills them.

    rewardListSegment: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackSegment: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    # Description corrected: this field holds floats, not feedback strings.
    rewardListPlace: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackPlace: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )

    # Description corrected: this field holds floats, not feedback strings.
    rewardListAdjust: list[float] = Field(
        default_factory=list,
        description="List of reward values received after taking the action",
    )

    rewardFeedbackAdjust: list[str] = Field(
        default_factory=list,
        description="List of feedback strings describing the reward received after taking the action",
    )