Jibrann committed
Commit 2d55563 (verified) · Parent: 77768cf

Upload folder using huggingface_hub

Files changed (4)
  1. client.py +4 -0
  2. inference.py +1 -1
  3. server/app_environment.py +46 -34
  4. utils.py +38 -14
client.py CHANGED
@@ -28,6 +28,8 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
             objectsFound=obs_data.get("objectsFound", []),
             reward=obs_data.get("reward", 0.0),
             isDone=obs_data.get("isDone", False),
+            rewardFeedback=obs_data.get("rewardFeedback", []),
+            rewardList=obs_data.get("rewardList", []),
         )
 
         return StepResult(
@@ -48,4 +50,6 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
             objectsLeft=payload.get("objectsLeft", []),
             objectsFound=payload.get("objectsFound", []),
             ObjectsPresent=payload.get("ObjectsPresent", {}),
+            rewardFeedback=payload.get("rewardFeedback", []),
+            rewardList=payload.get("rewardList", []),
         )
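Note: the two new fields are read with `.get(..., [])`, so a payload from a server that does not yet emit them simply yields empty lists. A minimal, self-contained sketch of that fallback behaviour (the `ObsSketch` dataclass below is illustrative, not the project's actual `AppObservation`):

```python
# Illustrative only: a stand-in for the observation model, showing how the
# new rewardFeedback / rewardList keys default to [] when absent.
from dataclasses import dataclass, field
from typing import List


@dataclass
class ObsSketch:
    objectsFound: List[str] = field(default_factory=list)
    reward: float = 0.0
    isDone: bool = False
    rewardFeedback: List[str] = field(default_factory=list)
    rewardList: List[float] = field(default_factory=list)


def parse_obs(obs_data: dict) -> ObsSketch:
    return ObsSketch(
        objectsFound=obs_data.get("objectsFound", []),
        reward=obs_data.get("reward", 0.0),
        isDone=obs_data.get("isDone", False),
        rewardFeedback=obs_data.get("rewardFeedback", []),
        rewardList=obs_data.get("rewardList", []),
    )


print(parse_obs({"reward": 1.5}))  # new fields fall back to empty lists
```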
inference.py CHANGED
@@ -162,7 +162,7 @@ def main() -> None:
        if observation.isDone:
            break
 
-    time.sleep(10000)
+    time.sleep(100)
 
    print(HISTORY)
server/app_environment.py CHANGED
@@ -21,15 +21,27 @@ class AppEnvironment(Environment):
         self._state = self._new_state()
         self._reset_count = 0
 
+    def _coerce_state(self) -> AppState:
+        if isinstance(self._state, AppState):
+            return self._state
+
+        if isinstance(self._state, dict):
+            self._state = AppState(**self._state)
+            return self._state
+
+        self._state = self._new_state()
+        return self._state
+
     def _new_state(self) -> AppState:
         grid, placed = initGrid()
+        grid_shape = (len(grid), len(grid[0]), len(grid[0][0]))
 
         return AppState(
             episode_id=str(uuid4()),
             step_count=0,
             currentGrid=grid,
-            weightedGrid=initWeightedGrid(),
-            objectsLeft=list(OBJECTS.keys()),
+            weightedGrid=initWeightedGrid(grid_shape),
+            objectsLeft=list(placed.keys()),
             objectsFound=[],
             reward=0.0,
             isDone=False,
@@ -53,63 +65,63 @@ class AppEnvironment(Environment):
         )
 
     def step(self, action: AppAction) -> AppObservation:
-        if not isinstance(self._state, AppState):
-            self._state = self._new_state()
+        state = self._coerce_state()
 
-        self._state.step_count += 1
+        if isinstance(action, dict):
+            action = AppAction(**action)
 
+        state.step_count += 1
         reward = 0.0
 
         if action is None:
             reward -= 10.0
             appendRewardFeedback(
-                self._state,
+                state,
                 "No action is of invalid schema or format. Penalty applied.",
                 reward,
             )
             return AppObservation(
-                currentGrid=self._state.currentGrid,
-                positions=self._state.ObjectsPresent,
-                objectsLeft=self._state.objectsLeft,
-                objectsFound=self._state.objectsFound,
-                reward=self._state.reward,
-                isDone=self._state.isDone,
-                rewardFeedback=self._state.rewardFeedback,
-                rewardList=self._state.rewardList,
+                currentGrid=state.currentGrid,
+                positions=state.ObjectsPresent,
+                objectsLeft=state.objectsLeft,
+                objectsFound=state.objectsFound,
+                reward=state.reward,
+                isDone=state.isDone,
+                rewardFeedback=state.rewardFeedback,
+                rewardList=state.rewardList,
             )
 
         if action.isSegmentation and action is not None:
             reward += 10.0
-            appendRewardFeedback(self._state, "Segmentation successful.", reward)
+            appendRewardFeedback(state, "Segmentation successful.", reward)
 
         if action.placement and action is not None:
-            reward += place(action.isSegmentation, action.placement, self._state)
-            appendRewardFeedback(self._state, "Object placed successfully.", reward)
+            reward += place(action.isSegmentation, action.placement, state)
+            appendRewardFeedback(state, "Object placed successfully.", reward)
 
         if action.findObjects and action is not None:
-            reward += findobject(action.isSegmentation, action.findObjects, self._state)
-            appendRewardFeedback(self._state, "Object found successfully.", reward)
+            reward += findobject(action.isSegmentation, action.findObjects, state)
+            appendRewardFeedback(state, "Object found successfully.", reward)
 
-        if len(self._state.objectsLeft) == 0:
-            self._state.isDone = True
+        if len(state.objectsLeft) == 0:
+            state.isDone = True
             reward += 10.0
-            appendRewardFeedback(
-                self._state, "All objects found. Episode completed!", reward
-            )
+            appendRewardFeedback(state, "All objects found. Episode completed!", reward)
 
-        self._state.reward += reward / (10**self._state.step_count)
+        state.reward += reward / (10**state.step_count)
 
         return AppObservation(
-            currentGrid=self._state.currentGrid,
-            positions=self._state.ObjectsPresent,
-            objectsLeft=self._state.objectsLeft,
-            objectsFound=self._state.objectsFound,
-            reward=self._state.reward,
-            isDone=self._state.isDone,
-            rewardFeedback=self._state.rewardFeedback,
-            rewardList=self._state.rewardList,
+            currentGrid=state.currentGrid,
+            positions=state.ObjectsPresent,
+            objectsLeft=state.objectsLeft,
+            objectsFound=state.objectsFound,
+            reward=state.reward,
+            isDone=state.isDone,
+            rewardFeedback=state.rewardFeedback,
+            rewardList=state.rewardList,
        )
 
     @property
     def state(self) -> dict:
-        return self._state.model_dump()
+        state = self._coerce_state()
+        return state.model_dump()
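Note: `step()` accumulates `reward / (10**state.step_count)`, so each successive step's raw reward is scaled down by another factor of 10. A standalone sketch of that accumulation (the raw rewards below are illustrative values, not ones produced by the environment):

```python
# Standalone arithmetic sketch of the cumulative-reward update in step():
# the raw reward earned at step n is divided by 10**n before being added.
total = 0.0
for step_count, raw_reward in enumerate([10.0, 10.0, 10.0], start=1):
    total += raw_reward / (10 ** step_count)
    print(step_count, round(total, 4))
# step 1 adds 1.0, step 2 adds 0.1, step 3 adds 0.01 -> total 1.11
```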
utils.py CHANGED
@@ -122,8 +122,11 @@ def initGrid():
     return (grid, placed)
 
 
-def initWeightedGrid():
-    grid = random.uniform(0, 1, (randint(5, 11), randint(5, 11), randint(5, 11)))
+def initWeightedGrid(shape=None):
+    if shape is None:
+        shape = (randint(5, 11), randint(5, 11), randint(5, 11))
+
+    grid = random.uniform(0, 1, shape)
 
     x_mid = grid.shape[0] // 2
     x_span = grid.shape[0] // 4
@@ -134,6 +137,23 @@ def initWeightedGrid():
     return grid
 
 
+def _get_weight_value(weight, x, y, z):
+    if not weight or not weight[0] or not weight[0][0]:
+        return 0.0
+
+    if (
+        x < 0
+        or y < 0
+        or z < 0
+        or x >= len(weight)
+        or y >= len(weight[0])
+        or z >= len(weight[0][0])
+    ):
+        return 0.0
+
+    return weight[x][y][z]
+
+
 def place(segment, objects, state):
     dims = state.currentGrid
     weight = state.weightedGrid
@@ -189,17 +209,22 @@ def place(segment, objects, state):
            elif (
                dims[pos[0] + i][pos[1] + j][pos[2] + k] > 0 and pos[3] == True
            ):
-               if pos[2] + k + 1 <= len(objGrid[0][0]):
+               if pos[2] + k + 1 < len(dims[0][0]):
                    dims[pos[0] + i][pos[1] + j][pos[2] + k + 1] += 1
-                   reward += (
-                       weight[pos[0] + i][pos[1] + j][pos[2] + k + 1]
+                   bonus = (
+                       _get_weight_value(
+                           weight,
+                           pos[0] + i,
+                           pos[1] + j,
+                           pos[2] + k + 1,
+                       )
                        * reward_per_obj_placed
                    )
+                   reward += bonus
                    appendRewardFeedback(
                        state,
-                       f"Object '{obj_name}' placed with stacking. Bonus: {weight[pos[0] + i][pos[1] + j][pos[2] + k + 1] * reward_per_obj_placed:.2f}",
-                       weight[pos[0] + i][pos[1] + j][pos[2] + k + 1]
-                       * reward_per_obj_placed,
+                       f"Object '{obj_name}' placed with stacking. Bonus: {bonus:.2f}",
+                       bonus,
                    )
                else:
                    reward -= reward_per_obj_placed
@@ -214,15 +239,14 @@ def place(segment, objects, state):
 
            else:
                dims[pos[0] + i][pos[1] + j][pos[2] + k] = 1
-               reward += (
-                   reward_per_obj_placed
-                   * weight[pos[0] + i][pos[1] + j][pos[2] + k]
+               bonus = reward_per_obj_placed * _get_weight_value(
+                   weight, pos[0] + i, pos[1] + j, pos[2] + k
                )
+               reward += bonus
                appendRewardFeedback(
                    state,
-                   f"Object '{obj_name}' placed successfully. Bonus: {weight[pos[0] + i][pos[1] + j][pos[2] + k] * reward_per_obj_placed:.2f}",
-                   weight[pos[0] + i][pos[1] + j][pos[2] + k]
-                   * reward_per_obj_placed,
+                   f"Object '{obj_name}' placed successfully. Bonus: {bonus:.2f}",
+                   bonus,
                )
        if placement_failed:
            break
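Note: the new `_get_weight_value` helper returns 0.0 for any out-of-range index or an empty grid instead of raising `IndexError`, so a stacked placement past the top layer simply earns no weighted bonus. Below is a standalone copy of the helper from this diff with a small usage example (the sample grid is illustrative):

```python
# _get_weight_value as introduced in utils.py: a bounds-checked lookup into
# the weighted grid that falls back to 0.0 rather than raising IndexError.
def _get_weight_value(weight, x, y, z):
    if not weight or not weight[0] or not weight[0][0]:
        return 0.0

    if (
        x < 0
        or y < 0
        or z < 0
        or x >= len(weight)
        or y >= len(weight[0])
        or z >= len(weight[0][0])
    ):
        return 0.0

    return weight[x][y][z]


weight = [[[0.25, 0.75]]]                   # illustrative 1 x 1 x 2 grid
print(_get_weight_value(weight, 0, 0, 1))   # 0.75
print(_get_weight_value(weight, 0, 0, 2))   # 0.0 (z out of range)
print(_get_weight_value([], 0, 0, 0))       # 0.0 (empty grid)
```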