Spaces:
Sleeping
Sleeping
arjeet committed on
Commit ·
e9c9d34
1
Parent(s): e26941e
inference update v5
Browse files
server/cust_env_environment.py
CHANGED
|
@@ -62,7 +62,8 @@ class DocSweeperEnvironment(Environment):
|
|
| 62 |
active_file=""
|
| 63 |
)
|
| 64 |
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
def step(self, action: DocAction):
|
| 68 |
if self._state is None:
|
|
@@ -109,10 +110,19 @@ class DocSweeperEnvironment(Environment):
|
|
| 109 |
|
| 110 |
new_score = self._calculate_state_score()
|
| 111 |
|
|
|
|
| 112 |
delta_reward = (new_score - old_score)
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
-
return self._make_observation(reward=
|
| 116 |
|
| 117 |
def _handle_edit(self, action: DocAction) -> float:
|
| 118 |
"""Executes the edit and returns a penalty if it fails."""
|
|
|
|
| 62 |
active_file=""
|
| 63 |
)
|
| 64 |
|
| 65 |
+
# Baseline reward is exactly 0.5 (neutral)
|
| 66 |
+
return self._make_observation(reward=0.5, done=False)
|
| 67 |
|
| 68 |
def step(self, action: DocAction):
|
| 69 |
if self._state is None:
|
|
|
|
| 110 |
|
| 111 |
new_score = self._calculate_state_score()
|
| 112 |
|
| 113 |
+
# Calculate raw delta reward
|
| 114 |
delta_reward = (new_score - old_score)
|
| 115 |
+
raw_step_reward = delta_reward + step_penalty
|
| 116 |
+
|
| 117 |
+
# Map reward to be strictly within (0.0, 1.0)
|
| 118 |
+
# raw_step_reward ranges roughly from -1.0 to 1.0. We map it so 0.0 raw = 0.5 mapped.
|
| 119 |
+
mapped_reward = (raw_step_reward + 1.0) / 2.0
|
| 120 |
+
|
| 121 |
+
# Clamp strictly to (0.0, 1.0) boundaries using a 0.01 epsilon
|
| 122 |
+
EPSILON = 0.01
|
| 123 |
+
final_reward = max(EPSILON, min(1.0 - EPSILON, mapped_reward))
|
| 124 |
|
| 125 |
+
return self._make_observation(reward=final_reward, done=done)
|
| 126 |
|
| 127 |
def _handle_edit(self, action: DocAction) -> float:
|
| 128 |
"""Executes the edit and returns a penalty if it fails."""
|