Spaces:
Running
Running
Commit ·
a7386cd
1
Parent(s): 58758e7
Fix reward serialization - add reward field to ClaimsObservation
Browse files
The reward was being calculated but not returned to the client because
ClaimsObservation was missing the reward field. OpenEnv's serialization
looks for observation.reward, but that field did not exist.
Changes:
- Add reward field to ClaimsObservation in models.py
- Set observation.reward after step() execution
- Set reward=0.0 on initial reset observation
Tested locally: fraud case +17.40, normal case +13.20
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- models.py +3 -0
- server/claims_environment.py +4 -0
models.py
CHANGED
|
@@ -69,6 +69,9 @@ class ClaimsObservation(Observation):
|
|
| 69 |
is_terminal: bool = Field(default=False, description="Whether episode is done")
|
| 70 |
terminal_reason: str = Field(default="", description="Why episode ended")
|
| 71 |
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
class ClaimsState(State):
|
| 74 |
"""
|
|
|
|
| 69 |
is_terminal: bool = Field(default=False, description="Whether episode is done")
|
| 70 |
terminal_reason: str = Field(default="", description="Why episode ended")
|
| 71 |
|
| 72 |
+
# Reward (required for OpenEnv serialization)
|
| 73 |
+
reward: float = Field(default=0.0, description="Reward from this step")
|
| 74 |
+
|
| 75 |
|
| 76 |
class ClaimsState(State):
|
| 77 |
"""
|
server/claims_environment.py
CHANGED
|
@@ -164,6 +164,7 @@ class ClaimsEnvironment(Environment):
|
|
| 164 |
time_elapsed_minutes=0,
|
| 165 |
queries_made=0,
|
| 166 |
is_terminal=False,
|
|
|
|
| 167 |
)
|
| 168 |
|
| 169 |
def step(self, action: ClaimsAction) -> ClaimsObservation:
|
|
@@ -196,6 +197,9 @@ class ClaimsEnvironment(Environment):
|
|
| 196 |
self._last_reward = reward
|
| 197 |
self._state.total_reward += reward
|
| 198 |
|
|
|
|
|
|
|
|
|
|
| 199 |
return observation
|
| 200 |
|
| 201 |
def _execute_action(self, action: ClaimsAction) -> Tuple[ClaimsObservation, float]:
|
|
|
|
| 164 |
time_elapsed_minutes=0,
|
| 165 |
queries_made=0,
|
| 166 |
is_terminal=False,
|
| 167 |
+
reward=0.0, # Initial observation has no reward
|
| 168 |
)
|
| 169 |
|
| 170 |
def step(self, action: ClaimsAction) -> ClaimsObservation:
|
|
|
|
| 197 |
self._last_reward = reward
|
| 198 |
self._state.total_reward += reward
|
| 199 |
|
| 200 |
+
# Set reward on observation for client serialization
|
| 201 |
+
observation.reward = reward
|
| 202 |
+
|
| 203 |
return observation
|
| 204 |
|
| 205 |
def _execute_action(self, action: ClaimsAction) -> Tuple[ClaimsObservation, float]:
|