Spaces:
Sleeping
Sleeping
Commit ·
9fecec8
1
Parent(s): 02c65a9
Fix: clamp all scores/rewards strictly to (0,1) exclusive range
Browse files
- app.py +3 -1
- inference.py +4 -4
- server/app.py +4 -2
app.py
CHANGED
|
@@ -65,6 +65,8 @@ def state():
|
|
| 65 |
def step(request: ActionRequest):
|
| 66 |
action = request.action
|
| 67 |
observation, reward, done, info = env.step(action)
|
|
|
|
|
|
|
| 68 |
return {
|
| 69 |
"observation": {
|
| 70 |
"step": observation.step,
|
|
@@ -75,7 +77,7 @@ def step(request: ActionRequest):
|
|
| 75 |
"fix_applied": observation.fix_applied,
|
| 76 |
"is_resolved": observation.is_resolved,
|
| 77 |
},
|
| 78 |
-
"reward": reward,
|
| 79 |
"done": done,
|
| 80 |
"info": info,
|
| 81 |
}
|
|
|
|
| 65 |
def step(request: ActionRequest):
|
| 66 |
action = request.action
|
| 67 |
observation, reward, done, info = env.step(action)
|
| 68 |
+
# Clamp reward to strictly (0, 1) for OpenEnv compliance
|
| 69 |
+
clamped_reward = min(max(reward / 20.5, 0.001), 0.999)
|
| 70 |
return {
|
| 71 |
"observation": {
|
| 72 |
"step": observation.step,
|
|
|
|
| 77 |
"fix_applied": observation.fix_applied,
|
| 78 |
"is_resolved": observation.is_resolved,
|
| 79 |
},
|
| 80 |
+
"reward": clamped_reward,
|
| 81 |
"done": done,
|
| 82 |
"info": info,
|
| 83 |
}
|
inference.py
CHANGED
|
@@ -117,7 +117,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
|
|
| 117 |
history: List[str] = []
|
| 118 |
rewards: List[float] = []
|
| 119 |
steps_taken = 0
|
| 120 |
-
score = 0.0
|
| 121 |
success = False
|
| 122 |
|
| 123 |
log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
|
|
@@ -149,9 +149,9 @@ def run_task(client: OpenAI, task_id: str) -> None:
|
|
| 149 |
success = info.get("resolution") == "success"
|
| 150 |
break
|
| 151 |
|
| 152 |
-
# Compute score from actual rewards, clamped to [0, 1]
|
| 153 |
-
score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
|
| 154 |
-
score = min(max(score, 0.0), 1.0)
|
| 155 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 156 |
|
| 157 |
except Exception as e:
|
|
|
|
| 117 |
history: List[str] = []
|
| 118 |
rewards: List[float] = []
|
| 119 |
steps_taken = 0
|
| 120 |
+
score = 0.001
|
| 121 |
success = False
|
| 122 |
|
| 123 |
log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
|
|
|
|
| 149 |
success = info.get("resolution") == "success"
|
| 150 |
break
|
| 151 |
|
| 152 |
+
# Compute score from actual rewards, clamped strictly to (0, 1)
|
| 153 |
+
score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.001
|
| 154 |
+
score = min(max(score, 0.001), 0.999)
|
| 155 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 156 |
|
| 157 |
except Exception as e:
|
server/app.py
CHANGED
|
@@ -65,6 +65,8 @@ def state():
|
|
| 65 |
def step(request: ActionRequest):
|
| 66 |
action = request.action
|
| 67 |
observation, reward, done, info = env.step(action)
|
|
|
|
|
|
|
| 68 |
return {
|
| 69 |
"observation": {
|
| 70 |
"step": observation.step,
|
|
@@ -75,7 +77,7 @@ def step(request: ActionRequest):
|
|
| 75 |
"fix_applied": observation.fix_applied,
|
| 76 |
"is_resolved": observation.is_resolved,
|
| 77 |
},
|
| 78 |
-
"reward": reward,
|
| 79 |
"done": done,
|
| 80 |
"info": info,
|
| 81 |
}
|
|
@@ -121,4 +123,4 @@ def main():
|
|
| 121 |
|
| 122 |
|
| 123 |
if __name__ == "__main__":
|
| 124 |
-
main()
|
|
|
|
| 65 |
def step(request: ActionRequest):
|
| 66 |
action = request.action
|
| 67 |
observation, reward, done, info = env.step(action)
|
| 68 |
+
# Clamp reward to strictly (0, 1) for OpenEnv compliance
|
| 69 |
+
clamped_reward = min(max(reward / 20.5, 0.001), 0.999)
|
| 70 |
return {
|
| 71 |
"observation": {
|
| 72 |
"step": observation.step,
|
|
|
|
| 77 |
"fix_applied": observation.fix_applied,
|
| 78 |
"is_resolved": observation.is_resolved,
|
| 79 |
},
|
| 80 |
+
"reward": clamped_reward,
|
| 81 |
"done": done,
|
| 82 |
"info": info,
|
| 83 |
}
|
|
|
|
| 123 |
|
| 124 |
|
| 125 |
if __name__ == "__main__":
|
| 126 |
+
main()
|