Spaces:
Sleeping
Sleeping
Enforce exclusive score bounds in grader and inference
Browse files
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
- inference.py +5 -1
- server/releaseops_environment.py +5 -0
inference.py
CHANGED
|
@@ -297,12 +297,16 @@ def run_task(llm: OpenAI, task_id: str) -> dict:
|
|
| 297 |
break
|
| 298 |
|
| 299 |
score = obs_dict.get("final_score") or 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
success = score >= 0.5
|
| 301 |
|
| 302 |
except Exception as e:
|
| 303 |
print(f"[DEBUG] Task {task_id} failed with error: {e}", flush=True)
|
| 304 |
success = False
|
| 305 |
-
score = 0.
|
| 306 |
finally:
|
| 307 |
log_end(success, step, score, rewards)
|
| 308 |
|
|
|
|
| 297 |
break
|
| 298 |
|
| 299 |
score = obs_dict.get("final_score") or 0.0
|
| 300 |
+
if score <= 0.0:
|
| 301 |
+
score = 0.001
|
| 302 |
+
elif score >= 1.0:
|
| 303 |
+
score = 0.999
|
| 304 |
success = score >= 0.5
|
| 305 |
|
| 306 |
except Exception as e:
|
| 307 |
print(f"[DEBUG] Task {task_id} failed with error: {e}", flush=True)
|
| 308 |
success = False
|
| 309 |
+
score = 0.001
|
| 310 |
finally:
|
| 311 |
log_end(success, step, score, rewards)
|
| 312 |
|
server/releaseops_environment.py
CHANGED
|
@@ -881,6 +881,11 @@ class ReleaseOpsEnvironment(Environment):
|
|
| 881 |
+ 0.10 * efficiency
|
| 882 |
)
|
| 883 |
score = max(0.0, min(1.0, raw_score - forbidden_penalty))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 884 |
|
| 885 |
return {
|
| 886 |
"score": round(score, 3),
|
|
|
|
| 881 |
+ 0.10 * efficiency
|
| 882 |
)
|
| 883 |
score = max(0.0, min(1.0, raw_score - forbidden_penalty))
|
| 884 |
+
# Hackathon validator requires strict bounds: 0 < score < 1
|
| 885 |
+
if score <= 0.0:
|
| 886 |
+
score = 0.001
|
| 887 |
+
elif score >= 1.0:
|
| 888 |
+
score = 0.999
|
| 889 |
|
| 890 |
return {
|
| 891 |
"score": round(score, 3),
|