fix: introduce SCORE_EPSILON and clamp scores in run_episode and main functions
Browse files
- inference.py +14 -4
inference.py
CHANGED
|
@@ -62,6 +62,7 @@ MAX_RETRIES = 3
|
|
| 62 |
# 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
|
| 63 |
EPISODE_STEPS = 96
|
| 64 |
LAST_STEP_INDEX = EPISODE_STEPS - 1
|
|
|
|
| 65 |
|
| 66 |
SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
|
| 67 |
You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
|
|
@@ -103,6 +104,15 @@ def extract_json_object(text: str) -> dict[str, Any] | None:
|
|
| 103 |
return None
|
| 104 |
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
# ── Environment client ───────────────────────────────────────────────────────
|
| 107 |
|
| 108 |
|
|
@@ -146,7 +156,7 @@ class GridMindEnvClient:
|
|
| 146 |
return r.json()
|
| 147 |
except Exception as e:
|
| 148 |
print(f"[ERROR] Failed to grade: {e}", file=sys.stderr)
|
| 149 |
-
return {"score":
|
| 150 |
|
| 151 |
def state(self) -> dict | None:
|
| 152 |
try:
|
|
@@ -420,7 +430,7 @@ def run_episode(
|
|
| 420 |
"total_reward": total_reward,
|
| 421 |
"total_steps": total_steps,
|
| 422 |
"elapsed_sec": elapsed,
|
| 423 |
-
"score": grade.get("score",
|
| 424 |
"sub_scores": grade.get("sub_scores", {}),
|
| 425 |
"exploit_detected": grade.get("exploit_detected", False),
|
| 426 |
}
|
|
@@ -589,9 +599,9 @@ def main() -> None:
|
|
| 589 |
task_avgs: dict[int, float] = {}
|
| 590 |
for task_id in [1, 2, 3]:
|
| 591 |
scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
|
| 592 |
-
avg = sum(scores) / len(scores) if scores else
|
| 593 |
task_avgs[task_id] = avg
|
| 594 |
-
overall = sum(task_avgs.values()) / len(task_avgs)
|
| 595 |
|
| 596 |
output = {
|
| 597 |
"model": MODEL_NAME,
|
|
|
|
| 62 |
# 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
|
| 63 |
EPISODE_STEPS = 96
|
| 64 |
LAST_STEP_INDEX = EPISODE_STEPS - 1
|
| 65 |
+
# Margin that keeps reported scores strictly above 0 and strictly below 1 when
# clamping; also used as the fallback score when grading fails or a task has
# no results.
SCORE_EPSILON = 1e-6
|
| 66 |
|
| 67 |
SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
|
| 68 |
You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
|
|
|
|
| 104 |
return None
|
| 105 |
|
| 106 |
|
| 107 |
+
def clamp_open_score(score: float) -> float:
    """Clamp ``score`` into the strict open interval (0, 1).

    Args:
        score: Raw score to clamp.

    Returns:
        ``SCORE_EPSILON`` when ``score`` is NaN or at most 0.0,
        ``1.0 - SCORE_EPSILON`` when it is at least 1.0, and ``score``
        unchanged otherwise.
    """
    import math  # local import so the helper does not depend on the file's import block

    # NaN fails every ordered comparison, so without this check it would slip
    # past both bounds and be returned as-is; treat it like the lowest score.
    if math.isnan(score) or score <= 0.0:
        return SCORE_EPSILON
    if score >= 1.0:
        return 1.0 - SCORE_EPSILON
    return score
|
| 114 |
+
|
| 115 |
+
|
| 116 |
# ── Environment client ───────────────────────────────────────────────────────
|
| 117 |
|
| 118 |
|
|
|
|
| 156 |
return r.json()
|
| 157 |
except Exception as e:
|
| 158 |
print(f"[ERROR] Failed to grade: {e}", file=sys.stderr)
|
| 159 |
+
return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
|
| 160 |
|
| 161 |
def state(self) -> dict | None:
|
| 162 |
try:
|
|
|
|
| 430 |
"total_reward": total_reward,
|
| 431 |
"total_steps": total_steps,
|
| 432 |
"elapsed_sec": elapsed,
|
| 433 |
+
"score": clamp_open_score(float(grade.get("score", SCORE_EPSILON))),
|
| 434 |
"sub_scores": grade.get("sub_scores", {}),
|
| 435 |
"exploit_detected": grade.get("exploit_detected", False),
|
| 436 |
}
|
|
|
|
| 599 |
task_avgs: dict[int, float] = {}
|
| 600 |
for task_id in [1, 2, 3]:
|
| 601 |
scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
|
| 602 |
+
avg = clamp_open_score(sum(scores) / len(scores)) if scores else SCORE_EPSILON
|
| 603 |
task_avgs[task_id] = avg
|
| 604 |
+
overall = clamp_open_score(sum(task_avgs.values()) / len(task_avgs))
|
| 605 |
|
| 606 |
output = {
|
| 607 |
"model": MODEL_NAME,
|