adityss committed on
Commit
b93cee3
·
1 Parent(s): e58b5ec

fix: introduce SCORE_EPSILON and clamp scores in run_episode and main functions

Browse files
Files changed (1) hide show
  1. inference.py +14 -4
inference.py CHANGED
@@ -62,6 +62,7 @@ MAX_RETRIES = 3
62
  # 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
63
  EPISODE_STEPS = 96
64
  LAST_STEP_INDEX = EPISODE_STEPS - 1
 
65
 
66
  SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
67
  You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
@@ -103,6 +104,15 @@ def extract_json_object(text: str) -> dict[str, Any] | None:
103
  return None
104
 
105
 
 
 
 
 
 
 
 
 
 
106
  # ── Environment client ───────────────────────────────────────────────────────
107
 
108
 
@@ -146,7 +156,7 @@ class GridMindEnvClient:
146
  return r.json()
147
  except Exception as e:
148
  print(f"[ERROR] Failed to grade: {e}", file=sys.stderr)
149
- return {"score": 0.0, "sub_scores": {}, "exploit_detected": False}
150
 
151
  def state(self) -> dict | None:
152
  try:
@@ -420,7 +430,7 @@ def run_episode(
420
  "total_reward": total_reward,
421
  "total_steps": total_steps,
422
  "elapsed_sec": elapsed,
423
- "score": grade.get("score", 0.0),
424
  "sub_scores": grade.get("sub_scores", {}),
425
  "exploit_detected": grade.get("exploit_detected", False),
426
  }
@@ -589,9 +599,9 @@ def main() -> None:
589
  task_avgs: dict[int, float] = {}
590
  for task_id in [1, 2, 3]:
591
  scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
592
- avg = sum(scores) / len(scores) if scores else 0.0
593
  task_avgs[task_id] = avg
594
- overall = sum(task_avgs.values()) / len(task_avgs)
595
 
596
  output = {
597
  "model": MODEL_NAME,
 
62
  # 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
63
  EPISODE_STEPS = 96
64
  LAST_STEP_INDEX = EPISODE_STEPS - 1
65
+ SCORE_EPSILON = 1e-6
66
 
67
  SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
68
  You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
 
104
  return None
105
 
106
 
107
def clamp_open_score(score: float) -> float:
    """Clamp *score* into the strict open interval (0, 1).

    Values at or below 0.0 map to ``SCORE_EPSILON`` and values at or above
    1.0 map to ``1.0 - SCORE_EPSILON``; anything strictly in between is
    returned unchanged. NaN is treated like the lowest score, because it
    would otherwise slip past both range checks and be returned as-is.

    Args:
        score: Raw score to clamp.

    Returns:
        ``SCORE_EPSILON``, ``1.0 - SCORE_EPSILON``, or *score* itself when it
        already lies strictly between 0 and 1.
    """
    # Written as "not >" rather than "<=" on purpose: every ordered comparison
    # against NaN is False, so this form routes NaN into the lower clamp
    # instead of letting it fall through to the final return.
    if not score > 0.0:
        return SCORE_EPSILON
    if score >= 1.0:
        return 1.0 - SCORE_EPSILON
    return score
114
+
115
+
116
  # ── Environment client ───────────────────────────────────────────────────────
117
 
118
 
 
156
  return r.json()
157
  except Exception as e:
158
  print(f"[ERROR] Failed to grade: {e}", file=sys.stderr)
159
+ return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
160
 
161
  def state(self) -> dict | None:
162
  try:
 
430
  "total_reward": total_reward,
431
  "total_steps": total_steps,
432
  "elapsed_sec": elapsed,
433
+ "score": clamp_open_score(float(grade.get("score", SCORE_EPSILON))),
434
  "sub_scores": grade.get("sub_scores", {}),
435
  "exploit_detected": grade.get("exploit_detected", False),
436
  }
 
599
  task_avgs: dict[int, float] = {}
600
  for task_id in [1, 2, 3]:
601
  scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
602
+ avg = clamp_open_score(sum(scores) / len(scores)) if scores else SCORE_EPSILON
603
  task_avgs[task_id] = avg
604
+ overall = clamp_open_score(sum(task_avgs.values()) / len(task_avgs))
605
 
606
  output = {
607
  "model": MODEL_NAME,