Spaces:

LO-Kyu
/

gridmind

Running

App Files Community

adityss committed 13 days ago

Commit

b93cee3

1 Parent(s): e58b5ec

fix: introduce SCORE_EPSILON and clamp scores in run_episode and main functions

Browse files

Files changed (1) hide show

inference.py +14 -4

inference.py CHANGED Viewed

@@ -62,6 +62,7 @@ MAX_RETRIES = 3
 # 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
 EPISODE_STEPS = 96
 LAST_STEP_INDEX = EPISODE_STEPS - 1
 SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
 You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
@@ -103,6 +104,15 @@ def extract_json_object(text: str) -> dict[str, Any] | None:
     return None
 # ── Environment client ───────────────────────────────────────────────────────
@@ -146,7 +156,7 @@ class GridMindEnvClient:
             return r.json()
         except Exception as e:
             print(f"[ERROR] Failed to grade: {e}", file=sys.stderr)
-            return {"score": 0.0, "sub_scores": {}, "exploit_detected": False}
     def state(self) -> dict | None:
         try:
@@ -420,7 +430,7 @@ def run_episode(
         "total_reward": total_reward,
         "total_steps": total_steps,
         "elapsed_sec": elapsed,
-        "score": grade.get("score", 0.0),
         "sub_scores": grade.get("sub_scores", {}),
         "exploit_detected": grade.get("exploit_detected", False),
     }
@@ -589,9 +599,9 @@ def main() -> None:
         task_avgs: dict[int, float] = {}
         for task_id in [1, 2, 3]:
             scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
-            avg = sum(scores) / len(scores) if scores else 0.0
             task_avgs[task_id] = avg
-        overall = sum(task_avgs.values()) / len(task_avgs)
         output = {
             "model": MODEL_NAME,

 # 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
 EPISODE_STEPS = 96
 LAST_STEP_INDEX = EPISODE_STEPS - 1
+SCORE_EPSILON = 1e-6
 SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
 You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
     return None
+def clamp_open_score(score: float) -> float:
+    """Clamp score into strict open interval (0, 1)."""
+    if score <= 0.0:
+        return SCORE_EPSILON
+    if score >= 1.0:
+        return 1.0 - SCORE_EPSILON
+    return score
 # ── Environment client ───────────────────────────────────────────────────────
             return r.json()
         except Exception as e:
             print(f"[ERROR] Failed to grade: {e}", file=sys.stderr)
+            return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
     def state(self) -> dict | None:
         try:
         "total_reward": total_reward,
         "total_steps": total_steps,
         "elapsed_sec": elapsed,
+        "score": clamp_open_score(float(grade.get("score", SCORE_EPSILON))),
         "sub_scores": grade.get("sub_scores", {}),
         "exploit_detected": grade.get("exploit_detected", False),
     }
         task_avgs: dict[int, float] = {}
         for task_id in [1, 2, 3]:
             scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
+            avg = clamp_open_score(sum(scores) / len(scores)) if scores else SCORE_EPSILON
             task_avgs[task_id] = avg
+        overall = clamp_open_score(sum(task_avgs.values()) / len(task_avgs))
         output = {
             "model": MODEL_NAME,