vikash-nuvai committed on
Commit
6a5b308
·
1 Parent(s): bfdceab

fix: clamp inference score between 0.0001 and 0.9999 so it passes the validator's strict (exclusive) 0–1 bounds check

Browse files
Files changed (1) hide show
  1. inference.py +6 -4
inference.py CHANGED
@@ -132,8 +132,8 @@ def run_episode(task_id: str) -> dict:
132
  obs = result.get("observation", result)
133
  except Exception as e:
134
  print(f" ERROR: Failed to reset environment: {e}", flush=True)
135
- print(f"[END] task={task_id} score=0.0 steps=0", flush=True)
136
- return {"task_id": task_id, "total_reward": 0.0, "reward": 0.0, "score": 0.0, "steps": 0, "error": str(e)}
137
 
138
  # Initialize conversation
139
  init_scene = obs.get("scene_description", "")
@@ -231,6 +231,8 @@ def run_episode(task_id: str) -> dict:
231
 
232
  # Extract final score
233
  final_score = obs.get("metadata", {}).get("final_score", 0.0)
 
 
234
  grade_breakdown = obs.get("metadata", {}).get("grade_breakdown", {})
235
 
236
  print(f"\n {'─'*40}", flush=True)
@@ -259,8 +261,8 @@ def run_episode(task_id: str) -> dict:
259
  # Catch-all: ensure [END] is ALWAYS emitted even on unexpected errors
260
  print(f" FATAL ERROR in episode {task_id}: {e}", flush=True)
261
  traceback.print_exc()
262
- print(f"[END] task={task_id} score=0.0 steps={step}", flush=True)
263
- return {"task_id": task_id, "total_reward": 0.0, "reward": 0.0, "score": 0.0, "steps": step, "error": str(e)}
264
 
265
 
266
  def main():
 
132
  obs = result.get("observation", result)
133
  except Exception as e:
134
  print(f" ERROR: Failed to reset environment: {e}", flush=True)
135
+ print(f"[END] task={task_id} score=0.0001 steps=0", flush=True)
136
+ return {"task_id": task_id, "total_reward": 0.0, "reward": 0.0, "score": 0.0001, "steps": 0, "error": str(e)}
137
 
138
  # Initialize conversation
139
  init_scene = obs.get("scene_description", "")
 
231
 
232
  # Extract final score
233
  final_score = obs.get("metadata", {}).get("final_score", 0.0)
234
+ # Ensure score is strictly between 0 and 1 (exclusive) for the validator
235
+ final_score = max(0.0001, min(0.9999, float(final_score)))
236
  grade_breakdown = obs.get("metadata", {}).get("grade_breakdown", {})
237
 
238
  print(f"\n {'─'*40}", flush=True)
 
261
  # Catch-all: ensure [END] is ALWAYS emitted even on unexpected errors
262
  print(f" FATAL ERROR in episode {task_id}: {e}", flush=True)
263
  traceback.print_exc()
264
+ print(f"[END] task={task_id} score=0.0001 steps={step}", flush=True)
265
+ return {"task_id": task_id, "total_reward": 0.0, "reward": 0.0, "score": 0.0001, "steps": step, "error": str(e)}
266
 
267
 
268
  def main():