UjjwalPardeshi committed on
Commit
f1b57dc
·
1 Parent(s): 9bb9fe6

fix graders

Browse files
Files changed (2) hide show
  1. inference.py +1 -1
  2. ml_training_debugger/graders.py +5 -3
inference.py CHANGED
@@ -254,7 +254,7 @@ async def main() -> None:
254
  break
255
 
256
  score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
257
- score = min(max(score, 0.0), 1.0) # clamp to [0, 1]
258
  success = score >= SUCCESS_SCORE_THRESHOLD
259
 
260
  except Exception as exc:
 
254
  break
255
 
256
  score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
257
+ score = min(max(score, 0.01), 0.99) # clamp to (0, 1) exclusive
258
  success = score >= SUCCESS_SCORE_THRESHOLD
259
 
260
  except Exception as exc:
ml_training_debugger/graders.py CHANGED
@@ -277,8 +277,10 @@ GRADERS = {
277
 
278
 
279
  def grade_episode(task_id: str, state: EpisodeState, scenario: ScenarioParams) -> float:
280
- """Grade a completed episode. Returns 0.0-1.0."""
281
  grader = GRADERS.get(task_id)
282
  if grader is None:
283
- return 0.0
284
- return grader(state, scenario)
 
 
 
277
 
278
 
279
  def grade_episode(task_id: str, state: EpisodeState, scenario: ScenarioParams) -> float:
280
+ """Grade a completed episode. Returns score in (0.0, 1.0) exclusive."""
281
  grader = GRADERS.get(task_id)
282
  if grader is None:
283
+ return 0.01
284
+ score = grader(state, scenario)
285
+ # Clamp to strictly between 0 and 1 (evaluator rejects exact 0.0 and 1.0)
286
+ return max(0.01, min(0.99, score))