md896 committed on
Commit
941f5f8
·
1 Parent(s): bc9f459

Keep task scores strictly inside (0,1) in inference logs

Browse files
Files changed (1) hide show
  1. inference.py +8 -2
inference.py CHANGED
@@ -37,6 +37,12 @@ TASK_CONFIGS = {
37
  "medium_logic_fix": {"max_steps": 20, "success_threshold": 0.7},
38
  "hard_multi_bug": {"max_steps": 30, "success_threshold": 0.5},
39
  }
 
 
 
 
 
 
40
 
41
 
42
  # ── Logging functions (EXACT FORMAT — DO NOT MODIFY) ────────────────────────
@@ -266,7 +272,7 @@ def run_task(
266
  break
267
 
268
  # Compute final score
269
- score = min(max(score, 0.0), 1.0)
270
  success = score >= success_threshold
271
 
272
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
@@ -310,7 +316,7 @@ def main():
310
  all_results.append(result)
311
  except Exception as e:
312
  print(f"[DEBUG] Task {task_id} failed: {e}", flush=True)
313
- log_end(success=False, steps=0, score=0.0, rewards=[])
314
 
315
  # Small delay between tasks
316
  time.sleep(2)
 
37
  "medium_logic_fix": {"max_steps": 20, "success_threshold": 0.7},
38
  "hard_multi_bug": {"max_steps": 30, "success_threshold": 0.5},
39
  }
40
+ MIN_STRICT_SCORE = 0.001
41
+ MAX_STRICT_SCORE = 0.999
42
+
43
+
44
+ def strict_score(value: float) -> float:
45
+ return min(MAX_STRICT_SCORE, max(MIN_STRICT_SCORE, value))
46
 
47
 
48
  # ── Logging functions (EXACT FORMAT — DO NOT MODIFY) ────────────────────────
 
272
  break
273
 
274
  # Compute final score
275
+ score = strict_score(score)
276
  success = score >= success_threshold
277
 
278
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
 
316
  all_results.append(result)
317
  except Exception as e:
318
  print(f"[DEBUG] Task {task_id} failed: {e}", flush=True)
319
+ log_end(success=False, steps=0, score=MIN_STRICT_SCORE, rewards=[])
320
 
321
  # Small delay between tasks
322
  time.sleep(2)