suraj-01 committed on
Commit
bc1af75
·
1 Parent(s): a9d4552

Fixed reward

Browse files
Files changed (3) hide show
  1. tasks/easy.py +3 -1
  2. tasks/hard.py +3 -1
  3. tasks/medium.py +3 -1
tasks/easy.py CHANGED
@@ -173,7 +173,9 @@ class EasyTaskGrader:
173
  raw = self.correct_actions / self.total_actions
174
  # Enforce strict (0, 1) range
175
  clamped = 0.01 + 0.98 * raw
176
- return round(float(clamped), 6)
 
 
177
 
178
 
179
  def passed(self) -> bool:
 
173
  raw = self.correct_actions / self.total_actions
174
  # Enforce strict (0, 1) range
175
  clamped = 0.01 + 0.98 * raw
176
+ rounded = round(float(clamped), 2)
177
+ # Ensure no rounding to boundaries (0.0 or 1.0)
178
+ return max(0.01, min(rounded, 0.99))
179
 
180
 
181
  def passed(self) -> bool:
tasks/hard.py CHANGED
@@ -398,7 +398,9 @@ class HardTaskGrader:
398
 
399
  # Enforce strict (0, 1) range
400
  clamped = 0.01 + 0.98 * final_base
401
- return round(float(clamped), 6)
 
 
402
 
403
 
404
  def passed(self) -> bool:
 
398
 
399
  # Enforce strict (0, 1) range
400
  clamped = 0.01 + 0.98 * final_base
401
+ rounded = round(float(clamped), 2)
402
+ # Ensure no rounding to boundaries (0.0 or 1.0)
403
+ return max(0.01, min(rounded, 0.99))
404
 
405
 
406
  def passed(self) -> bool:
tasks/medium.py CHANGED
@@ -222,7 +222,9 @@ class MediumTaskGrader:
222
  base_score = max(0.0, raw - fp_penalty - miss_penalty)
223
  # Enforce strict (0, 1) range
224
  clamped = 0.01 + 0.98 * base_score
225
- return round(float(clamped), 6)
 
 
226
 
227
 
228
  def passed(self) -> bool:
 
222
  base_score = max(0.0, raw - fp_penalty - miss_penalty)
223
  # Enforce strict (0, 1) range
224
  clamped = 0.01 + 0.98 * base_score
225
+ rounded = round(float(clamped), 2)
226
+ # Ensure no rounding to boundaries (0.0 or 1.0)
227
+ return max(0.01, min(rounded, 0.99))
228
 
229
 
230
  def passed(self) -> bool: