adityanaikhpt committed on
Commit
b53855f
·
1 Parent(s): 78b7e6b

Add normalize_reward(); enforce strict (0,1) on all paths

Browse files
Files changed (2) hide show
  1. server/app.py +1 -1
  2. server/grader.py +20 -13
server/app.py CHANGED
@@ -155,7 +155,7 @@ def api_step(action: CodeArenaAction):
155
  "test_results": "",
156
  "previous_attempts": [],
157
  },
158
- "reward": 0.0,
159
  "done": True,
160
  "info": {},
161
  }
 
155
  "test_results": "",
156
  "previous_attempts": [],
157
  },
158
+ "reward": 0.1,
159
  "done": True,
160
  "info": {},
161
  }
server/grader.py CHANGED
@@ -1,18 +1,25 @@
1
  from .models import ExecutionResult, TaskInfo
2
 
3
- def calculate_reward(exec_result: ExecutionResult, task_info: TaskInfo) -> float:
4
- passed_tests = exec_result.test_passed
5
- total_tests = exec_result.test_total
6
-
7
- if total_tests > 0:
8
- reward = passed_tests / total_tests
9
- else:
10
- reward = 0.0
11
 
12
- # enforce valid range
 
 
 
 
 
 
 
13
  if reward <= 0:
14
- reward = 0.1
15
  elif reward >= 1:
16
- reward = 0.9
17
-
18
- return reward
 
 
 
 
 
 
 
 
 
1
  from .models import ExecutionResult, TaskInfo
2
 
 
 
 
 
 
 
 
 
3
 
4
def normalize_reward(passed: int, total: int) -> float:
    """Map a test pass count to a reward strictly inside the open interval (0, 1).

    The pass ratio ``passed / total`` is clamped into ``[0.1, 0.9]`` so the
    result is never exactly 0.0 or 1.0 and never decreases as more tests pass.

    Args:
        passed: Number of tests that passed.
        total: Number of tests that were run.

    Returns:
        0.5 when ``total`` is 0 (no tests, so no signal either way);
        otherwise the pass ratio clamped to the range 0.1 through 0.9.
    """
    floor, ceiling = 0.1, 0.9

    if total == 0:
        return 0.5

    ratio = passed / total
    # Clamp instead of special-casing only the exact endpoints: remapping just
    # 0 -> 0.1 and 1 -> 0.9 would let 1/20 (0.05) score below a zero-pass run
    # (0.1) and 19/20 (0.95) score above a full pass (0.9).
    return float(min(max(ratio, floor), ceiling))
17
+
18
+
19
def calculate_reward(exec_result: ExecutionResult, task_info: TaskInfo) -> float:
    """Return the reward for one execution result.

    Reads the passed and total test counts from ``exec_result`` and hands
    them to ``normalize_reward``, whose return value is passed through
    unchanged. ``task_info`` is accepted but not consulted here.

    NOTE(review): the original docstring describes this as the single
    entry point used by env.py and app.py -- confirm against those callers.
    """
    passed_count = exec_result.test_passed
    total_count = exec_result.test_total
    return normalize_reward(passed_count, total_count)