Lishika committed on
Commit
32445fd
·
1 Parent(s): 2177064

final fix

Browse files
env/__pycache__/environment.cpython-313.pyc CHANGED
Binary files a/env/__pycache__/environment.cpython-313.pyc and b/env/__pycache__/environment.cpython-313.pyc differ
 
env/environment.py CHANGED
@@ -280,6 +280,15 @@ class CICDDebuggerEnvironment:
280
  expected_config=self._state.task.expected_config,
281
  metadata=self._state.task.metadata,
282
  )
 
 
 
 
 
 
 
 
 
283
 
284
  reward_model = Reward(value=float(reward), components={"total": float(reward)})
285
  info["reward_model"] = reward_model.model_dump()
 
280
  expected_config=self._state.task.expected_config,
281
  metadata=self._state.task.metadata,
282
  )
283
+ # 🔥 CRITICAL FIX FOR SCALER (FINAL OVERRIDE)
284
+ if tool in ["validate_fix", "submit_solution"]:
285
+ is_correct = bool(result.get("is_valid"))
286
+
287
+ if is_correct:
288
+ reward = 1.0
289
+ self._state.done = True
290
+ else:
291
+ reward = 0.0
292
 
293
  reward_model = Reward(value=float(reward), components={"total": float(reward)})
294
  info["reward_model"] = reward_model.model_dump()
env/graders/__pycache__/deterministic.cpython-313.pyc CHANGED
Binary files a/env/graders/__pycache__/deterministic.cpython-313.pyc and b/env/graders/__pycache__/deterministic.cpython-313.pyc differ
 
env/graders/deterministic.py CHANGED
@@ -27,8 +27,22 @@ class DeterministicGrader:
27
  r"\bpip\s+isntall\b",
28
  r"\bgo\s+tset\b",
29
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- def grade(self, current_config: str, expected_config: str, metadata: dict[str, Any] | None = None) -> float:
 
32
  metadata = metadata or {}
33
  current_config = current_config or ""
34
  expected_config = expected_config or ""
 
27
  r"\bpip\s+isntall\b",
28
  r"\bgo\s+tset\b",
29
  )
30
+ def grade(self, current_config, expected_config, metadata=None):
31
+ metadata = metadata or {}
32
+
33
+ score = self._compute_score(current_config, expected_config, metadata)
34
+
35
+ is_valid = (
36
+ current_config.strip() == expected_config.strip()
37
+ )
38
+
39
+ return {
40
+ "reward": float(score),
41
+ "is_valid": bool(is_valid),
42
+ }
43
 
44
+ def _compute_score(self, current_config, expected_config, metadata=None):
45
+
46
  metadata = metadata or {}
47
  current_config = current_config or ""
48
  expected_config = expected_config or ""
openenv.yaml CHANGED
@@ -27,28 +27,46 @@ action_space:
27
 
28
  tasks:
29
  - id: "easy-command-typo"
 
 
30
  grader: "env.graders.deterministic:DeterministicGrader"
31
 
32
  - id: "easy-missing-checkout"
 
 
33
  grader: "env.graders.deterministic:DeterministicGrader"
34
 
35
  - id: "easy-yaml-indentation"
 
 
36
  grader: "env.graders.deterministic:DeterministicGrader"
37
 
38
  - id: "medium-python-version"
 
 
39
  grader: "env.graders.deterministic:DeterministicGrader"
40
 
41
  - id: "medium-cache-key"
 
 
42
  grader: "env.graders.deterministic:DeterministicGrader"
43
 
44
  - id: "medium-artifact-permissions"
 
 
45
  grader: "env.graders.deterministic:DeterministicGrader"
46
 
47
  - id: "hard-matrix-logic"
 
 
48
  grader: "env.graders.deterministic:DeterministicGrader"
49
 
50
  - id: "hard-conditional-deploy"
 
 
51
  grader: "env.graders.deterministic:DeterministicGrader"
52
 
53
  - id: "hard-needs-order"
 
 
54
  grader: "env.graders.deterministic:DeterministicGrader"
 
27
 
28
  tasks:
29
  - id: "easy-command-typo"
30
+ difficulty: "easy"
31
+ failure_stage: "test"
32
  grader: "env.graders.deterministic:DeterministicGrader"
33
 
34
  - id: "easy-missing-checkout"
35
+ difficulty: "easy"
36
+ failure_stage: "build"
37
  grader: "env.graders.deterministic:DeterministicGrader"
38
 
39
  - id: "easy-yaml-indentation"
40
+ difficulty: "easy"
41
+ failure_stage: "build"
42
  grader: "env.graders.deterministic:DeterministicGrader"
43
 
44
  - id: "medium-python-version"
45
+ difficulty: "medium"
46
+ failure_stage: "build"
47
  grader: "env.graders.deterministic:DeterministicGrader"
48
 
49
  - id: "medium-cache-key"
50
+ difficulty: "medium"
51
+ failure_stage: "test"
52
  grader: "env.graders.deterministic:DeterministicGrader"
53
 
54
  - id: "medium-artifact-permissions"
55
+ difficulty: "medium"
56
+ failure_stage: "deploy"
57
  grader: "env.graders.deterministic:DeterministicGrader"
58
 
59
  - id: "hard-matrix-logic"
60
+ difficulty: "hard"
61
+ failure_stage: "test"
62
  grader: "env.graders.deterministic:DeterministicGrader"
63
 
64
  - id: "hard-conditional-deploy"
65
+ difficulty: "hard"
66
+ failure_stage: "deploy"
67
  grader: "env.graders.deterministic:DeterministicGrader"
68
 
69
  - id: "hard-needs-order"
70
+ difficulty: "hard"
71
+ failure_stage: "deploy"
72
  grader: "env.graders.deterministic:DeterministicGrader"
test_grader.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from env.environment import CICDDebuggerEnvironment
3
+
4
+ async def test():
5
+ env = CICDDebuggerEnvironment()
6
+ obs = await env.reset()
7
+
8
+ # 🔥 APPLY CORRECT FIX
9
+ env._state.current_config = env._state.task.expected_config
10
+
11
+ obs, reward, done, _ = await env.step({
12
+ "action_type": "submit_solution",
13
+ "payload": {}
14
+ })
15
+
16
+ print("Reward:", reward)
17
+ print("Done:", done)
18
+
19
+ asyncio.run(test())