final fix
Browse files
- env/__pycache__/environment.cpython-313.pyc +0 -0
- env/environment.py +9 -0
- env/graders/__pycache__/deterministic.cpython-313.pyc +0 -0
- env/graders/deterministic.py +15 -1
- openenv.yaml +18 -0
- test_grader.py +19 -0
env/__pycache__/environment.cpython-313.pyc
CHANGED
|
Binary files a/env/__pycache__/environment.cpython-313.pyc and b/env/__pycache__/environment.cpython-313.pyc differ
|
|
|
env/environment.py
CHANGED
|
@@ -280,6 +280,15 @@ class CICDDebuggerEnvironment:
|
|
| 280 |
expected_config=self._state.task.expected_config,
|
| 281 |
metadata=self._state.task.metadata,
|
| 282 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
reward_model = Reward(value=float(reward), components={"total": float(reward)})
|
| 285 |
info["reward_model"] = reward_model.model_dump()
|
|
|
|
| 280 |
expected_config=self._state.task.expected_config,
|
| 281 |
metadata=self._state.task.metadata,
|
| 282 |
)
|
| 283 |
+
# 🔥 CRITICAL FIX FOR SCALER (FINAL OVERRIDE)
|
| 284 |
+
if tool in ["validate_fix", "submit_solution"]:
|
| 285 |
+
is_correct = bool(result.get("is_valid"))
|
| 286 |
+
|
| 287 |
+
if is_correct:
|
| 288 |
+
reward = 1.0
|
| 289 |
+
self._state.done = True
|
| 290 |
+
else:
|
| 291 |
+
reward = 0.0
|
| 292 |
|
| 293 |
reward_model = Reward(value=float(reward), components={"total": float(reward)})
|
| 294 |
info["reward_model"] = reward_model.model_dump()
|
env/graders/__pycache__/deterministic.cpython-313.pyc
CHANGED
|
Binary files a/env/graders/__pycache__/deterministic.cpython-313.pyc and b/env/graders/__pycache__/deterministic.cpython-313.pyc differ
|
|
|
env/graders/deterministic.py
CHANGED
|
@@ -27,8 +27,22 @@ class DeterministicGrader:
|
|
| 27 |
r"\bpip\s+isntall\b",
|
| 28 |
r"\bgo\s+tset\b",
|
| 29 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
def
|
|
|
|
| 32 |
metadata = metadata or {}
|
| 33 |
current_config = current_config or ""
|
| 34 |
expected_config = expected_config or ""
|
|
|
|
| 27 |
r"\bpip\s+isntall\b",
|
| 28 |
r"\bgo\s+tset\b",
|
| 29 |
)
|
| 30 |
+
def grade(self, current_config, expected_config, metadata=None):
    """Grade a candidate config against the expected config.

    Args:
        current_config: Candidate config text; ``None`` is treated as an
            empty string for the exact-match check.
        expected_config: Reference config text; ``None`` is treated as an
            empty string for the exact-match check.
        metadata: Optional task metadata forwarded to ``_compute_score``;
            a falsy value is replaced by an empty dict.

    Returns:
        A dict with two keys: ``"reward"``, the float score returned by
        ``_compute_score``, and ``"is_valid"``, which is ``True`` only when
        both configs are equal after stripping surrounding whitespace.
    """
    metadata = metadata or {}

    score = self._compute_score(current_config, expected_config, metadata)

    # Normalise missing configs the same way _compute_score does, so the
    # exact-match check cannot raise AttributeError when a config is None.
    current_text = current_config or ""
    expected_text = expected_config or ""
    is_valid = current_text.strip() == expected_text.strip()

    return {
        "reward": float(score),
        "is_valid": bool(is_valid),
    }
|
| 43 |
|
| 44 |
+
def _compute_score(self, current_config, expected_config, metadata=None):
|
| 45 |
+
|
| 46 |
metadata = metadata or {}
|
| 47 |
current_config = current_config or ""
|
| 48 |
expected_config = expected_config or ""
|
openenv.yaml
CHANGED
|
@@ -27,28 +27,46 @@ action_space:
|
|
| 27 |
|
| 28 |
tasks:
|
| 29 |
- id: "easy-command-typo"
|
|
|
|
|
|
|
| 30 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 31 |
|
| 32 |
- id: "easy-missing-checkout"
|
|
|
|
|
|
|
| 33 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 34 |
|
| 35 |
- id: "easy-yaml-indentation"
|
|
|
|
|
|
|
| 36 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 37 |
|
| 38 |
- id: "medium-python-version"
|
|
|
|
|
|
|
| 39 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 40 |
|
| 41 |
- id: "medium-cache-key"
|
|
|
|
|
|
|
| 42 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 43 |
|
| 44 |
- id: "medium-artifact-permissions"
|
|
|
|
|
|
|
| 45 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 46 |
|
| 47 |
- id: "hard-matrix-logic"
|
|
|
|
|
|
|
| 48 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 49 |
|
| 50 |
- id: "hard-conditional-deploy"
|
|
|
|
|
|
|
| 51 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 52 |
|
| 53 |
- id: "hard-needs-order"
|
|
|
|
|
|
|
| 54 |
grader: "env.graders.deterministic:DeterministicGrader"
|
|
|
|
| 27 |
|
| 28 |
tasks:
|
| 29 |
- id: "easy-command-typo"
|
| 30 |
+
difficulty: "easy"
|
| 31 |
+
failure_stage: "test"
|
| 32 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 33 |
|
| 34 |
- id: "easy-missing-checkout"
|
| 35 |
+
difficulty: "easy"
|
| 36 |
+
failure_stage: "build"
|
| 37 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 38 |
|
| 39 |
- id: "easy-yaml-indentation"
|
| 40 |
+
difficulty: "easy"
|
| 41 |
+
failure_stage: "build"
|
| 42 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 43 |
|
| 44 |
- id: "medium-python-version"
|
| 45 |
+
difficulty: "medium"
|
| 46 |
+
failure_stage: "build"
|
| 47 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 48 |
|
| 49 |
- id: "medium-cache-key"
|
| 50 |
+
difficulty: "medium"
|
| 51 |
+
failure_stage: "test"
|
| 52 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 53 |
|
| 54 |
- id: "medium-artifact-permissions"
|
| 55 |
+
difficulty: "medium"
|
| 56 |
+
failure_stage: "deploy"
|
| 57 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 58 |
|
| 59 |
- id: "hard-matrix-logic"
|
| 60 |
+
difficulty: "hard"
|
| 61 |
+
failure_stage: "test"
|
| 62 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 63 |
|
| 64 |
- id: "hard-conditional-deploy"
|
| 65 |
+
difficulty: "hard"
|
| 66 |
+
failure_stage: "deploy"
|
| 67 |
grader: "env.graders.deterministic:DeterministicGrader"
|
| 68 |
|
| 69 |
- id: "hard-needs-order"
|
| 70 |
+
difficulty: "hard"
|
| 71 |
+
failure_stage: "deploy"
|
| 72 |
grader: "env.graders.deterministic:DeterministicGrader"
|
test_grader.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
from env.environment import CICDDebuggerEnvironment


async def test():
    """Reset the environment, install the expected config, and submit it.

    Prints the reward and the done flag returned by the ``submit_solution``
    step so they can be inspected manually.
    """
    env = CICDDebuggerEnvironment()
    await env.reset()

    # Simulate a correct fix by copying the task's expected config over the
    # current config before submitting.
    env._state.current_config = env._state.task.expected_config

    _, reward, done, _ = await env.step({
        "action_type": "submit_solution",
        "payload": {},
    })

    print("Reward:", reward)
    print("Done:", done)


# Guard the entry point so that merely importing this module does not run a
# full episode as a side effect.
if __name__ == "__main__":
    asyncio.run(test())
|