Spaces:
Sleeping
Sleeping
Add task_id/seed to action for stateless step
Browse files
README.md
CHANGED
|
@@ -65,7 +65,7 @@ See `training/README.md` and `training/grpo_train.py` for a minimal GRPO run.
|
|
| 65 |
|
| 66 |
## Environment design
|
| 67 |
|
| 68 |
-
- Action: a single pytest file plus a finalize flag.
|
| 69 |
- Observation: task prompt, kill counts, reference pass/fail, runtime, and
|
| 70 |
surviving mutant ids.
|
| 71 |
- Reward: mutant kill rate minus penalties for false positives, runtime, and
|
|
|
|
| 65 |
|
| 66 |
## Environment design
|
| 67 |
|
| 68 |
+
- Action: a single pytest file plus a finalize flag (optionally include `task_id` and `seed` for stateless HTTP calls).
|
| 69 |
- Observation: task prompt, kill counts, reference pass/fail, runtime, and
|
| 70 |
surviving mutant ids.
|
| 71 |
- Reward: mutant kill rate minus penalties for false positives, runtime, and
|
mutationgym_env/models.py
CHANGED
|
@@ -10,6 +10,12 @@ class MutationGymAction(Action):
|
|
| 10 |
"""Agent action containing pytest code and a finalize flag."""
|
| 11 |
|
| 12 |
tests_py: str = Field(..., description="Pytest tests as a single Python file.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
finalize: bool = Field(
|
| 14 |
default=True, description="If true, end the episode after scoring."
|
| 15 |
)
|
|
|
|
| 10 |
"""Agent action containing pytest code and a finalize flag."""
|
| 11 |
|
| 12 |
tests_py: str = Field(..., description="Pytest tests as a single Python file.")
|
| 13 |
+
task_id: Optional[str] = Field(
|
| 14 |
+
default=None, description="Optional task id for stateless step calls."
|
| 15 |
+
)
|
| 16 |
+
seed: Optional[int] = Field(
|
| 17 |
+
default=None, description="Optional seed for implicit reset."
|
| 18 |
+
)
|
| 19 |
finalize: bool = Field(
|
| 20 |
default=True, description="If true, end the episode after scoring."
|
| 21 |
)
|
mutationgym_env/server/mutationgym_environment.py
CHANGED
|
@@ -84,7 +84,7 @@ class MutationGymEnvironment(Environment):
|
|
| 84 |
if self._current_task is None:
|
| 85 |
# In stateless HTTP calls, the server may instantiate a fresh
|
| 86 |
# environment per request. Fall back to an implicit reset.
|
| 87 |
-
self.reset()
|
| 88 |
|
| 89 |
try:
|
| 90 |
self._state = State(
|
|
|
|
| 84 |
if self._current_task is None:
|
| 85 |
# In stateless HTTP calls, the server may instantiate a fresh
|
| 86 |
# environment per request. Fall back to an implicit reset.
|
| 87 |
+
self.reset(seed=action.seed, task_id=action.task_id)
|
| 88 |
|
| 89 |
try:
|
| 90 |
self._state = State(
|