paradox44 committed on
Commit
f475e81
·
verified ·
1 Parent(s): b552d82

Add task_id/seed to action for stateless step

Browse files
README.md CHANGED
@@ -65,7 +65,7 @@ See `training/README.md` and `training/grpo_train.py` for a minimal GRPO run.
65
 
66
  ## Environment design
67
 
68
- - Action: a single pytest file plus a finalize flag.
69
  - Observation: task prompt, kill counts, reference pass/fail, runtime, and
70
  surviving mutant ids.
71
  - Reward: mutant kill rate minus penalties for false positives, runtime, and
 
65
 
66
  ## Environment design
67
 
68
+ - Action: a single pytest file plus a finalize flag (optionally include `task_id` and `seed` for stateless HTTP calls).
69
  - Observation: task prompt, kill counts, reference pass/fail, runtime, and
70
  surviving mutant ids.
71
  - Reward: mutant kill rate minus penalties for false positives, runtime, and
mutationgym_env/models.py CHANGED
@@ -10,6 +10,12 @@ class MutationGymAction(Action):
10
  """Agent action containing pytest code and a finalize flag."""
11
 
12
  tests_py: str = Field(..., description="Pytest tests as a single Python file.")
 
 
 
 
 
 
13
  finalize: bool = Field(
14
  default=True, description="If true, end the episode after scoring."
15
  )
 
10
  """Agent action containing pytest code and a finalize flag."""
11
 
12
  tests_py: str = Field(..., description="Pytest tests as a single Python file.")
13
+ task_id: Optional[str] = Field(
14
+ default=None, description="Optional task id for stateless step calls."
15
+ )
16
+ seed: Optional[int] = Field(
17
+ default=None, description="Optional seed for implicit reset."
18
+ )
19
  finalize: bool = Field(
20
  default=True, description="If true, end the episode after scoring."
21
  )
mutationgym_env/server/mutationgym_environment.py CHANGED
@@ -84,7 +84,7 @@ class MutationGymEnvironment(Environment):
84
  if self._current_task is None:
85
  # In stateless HTTP calls, the server may instantiate a fresh
86
  # environment per request. Fall back to an implicit reset.
87
- self.reset()
88
 
89
  try:
90
  self._state = State(
 
84
  if self._current_task is None:
85
  # In stateless HTTP calls, the server may instantiate a fresh
86
  # environment per request. Fall back to an implicit reset.
87
+ self.reset(seed=action.seed, task_id=action.task_id)
88
 
89
  try:
90
  self._state = State(