Spaces:
Sleeping
Sleeping
natnael kahssay Claude Sonnet 4.6 committed on
Commit ·
5d3d3ff
1
Parent(s): 002fe30
feat: multi-turn tool-using RL environment (RFC 005 pattern)
Browse files
Actions are now tool calls (read/edit/bash/submit) instead of one-shot
file submissions. The agent navigates the sandbox across multiple steps,
exactly like Claude Code, then calls submit to run vitest and get reward.
- MOAAction: {tool, params} replaces {file_path, content}
- MOAObservation: adds tool_result, user_messages, step_count
- moa_env.py: dispatches read/edit/bash/submit with sandbox path safety
- tasks.py: adds real user_messages per task (Step 1 intent alignment)
- Max 20 steps per episode; auto-submits on step 20
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/envs/moa_env/moa_env.py +148 -53
- src/envs/moa_env/models.py +36 -10
- src/envs/moa_env/tasks.py +21 -0
src/envs/moa_env/moa_env.py
CHANGED
|
@@ -1,8 +1,16 @@
|
|
| 1 |
"""
|
| 2 |
-
MOA Code Environment.
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
|
@@ -15,18 +23,17 @@ from core.env_server import Action, Environment, Observation
|
|
| 15 |
from .models import MOAAction, MOAObservation, MOAState
|
| 16 |
from .tasks import load_task, TASKS
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
class MOAEnv(Environment):
|
| 20 |
-
"""
|
| 21 |
-
RL environment for TypeScript code tasks derived from real MOA dev sessions.
|
| 22 |
-
|
| 23 |
-
reset() → gives agent task description + broken file + test file
|
| 24 |
-
step() → agent submits fixed file → runs vitest → returns reward
|
| 25 |
-
"""
|
| 26 |
|
|
|
|
| 27 |
def __init__(self):
|
| 28 |
self._state = MOAState()
|
| 29 |
-
self._task_index = 0
|
|
|
|
|
|
|
| 30 |
|
| 31 |
def reset(self) -> Observation:
|
| 32 |
task_id = TASKS[self._task_index % len(TASKS)]["id"]
|
|
@@ -39,6 +46,7 @@ class MOAEnv(Environment):
|
|
| 39 |
episode_id=str(uuid.uuid4()),
|
| 40 |
step_count=0,
|
| 41 |
current_task=task["description"],
|
|
|
|
| 42 |
broken_file_path=task["source_file"],
|
| 43 |
broken_file_content=task["broken_content"],
|
| 44 |
test_file_content=task["test_file_content"],
|
|
@@ -49,30 +57,149 @@ class MOAEnv(Environment):
|
|
| 49 |
|
| 50 |
return MOAObservation(
|
| 51 |
task=task["description"],
|
|
|
|
| 52 |
broken_file_path=task["source_file"],
|
| 53 |
broken_file_content=task["broken_content"],
|
| 54 |
test_file_content=task["test_file_content"],
|
|
|
|
|
|
|
| 55 |
done=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
)
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def _make_sandbox(self, task: dict) -> str:
|
| 59 |
-
"""
|
| 60 |
-
Copy moav2/src to a temp dir, symlink node_modules from the pre-installed
|
| 61 |
-
/app/moav2 directory (avoids copying 700MB per request), then blank the
|
| 62 |
-
target source file so the agent starts from scratch.
|
| 63 |
-
"""
|
| 64 |
import shutil
|
| 65 |
MOAV2 = "/app/moav2"
|
| 66 |
sandbox = tempfile.mkdtemp(prefix="moa_env_")
|
| 67 |
|
| 68 |
-
# Copy source tree (812KB) and config files
|
| 69 |
shutil.copytree(os.path.join(MOAV2, "src"), os.path.join(sandbox, "src"))
|
| 70 |
for f in ("package.json", "vitest.config.ts", "tsconfig.json"):
|
| 71 |
src = os.path.join(MOAV2, f)
|
| 72 |
if os.path.exists(src):
|
| 73 |
shutil.copy(src, sandbox)
|
| 74 |
|
| 75 |
-
# Symlink node_modules — no copy needed
|
| 76 |
os.symlink(
|
| 77 |
os.path.join(MOAV2, "node_modules"),
|
| 78 |
os.path.join(sandbox, "node_modules"),
|
|
@@ -80,47 +207,15 @@ class MOAEnv(Environment):
|
|
| 80 |
|
| 81 |
# Blank out the target file — agent must implement it
|
| 82 |
broken_path = os.path.join(sandbox, task["source_file"])
|
|
|
|
| 83 |
with open(broken_path, "w") as f:
|
| 84 |
f.write(task["broken_content"])
|
| 85 |
|
| 86 |
return sandbox
|
| 87 |
|
| 88 |
-
|
| 89 |
-
if not isinstance(action, MOAAction):
|
| 90 |
-
raise ValueError(f"Expected MOAAction, got {type(action)}")
|
| 91 |
-
|
| 92 |
-
self._state.step_count += 1
|
| 93 |
-
|
| 94 |
-
# write agent's fix into sandbox
|
| 95 |
-
sandbox_file = os.path.join(
|
| 96 |
-
self._state.sandbox_dir,
|
| 97 |
-
action.file_path.lstrip("/"),
|
| 98 |
-
)
|
| 99 |
-
os.makedirs(os.path.dirname(sandbox_file), exist_ok=True)
|
| 100 |
-
with open(sandbox_file, "w") as f:
|
| 101 |
-
f.write(action.content)
|
| 102 |
-
|
| 103 |
-
# run tests
|
| 104 |
-
passed, total, output = self._run_tests()
|
| 105 |
-
reward = passed / max(total, 1)
|
| 106 |
-
done = (passed == total and total > 0) or self._state.step_count >= 10
|
| 107 |
-
|
| 108 |
-
self._state.last_reward = reward
|
| 109 |
-
|
| 110 |
-
return MOAObservation(
|
| 111 |
-
task=self._state.current_task,
|
| 112 |
-
broken_file_path=self._state.broken_file_path,
|
| 113 |
-
broken_file_content=action.content, # show what agent submitted
|
| 114 |
-
test_file_content=self._state.test_file_content,
|
| 115 |
-
test_output=output,
|
| 116 |
-
tests_passed=passed,
|
| 117 |
-
tests_total=total,
|
| 118 |
-
reward=reward,
|
| 119 |
-
done=done,
|
| 120 |
-
)
|
| 121 |
|
| 122 |
def _run_tests(self) -> tuple[int, int, str]:
|
| 123 |
-
"""Run only the task's test file for speed (~500ms vs full suite)."""
|
| 124 |
try:
|
| 125 |
result = subprocess.run(
|
| 126 |
["npx", "vitest", "run", "--reporter=verbose",
|
|
|
|
| 1 |
"""
|
| 2 |
+
MOA Code Environment — multi-turn tool-using RL environment.
|
| 3 |
+
|
| 4 |
+
Following OpenEnv RFC 005 (agentic harnesses) pattern:
|
| 5 |
+
reset() → task + broken file stub → agent starts exploring
|
| 6 |
+
step(read/edit/bash) → tool result, no reward yet
|
| 7 |
+
step(submit) → runs vitest → reward = tests_passed / tests_total → done
|
| 8 |
+
|
| 9 |
+
The agent uses the same tool kit as Claude Code:
|
| 10 |
+
read — read any file in the sandbox
|
| 11 |
+
edit — apply an exact string replacement
|
| 12 |
+
bash — run a shell command in the sandbox (timeout 10s)
|
| 13 |
+
submit — trigger tests and end the episode
|
| 14 |
"""
|
| 15 |
|
| 16 |
import os
|
|
|
|
| 23 |
from .models import MOAAction, MOAObservation, MOAState
|
| 24 |
from .tasks import load_task, TASKS
|
| 25 |
|
| 26 |
+
MAX_STEPS = 20
|
| 27 |
+
BASH_TIMEOUT = 10 # seconds per bash command
|
| 28 |
+
READ_MAX_CHARS = 8000
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
class MOAEnv(Environment):
|
| 32 |
def __init__(self):
|
| 33 |
self._state = MOAState()
|
| 34 |
+
self._task_index = 0
|
| 35 |
+
|
| 36 |
+
# ── reset ──────────────────────────────────────────────────────
|
| 37 |
|
| 38 |
def reset(self) -> Observation:
|
| 39 |
task_id = TASKS[self._task_index % len(TASKS)]["id"]
|
|
|
|
| 46 |
episode_id=str(uuid.uuid4()),
|
| 47 |
step_count=0,
|
| 48 |
current_task=task["description"],
|
| 49 |
+
user_messages=task.get("user_messages", []),
|
| 50 |
broken_file_path=task["source_file"],
|
| 51 |
broken_file_content=task["broken_content"],
|
| 52 |
test_file_content=task["test_file_content"],
|
|
|
|
| 57 |
|
| 58 |
return MOAObservation(
|
| 59 |
task=task["description"],
|
| 60 |
+
user_messages=task.get("user_messages", []),
|
| 61 |
broken_file_path=task["source_file"],
|
| 62 |
broken_file_content=task["broken_content"],
|
| 63 |
test_file_content=task["test_file_content"],
|
| 64 |
+
tool="reset",
|
| 65 |
+
tool_result="",
|
| 66 |
done=False,
|
| 67 |
+
step_count=0,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# ── step ───────────────────────────────────────────────────────
|
| 71 |
+
|
| 72 |
+
def step(self, action: Action) -> Observation:
    """Execute one agent tool call and return the resulting observation.

    ``read``, ``edit`` and ``bash`` are dispatched to the matching
    ``_tool_*`` helper; their output is returned in ``tool_result`` with a
    reward of 0.0. ``submit`` runs the tests via ``_run_tests`` and ends
    the episode. Reaching ``MAX_STEPS`` with any other tool also runs the
    tests and ends the episode (auto-submit). In both cases
    ``reward = tests_passed / max(tests_total, 1)``.

    Args:
        action: the ``MOAAction`` (``tool`` name plus ``params`` dict).

    Returns:
        A ``MOAObservation`` describing the outcome of this step.

    Raises:
        ValueError: if ``action`` is not a ``MOAAction``.
    """
    if not isinstance(action, MOAAction):
        raise ValueError(f"Expected MOAAction, got {type(action)}")

    self._state.step_count += 1
    tool = action.tool
    # An explicit ``params=None`` is treated like "no parameters" so the
    # agent gets the tool's own error message rather than an AttributeError.
    params = action.params or {}

    if tool == "submit":
        # submit has no tool output of its own; the result is the test run.
        result = ""
    else:
        # Tool failures are reported back to the agent as text instead of
        # aborting the episode.
        try:
            if tool == "read":
                result = self._tool_read(params.get("path", ""))
            elif tool == "edit":
                result = self._tool_edit(
                    params.get("path", ""),
                    params.get("old_string", ""),
                    params.get("new_string", ""),
                )
            elif tool == "bash":
                result = self._tool_bash(params.get("cmd", ""))
            else:
                result = f"Unknown tool '{tool}'. Available: read, edit, bash, submit"
        except Exception as e:
            result = f"Error: {e}"

    # The episode ends on an explicit submit or when the step budget is
    # exhausted; either way the tests are run exactly once, here.
    done = tool == "submit" or self._state.step_count >= MAX_STEPS
    if done:
        passed, total, output = self._run_tests()
        reward = passed / max(total, 1)
        self._state.last_reward = reward
    else:
        reward, passed, total, output = 0.0, 0, 0, ""

    return MOAObservation(
        task=self._state.current_task,
        user_messages=self._state.user_messages,
        broken_file_path=self._state.broken_file_path,
        tool=tool,
        tool_result=result,
        test_output=output,
        tests_passed=passed,
        tests_total=total,
        reward=reward,
        done=done,
        step_count=self._state.step_count,
    )
|
| 138 |
|
| 139 |
+
# ── tools ──────────────────────────────────────────────────────
|
| 140 |
+
|
| 141 |
+
def _sandbox_path(self, rel_path: str) -> str:
    """Resolve ``rel_path`` inside the sandbox, blocking directory traversal.

    Leading slashes are stripped so absolute-looking paths are interpreted
    relative to the sandbox root. Symlinks and ``..`` segments are resolved
    with ``os.path.realpath`` before the containment check, so a link that
    points outside the sandbox is rejected as well.

    Args:
        rel_path: path supplied by the agent.

    Returns:
        The resolved absolute path (the sandbox root itself for ``""``).

    Raises:
        ValueError: if no sandbox is active or the resolved path lies
            outside the sandbox root.
    """
    if not self._state.sandbox_dir:
        # realpath("") would resolve to the server's working directory.
        raise ValueError("No active sandbox; call reset() first")

    root = os.path.realpath(self._state.sandbox_dir)
    full = os.path.realpath(os.path.join(root, rel_path.lstrip("/")))
    # Compare whole path components: a plain string-prefix test would let
    # "/tmp/moa_env_ab/x" through when the root is "/tmp/moa_env_a".
    if os.path.commonpath([root, full]) != root:
        raise ValueError("Path escapes sandbox")
    return full
|
| 148 |
+
|
| 149 |
+
def _tool_read(self, path: str) -> str:
    """Return the text of a sandbox file, capped at ``READ_MAX_CHARS``.

    Args:
        path: sandbox-relative path of the file to read.

    Returns:
        The file content, followed by ``"\\n... (truncated)"`` when the file
        holds more than ``READ_MAX_CHARS`` characters, or an ``"Error: ..."``
        string when the path is not a regular file.

    Raises:
        ValueError: propagated from ``_sandbox_path`` when the path escapes
            the sandbox.
    """
    full = self._sandbox_path(path)
    if not os.path.isfile(full):
        return f"Error: file not found: {path}"
    # Decode as UTF-8 regardless of the server locale; undecodable bytes are
    # replaced so a stray byte does not turn the whole read into an error.
    with open(full, encoding="utf-8", errors="replace") as f:
        # Read one character past the cap so a file of exactly
        # READ_MAX_CHARS characters is not reported as truncated.
        content = f.read(READ_MAX_CHARS + 1)
    if len(content) > READ_MAX_CHARS:
        content = content[:READ_MAX_CHARS] + "\n... (truncated)"
    return content
|
| 158 |
+
|
| 159 |
+
def _tool_edit(self, path: str, old_string: str, new_string: str) -> str:
    """Replace the first occurrence of ``old_string`` in a sandbox file.

    Args:
        path: sandbox-relative path of the file to edit.
        old_string: exact text to look for. An empty string matches at the
            start of the file, so ``new_string`` is inserted there.
        new_string: replacement text.

    Returns:
        ``"Edited <path> (<+/-n> lines)"`` on success, where the count is the
        difference in newline characters between the two strings, or an
        ``"Error: ..."`` string when the file or ``old_string`` is missing.

    Raises:
        ValueError: propagated from ``_sandbox_path`` when the path escapes
            the sandbox.
    """
    full = self._sandbox_path(path)
    if not os.path.isfile(full):
        return f"Error: file not found: {path}"
    # newline="" disables newline translation in both directions, so files
    # with CRLF endings keep them and an old_string containing "\r\n" can
    # match. UTF-8 is explicit so behaviour does not depend on the locale.
    with open(full, encoding="utf-8", newline="") as f:
        original = f.read()
    if old_string not in original:
        return f"Error: old_string not found in {path}"
    updated = original.replace(old_string, new_string, 1)
    with open(full, "w", encoding="utf-8", newline="") as f:
        f.write(updated)
    lines_changed = new_string.count("\n") - old_string.count("\n")
    return f"Edited {path} ({lines_changed:+d} lines)"
|
| 172 |
+
|
| 173 |
+
def _tool_bash(self, cmd: str) -> str:
    """Run a shell command in the sandbox directory and return its output.

    The command is executed through the shell (pipes and redirects work)
    with the sandbox as working directory and a ``BASH_TIMEOUT``-second
    limit.

    Args:
        cmd: shell command line supplied by the agent.

    Returns:
        The last 3000 characters of stdout followed by stderr,
        ``"(no output)"`` when both are empty, or an ``"Error: ..."`` string
        for an empty command, a timeout, or any other failure to run it.
    """
    if not cmd.strip():
        # step() passes "" when the "cmd" parameter is missing; say so
        # instead of running an empty shell.
        return "Error: empty command"
    try:
        # shell=True is intentional: this tool exists to give the agent a
        # shell. NOTE(review): nothing here confines the command to the
        # sandbox beyond cwd — confirm isolation is provided by the host.
        result = subprocess.run(
            cmd,
            shell=True,
            cwd=self._state.sandbox_dir,
            capture_output=True,
            # Decode explicitly and leniently: with locale decoding a single
            # undecodable byte raised and discarded all command output.
            encoding="utf-8",
            errors="replace",
            timeout=BASH_TIMEOUT,
        )
        out = (result.stdout + result.stderr)[-3000:]
        return out if out else "(no output)"
    except subprocess.TimeoutExpired:
        return f"Error: command timed out after {BASH_TIMEOUT}s"
    except Exception as e:
        return f"Error: {e}"
|
| 189 |
+
|
| 190 |
+
# ── sandbox setup ──────────────────────────────────────────────
|
| 191 |
+
|
| 192 |
def _make_sandbox(self, task: dict) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
import shutil
|
| 194 |
MOAV2 = "/app/moav2"
|
| 195 |
sandbox = tempfile.mkdtemp(prefix="moa_env_")
|
| 196 |
|
|
|
|
| 197 |
shutil.copytree(os.path.join(MOAV2, "src"), os.path.join(sandbox, "src"))
|
| 198 |
for f in ("package.json", "vitest.config.ts", "tsconfig.json"):
|
| 199 |
src = os.path.join(MOAV2, f)
|
| 200 |
if os.path.exists(src):
|
| 201 |
shutil.copy(src, sandbox)
|
| 202 |
|
|
|
|
| 203 |
os.symlink(
|
| 204 |
os.path.join(MOAV2, "node_modules"),
|
| 205 |
os.path.join(sandbox, "node_modules"),
|
|
|
|
| 207 |
|
| 208 |
# Blank out the target file — agent must implement it
|
| 209 |
broken_path = os.path.join(sandbox, task["source_file"])
|
| 210 |
+
os.makedirs(os.path.dirname(broken_path), exist_ok=True)
|
| 211 |
with open(broken_path, "w") as f:
|
| 212 |
f.write(task["broken_content"])
|
| 213 |
|
| 214 |
return sandbox
|
| 215 |
|
| 216 |
+
# ── test runner ────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
def _run_tests(self) -> tuple[int, int, str]:
|
|
|
|
| 219 |
try:
|
| 220 |
result = subprocess.run(
|
| 221 |
["npx", "vitest", "run", "--reporter=verbose",
|
src/envs/moa_env/models.py
CHANGED
|
@@ -1,33 +1,57 @@
|
|
| 1 |
"""
|
| 2 |
Models for the MOA Code Environment.
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
| 7 |
from dataclasses import dataclass, field
|
| 8 |
-
from typing import
|
| 9 |
from core.env_server import Action, Observation, State
|
| 10 |
|
| 11 |
|
| 12 |
@dataclass
|
| 13 |
class MOAAction(Action):
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
@dataclass
|
| 20 |
class MOAObservation(Observation):
|
| 21 |
"""What the agent sees at each step."""
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
tests_passed: int = 0
|
| 28 |
tests_total: int = 0
|
|
|
|
| 29 |
reward: float = 0.0
|
| 30 |
done: bool = False
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
@dataclass
|
|
@@ -36,9 +60,11 @@ class MOAState(State):
|
|
| 36 |
episode_id: str = ""
|
| 37 |
step_count: int = 0
|
| 38 |
current_task: str = ""
|
|
|
|
| 39 |
broken_file_path: str = ""
|
| 40 |
broken_file_content: str = ""
|
| 41 |
test_file_content: str = ""
|
| 42 |
sandbox_dir: str = ""
|
| 43 |
test_file: str = ""
|
| 44 |
last_reward: float = 0.0
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Models for the MOA Code Environment.
|
| 3 |
+
|
| 4 |
+
Multi-turn tool-using environment following OpenEnv RFC 005 (agentic harnesses).
|
| 5 |
+
The agent calls tools (read/edit/bash) across multiple steps, then submits to
|
| 6 |
+
trigger the test suite. Reward = tests_passed / tests_total on submit.
|
| 7 |
"""
|
| 8 |
|
| 9 |
from __future__ import annotations
|
| 10 |
from dataclasses import dataclass, field
|
| 11 |
+
from typing import List
|
| 12 |
from core.env_server import Action, Observation, State
|
| 13 |
|
| 14 |
|
| 15 |
@dataclass
class MOAAction(Action):
    """One tool call from the agent.

    tool:   "read" | "edit" | "bash" | "submit"
    params: tool-specific parameters

    read:   {"path": "src/foo.ts"}
    edit:   {"path": "src/foo.ts", "old_string": "...", "new_string": "..."}
    bash:   {"cmd": "npx tsc --noEmit 2>&1 | head -20"}
    submit: {} — runs the test suite and ends the episode
    """
    # Name of the tool to invoke; required, no default.
    tool: str
    # Keyword parameters for the tool; defaults to a fresh empty dict.
    params: dict = field(default_factory=dict)
|
| 29 |
|
| 30 |
|
| 31 |
@dataclass
class MOAObservation(Observation):
    """What the agent sees after reset() and after every step()."""
    # Present on reset and every step so the agent always has context
    task: str = ""
    broken_file_path: str = ""
    user_messages: List[str] = field(default_factory=list)

    # Populated on reset only (initial state); left at "" on step observations
    broken_file_content: str = ""
    test_file_content: str = ""

    # Set after each tool call ("reset" on the initial observation)
    tool: str = ""            # which tool was just called
    tool_result: str = ""     # output / result of the tool ("" for reset and submit)

    # Populated only when the tests were run: on submit, or on the
    # automatic submit when the step limit is reached
    test_output: str = ""
    tests_passed: int = 0
    tests_total: int = 0

    # reward stays 0.0 until the tests run; done marks the end of the episode
    reward: float = 0.0
    done: bool = False
    # Number of steps taken so far in this episode (0 right after reset)
    step_count: int = 0
|
| 55 |
|
| 56 |
|
| 57 |
@dataclass
|
|
|
|
| 60 |
episode_id: str = ""
|
| 61 |
step_count: int = 0
|
| 62 |
current_task: str = ""
|
| 63 |
+
user_messages: List[str] = field(default_factory=list)
|
| 64 |
broken_file_path: str = ""
|
| 65 |
broken_file_content: str = ""
|
| 66 |
test_file_content: str = ""
|
| 67 |
sandbox_dir: str = ""
|
| 68 |
test_file: str = ""
|
| 69 |
last_reward: float = 0.0
|
| 70 |
+
max_steps: int = 20
|
src/envs/moa_env/tasks.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
| 2 |
Tasks for the MOA RL environment.
|
| 3 |
Uses real moav2 service files. Source and tests are pre-installed in /app/moav2.
|
| 4 |
Each task blanks out one service file — the agent must re-implement it.
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
import os
|
| 7 |
|
|
@@ -10,6 +13,12 @@ MOAV2_DIR = "/app/moav2"
|
|
| 10 |
TASKS = [
|
| 11 |
{
|
| 12 |
"id": "task_001",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"description": (
|
| 14 |
"Implement resolveModel() in model-resolver.ts. "
|
| 15 |
"It maps (modelId, authMethod) to a Model object using the pi-ai registry. "
|
|
@@ -30,6 +39,12 @@ TASKS = [
|
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"id": "task_002",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"description": (
|
| 34 |
"Implement retry.ts with three exports: "
|
| 35 |
"isRetryableError(e) returns true for HTTP 429/5xx and common retry keywords. "
|
|
@@ -53,6 +68,12 @@ TASKS = [
|
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"id": "task_003",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
"description": (
|
| 57 |
"Implement EventStore in event-store.ts. "
|
| 58 |
"It persists events to a DB with append(event), query(filter), "
|
|
|
|
| 2 |
Tasks for the MOA RL environment.
|
| 3 |
Uses real moav2 service files. Source and tests are pre-installed in /app/moav2.
|
| 4 |
Each task blanks out one service file — the agent must re-implement it.
|
| 5 |
+
|
| 6 |
+
user_messages: real messages from MOA dev sessions. The model learns to map
|
| 7 |
+
terse, imprecise human intent directly to correct TypeScript implementations.
|
| 8 |
"""
|
| 9 |
import os
|
| 10 |
|
|
|
|
| 13 |
TASKS = [
|
| 14 |
{
|
| 15 |
"id": "task_001",
|
| 16 |
+
"user_messages": [
|
| 17 |
+
"the model resolver isn't working, it keeps throwing not implemented",
|
| 18 |
+
"we need it to support anthropic key, oauth, and vertex auth methods",
|
| 19 |
+
"if the registry lookup fails it should scan all providers",
|
| 20 |
+
"fall back to a custom config with providerBaseUrl if nothing works",
|
| 21 |
+
],
|
| 22 |
"description": (
|
| 23 |
"Implement resolveModel() in model-resolver.ts. "
|
| 24 |
"It maps (modelId, authMethod) to a Model object using the pi-ai registry. "
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"id": "task_002",
|
| 42 |
+
"user_messages": [
|
| 43 |
+
"add retry logic to the api client, it's failing on rate limits",
|
| 44 |
+
"need exponential backoff, start at 500ms",
|
| 45 |
+
"also needs to handle 429 and 5xx errors",
|
| 46 |
+
"the sleep function should respect an abort signal",
|
| 47 |
+
],
|
| 48 |
"description": (
|
| 49 |
"Implement retry.ts with three exports: "
|
| 50 |
"isRetryableError(e) returns true for HTTP 429/5xx and common retry keywords. "
|
|
|
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"id": "task_003",
|
| 71 |
+
"user_messages": [
|
| 72 |
+
"we need an event store that persists to the db",
|
| 73 |
+
"it needs append, query, search, count, and materialize",
|
| 74 |
+
"materialize should reconstruct session state from events",
|
| 75 |
+
"search should do full text search across event data",
|
| 76 |
+
],
|
| 77 |
"description": (
|
| 78 |
"Implement EventStore in event-store.ts. "
|
| 79 |
"It persists events to a DB with append(event), query(filter), "
|