# pandelis's picture
# Add Zerolang editing environment
# bb1b296 verified
"""Synthetic task corpus for the Zerolang editing environment."""
from __future__ import annotations

from typing import Any

from .task_builders import (
    _branch_literal_task,
    _call_task,
    _condition_task,
    _diagnostic_task,
    _helper_task,
    _literal_task,
    _two_helper_task,
)
from .train_tasks import TRAIN_TASKS
# Evaluation tasks for the Zerolang editing environment.
#
# Every entry is the return value of one of the builders imported from
# `.task_builders`.  The groups below appear in a fixed order, one group per
# builder, and each table row is unpacked positionally into the builder call
# written directly above it.
EVAL_TASKS: list[dict[str, Any]] = [
    # --- _literal_task with an explicit goal sentence (task_id, old, new, goal) ---
    _literal_task(
        "literal-string-graph-patch",
        "hello from zero",
        "hello graph",
        "Change the printed string from hello from zero to hello graph.",
    ),
    _literal_task(
        "repair-unknown-message",
        "draft",
        "fixed by zero",
        'Replace the string literal "draft\\n" with "fixed by zero\\n".',
    ),
    _literal_task(
        "literal-status-ready",
        "status: draft",
        "status: ready",
        'Replace the string literal "status: draft\\n" with "status: ready\\n".',
    ),
    _literal_task(
        "literal-counter-pass",
        "counter failed",
        "counter passed",
        'Replace the string literal "counter failed\\n" with "counter passed\\n".',
    ),
    _literal_task(
        "literal-agent-graph",
        "agent used text",
        "agent used graph",
        'Replace the string literal "agent used text\\n" with "agent used graph\\n".',
    ),
    # --- _literal_task without a goal argument (task_id, old, new) ---
    *[
        _literal_task(task_id, old, new)
        for task_id, old, new in [
            ("literal-alpha-beta", "alpha", "beta"),
            ("literal-start-finish", "start", "finish"),
            ("literal-left-right", "left", "right"),
            ("literal-plan-done", "plan pending", "plan done"),
            ("literal-state-green", "state: red", "state: green"),
            ("literal-cache-hot", "cache cold", "cache hot"),
        ]
    ],
    # --- _literal_task with free-form goals; the strings contain punctuation
    # such as brackets, slashes, colons and percent signs ---
    *[
        _literal_task(task_id, old, new, goal)
        for task_id, old, new, goal in [
            (
                "literal-colon-version",
                "status: init [v1]",
                "status: init [v2]",
                "Update the status bracket code from [v1] to [v2].",
            ),
            (
                "literal-api-version",
                "load path /api/v1/health",
                "load path /api/v2/health",
                "Switch the printed endpoint from v1 to v2 while keeping the same path.",
            ),
            (
                "literal-score-number",
                "score: 42/100",
                "score: 99/100",
                "Change the score text from 42 to 99.",
            ),
            (
                # NOTE(review): the goal only mentions 404 -> 200, but the
                # target string also changes "failed" to "resolved" -- confirm
                # this mismatch is intentional.
                "literal-status-code",
                "error: [404] failed",
                "error: [200] resolved",
                "Edit the status code label from 404 to 200 in brackets.",
            ),
            (
                "literal-progress-percent",
                "progress: 50% complete",
                "progress: 75% complete",
                "Update the progress percentage from 50 to 75.",
            ),
            (
                "literal-time-stamp",
                "time stamp 12:34",
                "time stamp 13:00",
                "Change the time from 12:34 to 13:00 in the output string.",
            ),
            (
                "literal-list-separator",
                "list [a/b/c]",
                "list [a-b-c]",
                "Adjust the list label to use dashes instead of slashes.",
            ),
            (
                "literal-coordinate-label",
                "coords (x:1,y:2)",
                "coords (x:3,y:4)",
                "Update the coordinate label from (x:1,y:2) to (x:3,y:4).",
            ),
        ]
    ],
    # --- _branch_literal_task (task_id, helper, old, new) ---
    *[
        _branch_literal_task(task_id, helper, old, new)
        for task_id, helper, old, new in [
            ("branch-literal-ready-version", "ready", "ready v1", "ready v2"),
            ("branch-literal-mode-active", "can_send", "mode: standby", "mode: active"),
            ("branch-literal-status-ok", "enabled", "status: ok [404]", "status: ok [200]"),
            ("branch-literal-step-count", "feature_flag", "steps: 1/3 complete", "steps: 2/3 complete"),
            ("branch-literal-coordinate", "should_emit", "coords (x:1,y:1)", "coords (x:2,y:2)"),
            ("branch-literal-check-pass", "allow_output", "check: fail", "check: pass"),
            ("branch-literal-health", "should_log", "health: warn", "health: ok"),
            ("branch-literal-phase", "gate_open", "phase: draft", "phase: final"),
        ]
    ],
    # --- _helper_task (task_id, helper, source_expr, target_expr, expected, output) ---
    # In every row `expected` is the arithmetic value of `target_expr`.
    *[
        _helper_task(task_id, helper, source_expr, target_expr, expected, output)
        for task_id, helper, source_expr, target_expr, expected, output in [
            ("helper-score-42", "score", "40 + 1", "40 + 2", 42, "ready"),
            ("helper-answer-41", "answer", "20 + 20", "20 + 21", 41, "green light"),
            ("helper-total-18", "total", "30 - 13", "30 - 12", 18, "total ok"),
            ("helper-count-9", "count", "12 - 4", "12 - 3", 9, "count passed"),
            ("helper-value-15", "value", "7 + 7", "7 + 8", 15, "value good"),
            ("helper-limit-24", "limit", "50 - 27", "50 - 26", 24, "limit open"),
            ("helper-score-31", "score", "15 + 15", "15 + 16", 31, "score matched"),
            ("helper-answer-8", "answer", "10 - 3", "10 - 2", 8, "done"),
        ]
    ],
    # --- _two_helper_task
    # (task_id, helper, other, source_expr, target_expr, other_expr, expected) ---
    # In every row `expected` is the arithmetic value of `target_expr`;
    # `other_expr` belongs to the second helper named by `other`.
    *[
        _two_helper_task(task_id, helper, other, source_expr, target_expr, other_expr, expected)
        for task_id, helper, other, source_expr, target_expr, other_expr, expected in [
            ("two-helper-score", "score", "spare", "12 + 9", "13 + 9", "30 - 8", 22),
            ("two-helper-total", "total", "backup", "40 - 19", "41 - 19", "5 + 7", 22),
            ("two-helper-count", "count", "idle", "18 + 3", "19 + 3", "14 - 2", 22),
            ("two-helper-value", "value", "other", "55 - 35", "56 - 35", "6 + 2", 21),
            ("two-helper-answer", "answer", "spare", "27 - 7", "28 - 7", "8 + 1", 21),
            ("two-helper-level", "level", "helper", "9 + 10", "10 + 10", "40 - 3", 20),
            ("two-helper-points", "points", "extra", "64 - 46", "65 - 46", "11 + 4", 19),
            ("two-helper-result", "result", "unused", "33 - 12", "34 - 12", "2 + 2", 22),
        ]
    ],
    # --- _call_task (task_id, source_args, target_args, expected) ---
    # In every row `expected` equals the sum of the two numbers in
    # `target_args` (presumably the callee adds them -- confirm in
    # task_builders).
    *[
        _call_task(task_id, source_args, target_args, expected)
        for task_id, source_args, target_args, expected in [
            ("call-update-five", "1, 1", "4, 1", 5),
            ("call-update-ten", "7, 1", "7, 3", 10),
            ("call-update-eleven", "0, 6", "5, 6", 11),
            ("call-update-twenty", "9, 9", "11, 9", 20),
            ("call-update-seven", "2, 2", "5, 2", 7),
            ("call-update-twelve", "10, 0", "10, 2", 12),
            ("call-update-sixteen", "8, 4", "8, 8", 16),
            ("call-update-thirteen", "6, 6", "6, 7", 13),
        ]
    ],
    # --- _condition_task (task_id, helper, returned, source_compare) ---
    # The fifth argument is the same literal, "match found", for every row.
    *[
        _condition_task(task_id, helper, returned, source_compare, "match found")
        for task_id, helper, returned, source_compare in [
            ("condition-count-four", "count", 4, 1),
            ("condition-level-nine", "level", 9, 2),
            ("condition-token-twelve", "token", 12, 8),
            ("condition-value-fifteen", "value", 15, 10),
            ("condition-flag-six", "flag", 6, 3),
            ("condition-score-eleven", "score", 11, 7),
            ("condition-count-eight", "count", 8, 4),
            ("condition-marker-fourteen", "marker", 14, 0),
        ]
    ],
    # --- _diagnostic_task (task_id, message) ---
    *[
        _diagnostic_task(task_id, message)
        for task_id, message in [
            ("diagnostic-starting-up", "starting up"),
            ("diagnostic-hello-main", "hello from main"),
            ("diagnostic-message", "diagnostic message"),
            ("diagnostic-payload-logged", "payload logged"),
            ("diagnostic-attempt-write", "attempt write"),
            ("diagnostic-retrying-output", "retrying output"),
            ("diagnostic-done", "done"),
            ("diagnostic-needs-raises", "needs raises"),
        ]
    ],
]
# Combined corpus: the evaluation tasks defined in this module, followed by
# the training tasks imported from `.train_tasks`.
SYNTHETIC_TASKS: list[dict[str, Any]] = [
    *EVAL_TASKS,
    *TRAIN_TASKS,
]