Spaces:
Runtime error
Runtime error
| """ | |
| Code Mutation Engine — makes each episode surface unique. | |
| Each call to ``mutate_task()`` returns a deep copy of a task with three | |
| deterministic, seed-controlled transforms applied: | |
| 1. Variable renaming — one identifier swapped for a drop-in synonym so | |
| the agent cannot memorise exact token strings. | |
| 2. Line shifting — one blank line inserted above the first issue, | |
| shifting all issue line_ranges down by 1. | |
| 3. Constant variance — one numeric literal nudged ±1 so the agent | |
| sees a fresh surface without changing the bug. | |
| Mutations are fully deterministic given a seed, making training runs | |
| reproducible while still presenting a different surface each episode. | |
| Design constraint | |
| ----------------- | |
| Mutations must NEVER change *whether* a bug exists or *which line category* | |
| it belongs to. Only surface tokens and line positions may change. | |
| """ | |
| from __future__ import annotations | |
| import copy | |
| import random | |
| import re | |
| from typing import Any | |
# -- Variable synonym table ----------------------------------------------------
# Keys are identifiers the rename mutation looks for in task code; each value
# lists the replacement names it may pick from. Entries are single-token names
# intended as drop-in substitutes. Key order is significant: it determines the
# candidate order handed to the seeded RNG.
_IDENTIFIER_SYNONYMS: dict[str, list[str]] = {
    "total": ["acc", "running_total", "summed"],
    "numbers": ["values", "nums", "items"],
    "result": ["output", "response", "ret"],
    "data": ["payload", "records", "entries"],
    "item": ["record", "entry", "obj"],
    "items": ["records", "entries", "objects"],
    "user": ["account", "principal", "member"],
    "users": ["accounts", "principals", "members"],
    "password": ["passwd", "secret", "credential"],
    "username": ["user_name", "login", "uname"],
    "command": ["cmd", "instruction", "directive"],
    "filename": ["file_name", "fname", "path_name"],
    "url": ["endpoint", "uri", "address"],
    "attempt": ["try_num", "iteration", "retry_idx"],
    "counter": ["count", "tally", "n"],
    "session": ["conn", "http_session", "client"],
    "results": ["findings", "collected", "gathered"],
    "cache": ["store", "lookup", "memo"],
    "transformed": ["processed", "mapped", "converted"],
}

# Lower bound applied to a numeric literal after the variance nudge, so a
# nudged literal never ends up as 0 or 1.
_MIN_CONSTANT_VALUE: int = 2
def mutate_task(base_task: dict[str, Any], seed: int) -> dict[str, Any]:
    """
    Build a seed-reproducible, mutated variant of *base_task*.

    The input is deep-copied first, so *base_task* itself is never modified.
    Three transforms run on the copy, in this order: identifier rename, line
    shift, constant variance. The rename and constant transforms draw from one
    ``random.Random(seed)`` instance, so the same seed yields the same result.

    Args:
        base_task: Task dict; must contain ``"code"`` and ``"issues"`` keys.
        seed: Seed for the random generator driving the mutations.

    Returns:
        The mutated copy, with ``"code"`` and ``"issues"`` replaced and the
        seed recorded under ``"_mutation_seed"``.
    """
    task_copy: dict[str, Any] = copy.deepcopy(base_task)
    seeded_rng = random.Random(seed)

    # Order matters: the rename and the constant nudge share one RNG stream.
    code, issue_list = _apply_variable_rename(
        task_copy["code"], task_copy["issues"], seeded_rng
    )
    code, issue_list = _apply_line_shift(code, issue_list)
    code = _apply_constant_variance(code, seeded_rng)

    task_copy.update(code=code, issues=issue_list, _mutation_seed=seed)
    return task_copy
| # ── Private mutation helpers ─────────────────────────────────────────────── | |
def _apply_variable_rename(
    source_code: str,
    issues: list[dict[str, Any]],
    rng: random.Random,
) -> tuple[str, list[dict[str, Any]]]:
    """
    Swap one identifier in the source for a synonym from _IDENTIFIER_SYNONYMS.

    Only synonyms that do not already occur in *source_code* as a whole word
    are eligible. Renaming onto a name that is already in use would merge two
    distinct variables and could change what the code does.

    Every whole-word occurrence of the chosen identifier is replaced. The
    substitution is purely textual, so occurrences inside string literals and
    comments are replaced as well.

    Args:
        source_code: Code to mutate.
        issues: Issue dicts. Where an issue has a ``"keywords"`` list, entries
            equal to the renamed identifier are replaced with the synonym.
            Issues without ``"keywords"`` are left untouched. The dicts are
            updated in place.
        rng: Seeded random generator used for both choices.

    Returns:
        ``(source_code, issues)``. Both are returned unchanged when no
        identifier from the table has a collision-free synonym.
    """

    def _occurs(name: str) -> bool:
        # \b anchors prevent partial hits such as 'data' inside 'database'
        # or 'user' inside 'username'.
        return re.search(rf"\b{re.escape(name)}\b", source_code) is not None

    # identifier -> synonyms that are safe to introduce. Table order is kept so
    # the RNG sees the same candidate order whenever nothing collides.
    candidates: dict[str, list[str]] = {}
    for identifier, synonyms in _IDENTIFIER_SYNONYMS.items():
        if not _occurs(identifier):
            continue
        unused = [synonym for synonym in synonyms if not _occurs(synonym)]
        if unused:
            candidates[identifier] = unused

    if not candidates:
        return source_code, issues

    original_identifier = rng.choice(list(candidates))
    replacement_identifier = rng.choice(candidates[original_identifier])
    source_code = re.sub(
        rf"\b{re.escape(original_identifier)}\b",
        replacement_identifier,
        source_code,
    )

    # Keep issue keywords in sync with the renamed identifier.
    for issue in issues:
        keywords = issue.get("keywords")
        if keywords is None:
            continue
        issue["keywords"] = [
            replacement_identifier if kw == original_identifier else kw
            for kw in keywords
        ]
    return source_code, issues
def _apply_line_shift(
    source_code: str,
    issues: list[dict[str, Any]],
) -> tuple[str, list[dict[str, Any]]]:
    """
    Insert one blank line ahead of the earliest issue and bump every line_range.

    The blank line goes in at 0-based index ``first_issue_line - 2`` (clamped
    to 0), which is the slot of the line just before the earliest issue. All
    issue lines therefore move down by exactly one, and each ``line_range`` is
    rewritten as the tuple ``(start + 1, end + 1)``.

    Args:
        source_code: Code to mutate, with lines separated by a newline.
        issues: Issue dicts carrying a two-element, 1-based ``"line_range"``.
            The dicts are updated in place.

    Returns:
        ``(source_code, issues)``. Both are returned unchanged when *issues*
        is empty.
    """
    if issues:
        earliest_start = min(entry["line_range"][0] for entry in issues)
        # 1-based line number -> 0-based index of the line above it.
        blank_index = max(0, earliest_start - 2)

        code_lines = source_code.split("\n")
        code_lines[blank_index:blank_index] = [""]
        source_code = "\n".join(code_lines)

        for entry in issues:
            first, last = entry["line_range"]
            entry["line_range"] = (first + 1, last + 1)
    return source_code, issues
def _apply_constant_variance(source_code: str, rng: random.Random) -> str:
    """
    Nudge one integer literal by +/-1 to vary the code surface.

    A literal is a candidate when all of the following hold:

    * It is a decimal integer >= 2. Nudging 0 or 1 could produce 0 or a
      negative value, breaking constructs like ``range(1)`` or ``timeout=1``.
    * It is not part of a float or dotted number such as ``0.5`` or ``1.2.3``,
      and not embedded in an identifier.
    * No ``#`` appears before it on the same line, so numbers in inline
      comments (e.g. ``# line 42``) are left alone. The check is textual: a
      ``#`` inside a string literal earlier on the line also excludes the
      number, which errs on the side of not mutating.

    Digits inside string literals are not detected and may still be chosen.

    Args:
        source_code: Code to mutate.
        rng: Seeded random generator used to pick the literal and direction.

    Returns:
        The code with one literal replaced, or the input unchanged when there
        is no candidate. The new value is clamped to ``_MIN_CONSTANT_VALUE``,
        so a literal already at the minimum stays as it is when nudged down.
    """
    candidates = []
    # The lookarounds reject digits adjacent to '.' or a word character, so
    # float fragments and identifier suffixes are never touched.
    for match in re.finditer(r"(?<![\w.])(?:[2-9]|[1-9]\d+)(?![\w.])", source_code):
        # Inspect only the literal's own line. Scanning the whole prefix would
        # let a comment anywhere earlier in the file veto an unrelated literal.
        line_start = source_code.rfind("\n", 0, match.start()) + 1
        if "#" in source_code[line_start : match.start()]:
            continue
        candidates.append(match)

    if not candidates:
        return source_code

    chosen_match = rng.choice(candidates)
    nudge = rng.choice([-1, 1])
    new_value = max(_MIN_CONSTANT_VALUE, int(chosen_match.group()) + nudge)
    return (
        source_code[: chosen_match.start()]
        + str(new_value)
        + source_code[chosen_match.end() :]
    )
# Public API: only mutate_task is exported; the _apply_* helpers stay private.
__all__ = ["mutate_task"]