File size: 2,042 Bytes
cacd58c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# task_generator.py — generate task variants programmatically
"""
Generates variants of each task by injecting different bug patterns.
Used to build a larger task pool for robust RL training.
"""
import random
from typing import Iterator

# Catalogue of textual bug injections.
#
# Each entry is a 3-tuple ``(pattern_name, find, replace)``:
#   * ``pattern_name`` - label recorded as the variant's "bug_pattern".
#   * ``find``         - literal substring searched for in a task's clean code.
#   * ``replace``      - text substituted for the first occurrence of ``find``
#                        (see inject_bug).
#
# Matching is an exact substring test, so whitespace and quoting inside
# ``find`` must match the clean code character for character. A pattern whose
# ``find`` text is absent from a task's code is simply not applicable to it.
BUG_PATTERNS = [
    # General Logic Bugs
    ("off_by_one_minus", "len(arr)",        "len(arr) - 1"),
    ("off_by_one_plus",  "range(n)",         "range(n + 1)"),
    ("wrong_operator",   "current + nums[i]","current - nums[i]"),
    ("wrong_init",       "max_sum = arr[0]", "max_sum = 0"),
    ("wrong_comparison", "if a > b",         "if a >= b"),
    ("wrong_return",     "return result",    "return result - 1"),
    # NOTE: "if not " is also a prefix of the "if not v:" text used by
    # wrong_dict_check below, so both patterns can apply to the same code.
    ("wrong_boolean",    "if not ",          "if "),

    # String Parsing Bugs (targets task_medium)
    ("wrong_split",      "split(';')",       "split(',')"),
    ("missing_strip_1",  "key.strip()",      "key"),
    ("missing_strip_2",  "value.strip()",    "value"),

    # Dictionary/List Bugs (targets task_hard)
    ("wrong_enumerate",  "enumerate(v)",     "enumerate(v, start=1)"),
    ("wrong_recursion",  "new_key, sep)",    "new_key, '/')"),
    ("missing_str_cast", "str_k = str(k)",   "str_k = k"),
    ("wrong_list_index", "str(i)",           "str(i+1)"),
    ("wrong_dict_check", "if not v:",        "if v:"),
]


def inject_bug(code: str, pattern: tuple) -> str:
    """Return ``code`` with the pattern's search text replaced once.

    ``pattern`` is a ``(name, find, replace)`` triple; the name is not used
    here. Only the first occurrence of the search text is substituted. When
    the search text does not occur in ``code``, ``code`` is returned as-is.
    """
    _name, needle, substitute = pattern
    position = code.find(needle)
    if position < 0:
        # Search text absent: this pattern does not apply to the given code.
        return code
    before = code[:position]
    after = code[position + len(needle):]
    return before + substitute + after


def generate_task_variants(base_task: dict, n: int = 20) -> Iterator[dict]:
    """Yield ``n`` variants of ``base_task``, each with one randomly injected bug.

    Only patterns from ``BUG_PATTERNS`` that actually change
    ``base_task["clean_code"]`` are sampled, so every draw produces a variant
    and exactly ``n`` variants are yielded. If no pattern applies to the
    task's code, nothing is yielded.

    Args:
        base_task: Task description; must contain the keys ``"task_id"`` and
            ``"clean_code"``. All of its keys are copied into each variant.
        n: Number of variants to produce. Values <= 0 yield nothing.

    Yields:
        A copy of ``base_task`` with ``"task_id"`` suffixed by ``_vNNN``
        (zero-padded variant index), ``"buggy_code"`` set to the mutated
        source and ``"bug_pattern"`` set to the name of the injected pattern.
        Patterns are drawn with replacement, so the same bug may appear in
        more than one variant.
    """
    if n <= 0:
        return

    clean_code = base_task["clean_code"]

    # Apply every pattern once up front and keep only those that change the
    # code. Sampling from all patterns and skipping the misses would yield
    # fewer than n variants.
    applicable = []
    for pattern in BUG_PATTERNS:
        buggy = inject_bug(clean_code, pattern)
        if buggy != clean_code:
            applicable.append((pattern[0], buggy))

    if not applicable:
        # random.choice raises IndexError on an empty sequence.
        return

    for i in range(n):
        pattern_name, buggy = random.choice(applicable)
        yield {
            **base_task,
            "task_id": f"{base_task['task_id']}_v{i:03d}",
            "buggy_code": buggy,
            "bug_pattern": pattern_name,
        }