File size: 3,957 Bytes
3ba81b5
57a6d0c
3ba81b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57a6d0c
 
 
3ba81b5
57a6d0c
3ba81b5
 
 
57a6d0c
3ba81b5
57a6d0c
 
 
 
3ba81b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57a6d0c
3ba81b5
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from __future__ import annotations

import re


# Prompt template for the Proposer role: asks the model to inject a single
# small logical bug into a clean function. It is rendered with ``str.format``
# (see ``sample_proposer_prompt``) and has two placeholders:
#   {focus_instruction} - optional extra rule lines; an empty string when no
#                         specific mutation family is requested.
#   {code}              - the clean function source.
# Because ``str.format`` is used, any literal brace added to this text would
# have to be doubled.
PROPOSER_PROMPT = """You are the Proposer in a debugging self-play game.

Given a clean Python function, inject a realistic logical bug into it.

Rules:

- Make exactly one small logical change.

- Keep the code valid Python.

- Keep the same function signature.

- Preserve the overall structure and formatting as much as possible.

- Prefer one of these mutation families: off_by_one, wrong_operator, wrong_builtin,

  condition_negation, loop_boundary_shift, or slice_boundary_corruption.

- Aim for an edge-case behavior change, not a cosmetic refactor.

- Avoid helper extraction, renaming-only edits, comment-only changes, or multi-line rewrites.

- Return only the full modified Python code inside triple backticks.

{focus_instruction}



Clean function:

```python

{code}

```

"""

# Prompt template for the Solver role in "full" mode. It is rendered with
# ``str.format`` by ``sample_solver_prompt`` and has two placeholders:
#   {code}             - the buggy function source.
#   {execution_result} - the stripped, un-summarized failure output (or a
#                        fallback sentence when there is none).
SOLVER_PROMPT_FULL = """You are the Solver in a debugging self-play game.

The following Python code is failing its tests.

Repair the bug and return the full fixed Python code inside triple backticks.



Buggy function:

```python

{code}

```



Observed failure:

{execution_result}

"""

# Prompt template for the Solver role in every mode other than "full" (the
# default mode is "concise"). It is rendered with ``str.format`` by
# ``sample_solver_prompt`` and has two placeholders:
#   {code}             - the buggy function source.
#   {execution_result} - the condensed failure text produced by
#                        ``summarize_failure_output``.
SOLVER_PROMPT_CONCISE = """You are the Solver in a debugging self-play game.

Fix the bug with the smallest correct local change and return only the full fixed Python code inside triple backticks.



Buggy function:

```python

{code}

```



Failure summary:

{execution_result}

"""

# Case-sensitive substrings that mark a line of failure output as worth
# keeping. ``summarize_failure_output`` consults this tuple only when the
# output contains no "Traceback" line: it then prefers lines containing any
# of these markers over the plain tail of the output.
TRACEBACK_HINTS = (
    "Traceback",
    "AssertionError",
    "SyntaxError",
    "TypeError",
    "NameError",
    "ValueError",
    "IndexError",
    "KeyError",
    "ZeroDivisionError",
    "RuntimeError",
    "Timeout",
)


def summarize_failure_output(execution_result: str, *, max_lines: int = 3, max_chars: int = 220) -> str:
    """Condense raw execution output into a short failure summary.

    Selection rules, applied to the stripped, non-blank lines of the output:

    * Empty output yields ``"No failure output provided."``.
    * The exact messages ``"Unsafe import detected."`` and
      ``"Execution timed out."``, and any output starting with
      ``"SyntaxError:"``, are passed through whole (only length-limited).
    * If any line contains ``"Traceback"``, only the last traceback is used:
      its first line plus its final ``max_lines - 1`` lines. With a budget of
      a single line, the final line of the traceback is kept.
    * Otherwise the last ``max_lines`` lines containing one of
      ``TRACEBACK_HINTS`` are kept, or simply the last ``max_lines`` lines
      when none match.

    Args:
        execution_result: Raw output captured from the failing run.
        max_lines: Maximum number of lines in the summary. Values below 1 are
            treated as 1 so the summary is never empty and never exceeds the
            requested size.
        max_chars: Character budget forwarded to ``_truncate_text``.

    Returns:
        The selected lines joined with newlines and passed through
        ``_truncate_text``.
    """
    text = execution_result.strip()
    if not text:
        return "No failure output provided."

    if text in {"Unsafe import detected.", "Execution timed out."} or text.startswith("SyntaxError:"):
        return _truncate_text(text, max_chars)

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if not lines:
        # Defensive: not expected to trigger, since ``text`` is non-empty
        # after stripping.
        return "No failure output provided."

    # Clamp the budget: with the raw value, ``seq[-0:]`` would return the whole
    # sequence (max_lines == 1 in the traceback branch, or max_lines == 0 in
    # the other branch) and negative values would slice from the wrong end.
    line_budget = max(1, max_lines)

    traceback_positions = [idx for idx, line in enumerate(lines) if "Traceback" in line]
    if traceback_positions:
        # Only the most recent traceback is relevant.
        tail = lines[traceback_positions[-1] :]
        if len(tail) <= line_budget:
            lines = tail
        elif line_budget == 1:
            # A single line cannot hold both header and error; the last line
            # of the traceback is kept.
            lines = tail[-1:]
        else:
            lines = [tail[0], *tail[-(line_budget - 1) :]]
    else:
        interesting_lines = [line for line in lines if any(hint in line for hint in TRACEBACK_HINTS)]
        lines = (interesting_lines or lines)[-line_budget:]

    summary = "\n".join(lines)
    return _truncate_text(summary, max_chars)


def _truncate_text(text: str, max_chars: int) -> str:
    cleaned = re.sub(r"[ \t]+", " ", text.strip())
    if len(cleaned) <= max_chars:
        return cleaned
    return cleaned[: max(0, max_chars - 3)].rstrip() + "..."


def sample_proposer_prompt(code: str, bug_focus: str | None = None) -> str:
    """Render the Proposer prompt for ``code``.

    Args:
        code: Source of the clean function to be mutated.
        bug_focus: Optional mutation family to steer towards. When falsy, no
            extra rule lines are added to the prompt.

    Returns:
        ``PROPOSER_PROMPT`` with its placeholders filled in.
    """
    if not bug_focus:
        return PROPOSER_PROMPT.format(code=code, focus_instruction="")

    # Two additional rule bullets, appended after the template's fixed rules.
    extra_rules = "\n".join(
        (
            f"- Focus specifically on the `{bug_focus}` mutation family.",
            "- Keep the edit local so the bug can be repaired with a small fix.",
        )
    )
    return PROPOSER_PROMPT.format(code=code, focus_instruction=extra_rules)


def sample_solver_prompt(
    code: str,
    execution_result: str = "",
    *,
    mode: str = "concise",
) -> str:
    """Render the Solver prompt for a buggy function.

    Args:
        code: Source of the buggy function.
        execution_result: Raw failure output from running the buggy code.
        mode: ``"full"`` embeds the stripped raw output in
            ``SOLVER_PROMPT_FULL``; any other value embeds the condensed
            summary in ``SOLVER_PROMPT_CONCISE``.

    Returns:
        The formatted prompt text.
    """
    if mode == "full":
        raw_output = execution_result.strip()
        return SOLVER_PROMPT_FULL.format(
            code=code,
            execution_result=raw_output or "No failure output provided.",
        )

    return SOLVER_PROMPT_CONCISE.format(
        code=code,
        execution_result=summarize_failure_output(execution_result),
    )