# debugZero/training/dual_role_sampler.py
# Uploaded by The-Fool-09 ("Upload folder using huggingface_hub", commit 3ba81b5, verified).
from __future__ import annotations
import re
# Prompt template for the Proposer role: asks the model to inject one small
# logical bug into a clean function. Filled with str.format(); the placeholders
# are {focus_instruction} (optional extra rule lines, may be empty) and {code}
# (the clean function source). See sample_proposer_prompt().
PROPOSER_PROMPT = """You are the Proposer in a debugging self-play game.
Given a clean Python function, inject a realistic logical bug into it.
Rules:
- Make exactly one small logical change.
- Keep the code valid Python.
- Keep the same function signature.
- Preserve the overall structure and formatting as much as possible.
- Prefer one of these mutation families: off_by_one, wrong_operator, wrong_builtin,
condition_negation, loop_boundary_shift, or slice_boundary_corruption.
- Aim for an edge-case behavior change, not a cosmetic refactor.
- Avoid helper extraction, renaming-only edits, comment-only changes, or multi-line rewrites.
- Return only the full modified Python code inside triple backticks.
{focus_instruction}
Clean function:
```python
{code}
```
"""
# Prompt template for the Solver role, verbose variant. Filled with
# str.format(); the placeholders are {code} (the buggy function source) and
# {execution_result} (the failure text). sample_solver_prompt() uses this
# template when mode == "full" and passes the stripped, unsummarized output.
SOLVER_PROMPT_FULL = """You are the Solver in a debugging self-play game.
The following Python code is failing its tests.
Repair the bug and return the full fixed Python code inside triple backticks.
Buggy function:
```python
{code}
```
Observed failure:
{execution_result}
"""
# Prompt template for the Solver role, short variant. Same placeholders as
# SOLVER_PROMPT_FULL ({code}, {execution_result}). sample_solver_prompt() uses
# this template for every mode other than "full" and fills {execution_result}
# with the output of summarize_failure_output().
SOLVER_PROMPT_CONCISE = """You are the Solver in a debugging self-play game.
Fix the bug with the smallest correct local change and return only the full fixed Python code inside triple backticks.
Buggy function:
```python
{code}
```
Failure summary:
{execution_result}
"""
# Substrings that mark a line of failure output as worth keeping.
# summarize_failure_output() uses them (plain substring match) to pick lines
# when the output contains no line with "Traceback" in it.
TRACEBACK_HINTS = (
"Traceback",
"AssertionError",
"SyntaxError",
"TypeError",
"NameError",
"ValueError",
"IndexError",
"KeyError",
"ZeroDivisionError",
"RuntimeError",
"Timeout",
)
def summarize_failure_output(execution_result: str, *, max_lines: int = 3, max_chars: int = 220) -> str:
    """Condense raw execution/test output into a short failure summary.

    Selection rules, applied in order:

    1. Blank input yields ``"No failure output provided."``.
    2. The exact messages ``"Unsafe import detected."`` and
       ``"Execution timed out."``, and any text starting with
       ``"SyntaxError:"``, are passed through whole (only truncated).
    3. If some line contains ``"Traceback"``, only the lines from the last
       such line onward are considered. When that tail exceeds the line
       budget, the first tail line plus the final ``max_lines - 1`` lines are
       kept; with a budget of one line only the final line is kept.
    4. Otherwise the last ``max_lines`` lines containing any entry of
       ``TRACEBACK_HINTS`` are kept, or, if none match, the last
       ``max_lines`` lines of the output.

    Args:
        execution_result: Raw failure output.
        max_lines: Maximum number of lines in the summary. Values below 1
            are treated as 1.
        max_chars: Character budget handed to ``_truncate_text``.

    Returns:
        The selected lines joined with newlines and truncated to
        ``max_chars`` by ``_truncate_text``.
    """
    # Clamp the budget: with 0 (or 1 in the traceback branch) the negative
    # slices below would degenerate to ``[-0:]``, i.e. the *entire* list,
    # silently ignoring the limit.
    line_budget = max(1, max_lines)

    text = execution_result.strip()
    if not text:
        return "No failure output provided."
    if text in {"Unsafe import detected.", "Execution timed out."} or text.startswith("SyntaxError:"):
        return _truncate_text(text, max_chars)

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if not lines:
        # Defensive guard kept from the original implementation.
        return "No failure output provided."

    traceback_positions = [idx for idx, line in enumerate(lines) if "Traceback" in line]
    if traceback_positions:
        # Only the most recent traceback is relevant.
        tail = lines[traceback_positions[-1] :]
        if len(tail) <= line_budget:
            lines = tail
        elif line_budget == 1:
            # One line only: the final line is kept rather than the header.
            lines = tail[-1:]
        else:
            lines = [tail[0], *tail[-(line_budget - 1) :]]
    else:
        interesting_lines = [line for line in lines if any(hint in line for hint in TRACEBACK_HINTS)]
        lines = (interesting_lines or lines)[-line_budget:]

    return _truncate_text("\n".join(lines), max_chars)
def _truncate_text(text: str, max_chars: int) -> str:
cleaned = re.sub(r"[ \t]+", " ", text.strip())
if len(cleaned) <= max_chars:
return cleaned
return cleaned[: max(0, max_chars - 3)].rstrip() + "..."
def sample_proposer_prompt(code: str, bug_focus: str | None = None) -> str:
    """Build the Proposer prompt for ``code``.

    Args:
        code: Source of the clean function to be mutated.
        bug_focus: Optional mutation family to steer the Proposer towards.
            When falsy, no extra rule lines are added to the prompt.

    Returns:
        ``PROPOSER_PROMPT`` with ``{code}`` and ``{focus_instruction}``
        filled in.
    """
    if not bug_focus:
        # No focus requested: the placeholder line is left empty.
        return PROPOSER_PROMPT.format(code=code, focus_instruction="")

    extra_rules = (
        f"- Focus specifically on the `{bug_focus}` mutation family.\n"
        "- Keep the edit local so the bug can be repaired with a small fix."
    )
    return PROPOSER_PROMPT.format(code=code, focus_instruction=extra_rules)
def sample_solver_prompt(
    code: str,
    execution_result: str = "",
    *,
    mode: str = "concise",
) -> str:
    """Build the Solver prompt for a buggy function.

    Args:
        code: Source of the buggy function.
        execution_result: Raw failure output observed when running ``code``.
        mode: ``"full"`` selects ``SOLVER_PROMPT_FULL`` with the stripped raw
            output; any other value selects ``SOLVER_PROMPT_CONCISE`` with the
            output condensed by ``summarize_failure_output``.

    Returns:
        The formatted prompt string.
    """
    if mode != "full":
        return SOLVER_PROMPT_CONCISE.format(
            code=code,
            execution_result=summarize_failure_output(execution_result),
        )

    raw_output = execution_result.strip()
    return SOLVER_PROMPT_FULL.format(
        code=code,
        execution_result=raw_output or "No failure output provided.",
    )