Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import re | |
# Prompt template for the Proposer role: instructs the model to inject exactly
# one small logical bug into a clean Python function.
# Filled in with ``str.format``; the placeholders are ``{focus_instruction}``
# (extra rule lines, may be an empty string) and ``{code}`` (the clean source).
PROPOSER_PROMPT = """You are the Proposer in a debugging self-play game.
Given a clean Python function, inject a realistic logical bug into it.
Rules:
- Make exactly one small logical change.
- Keep the code valid Python.
- Keep the same function signature.
- Preserve the overall structure and formatting as much as possible.
- Prefer one of these mutation families: off_by_one, wrong_operator, wrong_builtin,
condition_negation, loop_boundary_shift, or slice_boundary_corruption.
- Aim for an edge-case behavior change, not a cosmetic refactor.
- Avoid helper extraction, renaming-only edits, comment-only changes, or multi-line rewrites.
- Return only the full modified Python code inside triple backticks.
{focus_instruction}
Clean function:
```python
{code}
```
"""
# Prompt template for the Solver role, verbose variant: shows the buggy code
# together with the observed failure text.
# Filled in with ``str.format``; the placeholders are ``{code}`` (the buggy
# source) and ``{execution_result}`` (the failure output to display).
SOLVER_PROMPT_FULL = """You are the Solver in a debugging self-play game.
The following Python code is failing its tests.
Repair the bug and return the full fixed Python code inside triple backticks.
Buggy function:
```python
{code}
```
Observed failure:
{execution_result}
"""
# Prompt template for the Solver role, concise variant: asks for the smallest
# correct local fix and labels the failure text as a summary.
# Filled in with ``str.format``; the placeholders are ``{code}`` (the buggy
# source) and ``{execution_result}`` (the failure text to display).
SOLVER_PROMPT_CONCISE = """You are the Solver in a debugging self-play game.
Fix the bug with the smallest correct local change and return only the full fixed Python code inside triple backticks.
Buggy function:
```python
{code}
```
Failure summary:
{execution_result}
"""
# Substrings that mark a line of execution output as failure-relevant.
# ``summarize_failure_output`` uses them (plain substring match) to pick the
# lines worth keeping when the output contains no "Traceback" line to anchor on.
TRACEBACK_HINTS = (
    "Traceback",
    "AssertionError",
    "SyntaxError",
    "TypeError",
    "NameError",
    "ValueError",
    "IndexError",
    "KeyError",
    "ZeroDivisionError",
    "RuntimeError",
    "Timeout",
)
def summarize_failure_output(execution_result: str, *, max_lines: int = 3, max_chars: int = 220) -> str:
    """Condense raw failure output into a short, prompt-friendly summary.

    Selection rules, in order:

    * Empty or whitespace-only input yields ``"No failure output provided."``.
    * The fixed messages ``"Unsafe import detected."`` and
      ``"Execution timed out."``, and any text starting with
      ``"SyntaxError:"``, are passed through (only length-truncated).
    * If a line containing ``"Traceback"`` exists, the summary is taken from
      the last such line onward: the header line plus the trailing lines
      that fit the line budget.
    * Otherwise the last lines containing any ``TRACEBACK_HINTS`` entry are
      kept, falling back to the last lines of the output.

    Args:
        execution_result: Raw text captured from the failing run.
        max_lines: Maximum number of lines in the summary. Values below 1
            are treated as 1 so the summary never comes back empty or
            unbounded.
        max_chars: Maximum length of the returned string, enforced by
            ``_truncate_text``.

    Returns:
        The summary, with lines joined by newlines.
    """
    text = execution_result.strip()
    if not text:
        return "No failure output provided."
    if text in {"Unsafe import detected.", "Execution timed out."} or text.startswith("SyntaxError:"):
        return _truncate_text(text, max_chars)

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if not lines:
        return "No failure output provided."

    # Clamp the budget: with 0, ``seq[-0:]`` is the *whole* sequence, so the
    # negative-index slices below would return everything instead of nothing.
    line_budget = max(1, max_lines)

    traceback_positions = [idx for idx, line in enumerate(lines) if "Traceback" in line]
    if traceback_positions:
        # Only the most recent traceback is relevant.
        tail = lines[traceback_positions[-1] :]
        if len(tail) <= line_budget:
            lines = tail
        elif line_budget == 1:
            # One line only: the final line carries the exception message,
            # which is more useful than the bare "Traceback" header.
            lines = [tail[-1]]
        else:
            # Header plus the last (budget - 1) lines.
            lines = [tail[0], *tail[-(line_budget - 1) :]]
    else:
        interesting_lines = [line for line in lines if any(hint in line for hint in TRACEBACK_HINTS)]
        lines = (interesting_lines or lines)[-line_budget:]

    summary = "\n".join(lines)
    return _truncate_text(summary, max_chars)
| def _truncate_text(text: str, max_chars: int) -> str: | |
| cleaned = re.sub(r"[ \t]+", " ", text.strip()) | |
| if len(cleaned) <= max_chars: | |
| return cleaned | |
| return cleaned[: max(0, max_chars - 3)].rstrip() + "..." | |
def sample_proposer_prompt(code: str, bug_focus: str | None = None) -> str:
    """Render the Proposer prompt for ``code``.

    Args:
        code: Source of the clean function to be mutated.
        bug_focus: Optional mutation-family name. When truthy, two extra rule
            lines are added asking the model to target that family and keep
            the edit local; otherwise the extra-rules slot is left empty.

    Returns:
        ``PROPOSER_PROMPT`` with both placeholders filled in.
    """
    if not bug_focus:
        return PROPOSER_PROMPT.format(focus_instruction="", code=code)

    extra_rules = (
        f"- Focus specifically on the `{bug_focus}` mutation family.\n"
        "- Keep the edit local so the bug can be repaired with a small fix."
    )
    return PROPOSER_PROMPT.format(focus_instruction=extra_rules, code=code)
def sample_solver_prompt(
    code: str,
    execution_result: str = "",
    *,
    mode: str = "concise",
) -> str:
    """Render the Solver prompt for ``code`` and its failure output.

    Args:
        code: Source of the buggy function.
        execution_result: Raw failure output from running the buggy code.
        mode: ``"full"`` selects ``SOLVER_PROMPT_FULL`` and embeds the
            stripped raw output. Any other value selects
            ``SOLVER_PROMPT_CONCISE`` and embeds the output of
            ``summarize_failure_output``.

    Returns:
        The formatted prompt. In full mode, empty or whitespace-only output
        is replaced by ``"No failure output provided."``.
    """
    if mode == "full":
        # Full mode shows the raw output, so the summary is never needed
        # here; it is computed only on the concise path.
        raw_output = execution_result.strip()
        return SOLVER_PROMPT_FULL.format(
            code=code,
            execution_result=raw_output or "No failure output provided.",
        )
    return SOLVER_PROMPT_CONCISE.format(
        code=code,
        execution_result=summarize_failure_output(execution_result),
    )