| | import re |
| |
|
| | |
| | |
| | ANSI_ESCAPE_PATTERN = re.compile( |
| | r"\x1b\[[0-9;]*[A-Za-z]|\x1b\([AB]|\x1b[PX^_].*?\x1b\\|\x1b\].*?(?:\x07|\x1b\\)" |
| | ) |
| |
|
| |
|
| | def strip_ansi_codes(text: str) -> str: |
| | """ |
| | Strip ANSI escape codes from text. |
| | |
| | ANSI escape codes can be introduced by LLMs that include terminal |
| | color codes in their output. These codes cause syntax errors when |
| | the code is sent to Jupyter for execution. |
| | |
| | Common ANSI codes include: |
| | - Color codes: \x1b[31m (red), \x1b[32m (green), etc. |
| | - Reset codes: \x1b[0m, \x1b[39m |
| | - Cursor movement: \x1b[1A, \x1b[2J, etc. |
| | """ |
| | return ANSI_ESCAPE_PATTERN.sub("", text) |
| |
|
| |
|
| | def strip_markdown_code_fences(code: str) -> str: |
| | """ |
| | Strip markdown code fences if present. |
| | |
| | This is a defensive, non-breaking change — if the code doesn't |
| | contain fences, it passes through unchanged. |
| | |
| | Handles patterns like: |
| | - ```python |
| | - ```py |
| | - ``` |
| | """ |
| | code = code.strip() |
| | |
| | code = re.sub(r"^```\w*\n?", "", code) |
| | |
| | code = re.sub(r"\n?```\s*$", "", code) |
| | return code.strip() |
| |
|
| |
|
| | def sanitize_code(code: str) -> str: |
| | """ |
| | Sanitize code for execution by applying all necessary cleanup steps. |
| | |
| | This is the recommended function to use before sending code to |
| | interpreters like Jupyter or Pyodide. |
| | |
| | Steps applied: |
| | 1. Strip ANSI escape codes (from LLM output) |
| | 2. Strip markdown code fences (if model included them) |
| | """ |
| | code = strip_ansi_codes(code) |
| | code = strip_markdown_code_fences(code) |
| | return code |
| |
|