Spaces:

oki692
/

Webui

Build error

App Files Files Community

Webui / backend /open_webui /utils /sanitize.py

oki692

Upload folder using huggingface_hub

cfb0fa4 verified 18 days ago

raw

history blame contribute delete

1.75 kB

	import re

	# ANSI escape code pattern - matches all common ANSI sequences
	# This includes color codes, cursor movement, and other terminal control sequences
	ANSI_ESCAPE_PATTERN = re.compile(
	r"\x1b\[[0-9;][A-Za-z]\|\x1b$[AB]\|\x1b[PX^_].?\x1b\\\|\x1b\].*?(?:\x07\|\x1b\$"
	)


	def strip_ansi_codes(text: str) -> str:
	"""
	Strip ANSI escape codes from text.

	ANSI escape codes can be introduced by LLMs that include terminal
	color codes in their output. These codes cause syntax errors when
	the code is sent to Jupyter for execution.

	Common ANSI codes include:
	- Color codes: \x1b[31m (red), \x1b[32m (green), etc.
	- Reset codes: \x1b[0m, \x1b[39m
	- Cursor movement: \x1b[1A, \x1b[2J, etc.
	"""
	return ANSI_ESCAPE_PATTERN.sub("", text)


	def strip_markdown_code_fences(code: str) -> str:
	"""
	Strip markdown code fences if present.

	This is a defensive, non-breaking change — if the code doesn't
	contain fences, it passes through unchanged.

	Handles patterns like:
	- ```python
	- ```py
	- ```
	"""
	code = code.strip()
	# Remove opening fence (```python, ```py, ``` etc.)
	code = re.sub(r"^```\w*\n?", "", code)
	# Remove closing fence
	code = re.sub(r"\n?```\s*$", "", code)
	return code.strip()


	def sanitize_code(code: str) -> str:
	"""
	Sanitize code for execution by applying all necessary cleanup steps.

	This is the recommended function to use before sending code to
	interpreters like Jupyter or Pyodide.

	Steps applied:
	1. Strip ANSI escape codes (from LLM output)
	2. Strip markdown code fences (if model included them)
	"""
	code = strip_ansi_codes(code)
	code = strip_markdown_code_fences(code)
	return code