Raiff1982
/

Codette-Reasoning

Text Generation

multi-perspective

Model card Files Files and versions

Codette-Reasoning / utilities /input_sanitizer.py

Raiff1982's picture

Upload 120 files

ed1b365 verified 2 days ago

history blame contribute delete

1.53 kB


	import re
	import logging

	class InputSanitizer:
	    """Sanitize and inspect strings for encoded line breaks and blacklisted fragments.

	    Two case-insensitive regular expressions are compiled once at class
	    definition time:

	    * ``_compiled_newline`` matches escaped or encoded line-break sequences
	      (backslash escapes, HTML numeric character references, URL encoding).
	    * ``_compiled_black`` matches a small blacklist of fragments associated
	      with HTML and SQL injection.

	    ``sanitize`` rewrites matches; ``detect`` only reports whether any match
	    exists.
	    """

	    # Escaped/encoded spellings of LF and CR. Each entry is a regex fragment;
	    # the fragments are combined with "|" (alternation) below.
	    _newline_patterns = [
	        r"\\n",      # backslash followed by "n" (textual newline escape)
	        r"\\r",      # backslash followed by "r" (textual carriage-return escape)
	        r"&#0*10;",  # HTML decimal character reference for LF
	        r"&#x0*a;",  # HTML hexadecimal character reference for LF
	        r"&#0*13;",  # HTML decimal character reference for CR
	        r"&#x0*d;",  # HTML hexadecimal character reference for CR
	        r"%0a",      # URL-encoded LF
	        r"%0d",      # URL-encoded CR
	    ]
	    # Join with a plain "|": an escaped pipe would be a literal "|" character
	    # in the regex and the alternatives would never match individually.
	    _compiled_newline = re.compile("|".join(_newline_patterns), re.IGNORECASE)

	    # Additional simple blacklist fragments (expand as needed).
	    _blacklist = [
	        r"<script",  # opening of a script tag
	        r"<iframe",  # opening of an iframe tag
	        r";--",      # statement terminator followed by an SQL comment
	    ]
	    _compiled_black = re.compile("|".join(_blacklist), re.IGNORECASE)

	    def sanitize(self, text: str) -> str:
	        """Return *text* with suspicious patterns neutralised.

	        Every encoded line-break sequence is replaced by a single space and
	        every blacklisted fragment by the marker ``[REDACTED]``. If anything
	        was changed, a warning is emitted through ``logging.warning``.

	        Args:
	            text: The string to clean.

	        Returns:
	            The cleaned string; equal to *text* when nothing matched.
	        """
	        # Encoded newlines first, then the blacklist, on the intermediate result.
	        cleaned = self._compiled_newline.sub(" ", text)
	        cleaned = self._compiled_black.sub("[REDACTED]", cleaned)

	        if cleaned != text:
	            logging.warning("Input sanitized due to suspicious patterns")
	        return cleaned

	    def detect(self, text: str) -> bool:
	        """Return True if *text* contains any encoded line break or blacklisted fragment.

	        Args:
	            text: The string to inspect; it is not modified.

	        Returns:
	            ``True`` when either compiled pattern matches somewhere in *text*,
	            otherwise ``False``.
	        """
	        return bool(
	            self._compiled_newline.search(text)
	            or self._compiled_black.search(text)
	        )