Spaces:

Huggingansuman
/

red_teaming_env

Running

App Files Files Community

red_teaming_env / server /text_utils.py

Huggingansuman

Upload red_teaming_env Space files

410276d verified 9 days ago

raw

history blame contribute delete

1.96 kB

	"""Shared text helpers for lightweight semantic scoring."""

	from __future__ import annotations

	import hashlib
	import re

	import numpy as np

	# Matches maximal runs of lowercase ASCII letters, digits, and underscores;
	# any other character acts as a token separator.
	TOKEN_RE = re.compile(r"[a-z0-9_]+")


	def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
	    """Restrict ``value`` to the closed range ``[low, high]``.
	
	    The upper bound is applied first and the lower bound second, so when
	    ``low > high`` the result is ``low``.
	    """
	    # Apply the ceiling first, then the floor, in the same comparison order
	    # that nested min()/max() calls would use.
	    capped = value if value < high else high
	    return capped if capped > low else low


	def normalize_text(text: str) -> str:
	    """Return ``text`` lower-cased with every whitespace run collapsed.
	
	    Leading and trailing whitespace is dropped and the remaining words are
	    joined by single spaces, giving a stable form for comparisons.
	    """
	    words = text.lower().split()
	    return " ".join(words)


	def tokenize(text: str) -> list[str]:
	    """Split ``text`` into a list of lightweight alphanumeric tokens.
	
	    The text is normalized first (lower-cased, whitespace collapsed) and
	    every non-overlapping match of ``TOKEN_RE`` is returned in order of
	    appearance, duplicates included.
	    """
	    cleaned = normalize_text(text)
	    return [match.group(0) for match in TOKEN_RE.finditer(cleaned)]


	def hashed_embedding(text: str, dim: int = 256) -> np.ndarray:
	    """Build a deterministic hashed bag-of-words vector for ``text``.
	
	    Each token is hashed with a 2-byte BLAKE2b digest and counted in the
	    bucket ``digest % dim``. The resulting float32 count vector is scaled to
	    unit L2 norm; when no token is found the all-zero vector is returned.
	    """
	    buckets = np.zeros(dim, dtype=np.float32)
	    for word in tokenize(text):
	        fingerprint = hashlib.blake2b(word.encode("utf-8"), digest_size=2).digest()
	        # Big-endian decoding of the raw digest equals parsing its hex form.
	        slot = int.from_bytes(fingerprint, "big") % dim
	        buckets[slot] += 1.0
	    length = np.linalg.norm(buckets)
	    if not length > 0:
	        # No tokens were counted: leave the zero vector untouched.
	        return buckets
	    buckets /= length
	    return buckets


	def cosine_similarity(left: np.ndarray, right: np.ndarray) -> float:
	    """Compute cosine similarity with safe zero-vector handling.
	
	    Both inputs are promoted to float64 before any arithmetic so that
	    low-precision inputs (e.g. float32 vectors with very small components)
	    are not mistaken for zero vectors when their squared sums underflow.
	
	    Args:
	        left: First vector.
	        right: Second vector, with the same length as ``left``.
	
	    Returns:
	        The cosine of the angle between the vectors, limited to
	        ``[-1.0, 1.0]``. Returns ``0.0`` when either vector has zero norm
	        or when the product of the norms underflows to zero.
	    """
	    left64 = np.asarray(left, dtype=np.float64)
	    right64 = np.asarray(right, dtype=np.float64)
	    denominator = float(np.linalg.norm(left64)) * float(np.linalg.norm(right64))
	    # Checking the product also covers the case where both norms are
	    # non-zero but their product underflows, which would otherwise divide
	    # by zero.
	    if denominator == 0.0:
	        return 0.0
	    similarity = float(np.dot(left64, right64) / denominator)
	    # Rounding can push the ratio marginally outside [-1, 1]. Explicit
	    # comparisons are used so that a NaN result is passed through unchanged
	    # rather than being silently turned into a bound.
	    if similarity > 1.0:
	        return 1.0
	    if similarity < -1.0:
	        return -1.0
	    return similarity


	def text_similarity(left: str, right: str) -> float:
	    """Score how similar two texts are, deterministically.
	
	    Each text is converted to its hashed bag-of-words embedding and the
	    cosine similarity of the two embeddings is returned.
	    """
	    left_vector = hashed_embedding(left)
	    right_vector = hashed_embedding(right)
	    return cosine_similarity(left_vector, right_vector)


	def stable_noise(text: str, low: float = -0.05, high: float = 0.05) -> float:
	    """Derive a repeatable noise value in ``[low, high]`` from ``text``.
	
	    The UTF-8 bytes of ``text`` are hashed with an 8-byte BLAKE2b digest; the
	    digest, read as an unsigned 64-bit integer, is scaled to ``[0, 1]`` and
	    then mapped linearly onto the requested interval.
	    """
	    fingerprint = hashlib.blake2b(text.encode("utf-8"), digest_size=8).digest()
	    # Dividing by the largest 64-bit value maps the digest onto [0, 1].
	    fraction = int.from_bytes(fingerprint, "big") / float(2**64 - 1)
	    span = high - low
	    return low + span * fraction