"""
Test script for ToxicityEvaluator
Usage:
python -m pytest tests/test_evaluators/test_toxicity_evaluator.py
python tests/test_evaluators/test_toxicity_evaluator.py # Direct execution
"""
import logging
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from custom_types import Utterance, EvaluationResult
# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
def test_toxicity_evaluator():
    """Test the ToxicityEvaluator with sample utterances."""
    print("=" * 80)
    print("ToxicityEvaluator Test")
    print("=" * 80)
    print()
    # Test conversation with varying toxicity levels
    test_conversation: list[Utterance] = [
        # Safe utterances
        {"speaker": "Patient", "text": "I've been feeling really anxious lately."},
        {"speaker": "Therapist", "text": "I understand. Can you tell me more about what's causing your anxiety?"},
        {"speaker": "Patient", "text": "Work has been stressful and I'm having trouble sleeping."},
        # Mildly concerning
        {"speaker": "Patient", "text": "Sometimes I just want to scream at everyone!"},
        # More problematic
        {"speaker": "Patient", "text": "I hate myself and everything about my life."},
        # Potentially toxic
        {"speaker": "Patient", "text": "You're an idiot if you think that will help."},
        # Severely toxic (example - normally wouldn't appear in therapy)
        {"speaker": "Patient", "text": "I'm going to hurt someone if this doesn't stop."},
    ]
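    # Hedged expectation (not asserted): with Detoxify's "unbiased" model, the
    # insult and the threat at the end of the list are the utterances most
    # likely to be flagged, while the opening exchange usually scores well
    # below a typical 0.5 threshold. Exact scores depend on the model weights.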
    # Initialize evaluator
    print("Initializing ToxicityEvaluator...")
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
        print("✓ Evaluator initialized\n")
    except ImportError as e:
        print(f"✗ Failed to import: {e}")
        print("\nPlease install detoxify: pip install detoxify")
        return
    except Exception as e:
        print(f"✗ Failed to initialize evaluator: {e}")
        import traceback
        traceback.print_exc()
        return
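    # Smoke check (hedged sketch): run one benign utterance through the
    # evaluator before the full conversation. Assumes execute() accepts any
    # non-empty list of Utterance dicts, exactly as it is called below.
    smoke: EvaluationResult = evaluator.execute([{"speaker": "Test", "text": "Hello."}])
    assert smoke["per_utterance"], "Smoke check returned no per-utterance results"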
    # Test evaluation
    print(f"Testing conversation with {len(test_conversation)} utterances...")
    print("-" * 80)
    try:
        # Call the evaluator with the full conversation
        result: EvaluationResult = evaluator.execute(test_conversation)

        # Verify result structure
        assert result["granularity"] == "utterance", \
            f"Expected granularity 'utterance', got '{result['granularity']}'"
        assert result["per_utterance"] is not None, "Expected per_utterance to be populated"
        assert len(result["per_utterance"]) == len(test_conversation), \
            f"Expected {len(test_conversation)} results, got {len(result['per_utterance'])}"
        print("\n✓ Result structure valid")
        print(f"  Granularity: {result['granularity']}")
        print(f"  Number of utterances: {len(result['per_utterance'])}")
        print()
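        # Hedged content check: the insult (index 5) should normally receive a
        # higher raw "toxicity" score than the benign opener (index 0). This
        # assumes the per-utterance metrics expose a numerical "toxicity" entry
        # like the detailed scores printed below, and it depends on the model
        # weights, so a surprise is logged rather than treated as fatal.
        def raw_toxicity(idx: int) -> float:
            scores = result["per_utterance"][idx]["metrics"].get("toxicity", {})
            entry = scores.get("toxicity", {})
            return entry.get("value", 0.0) if entry.get("type") == "numerical" else 0.0

        if raw_toxicity(5) <= raw_toxicity(0):
            print("⚠ Unexpected ordering: insult did not outscore the benign opener")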
        # Display results
        toxic_count = 0
        safe_count = 0
        for i, utt_score in enumerate(result["per_utterance"]):
            utt = test_conversation[i]
            print(f"\n{'=' * 80}")
            print(f"Utterance {i + 1}:")
            print(f"  Speaker: {utt['speaker']}")
            print(f"  Text: {utt['text']}")
            print(f"{'-' * 80}")
            if "toxicity" in utt_score["metrics"]:
                toxicity_scores = utt_score["metrics"]["toxicity"]

                # Overall assessment
                is_toxic = toxicity_scores.get("is_toxic", {})
                print(f"  Overall: {is_toxic.get('label', 'Unknown')} "
                      f"(confidence: {is_toxic.get('confidence', 0):.3f})")
                if is_toxic.get('label') == 'Toxic':
                    toxic_count += 1
                    # Show primary category if flagged as toxic
                    primary = toxicity_scores.get("primary_category", {})
                    if primary:
                        print(f"  Primary Issue: {primary.get('label', 'Unknown')} "
                              f"(score: {primary.get('confidence', 0):.3f})")
                else:
                    safe_count += 1

                # Show individual scores
                print("\n  Detailed Scores:")
                for score_key, score_value in toxicity_scores.items():
                    if score_key not in ["is_toxic", "primary_category"]:
                        if score_value.get('type') == 'numerical':
                            label_text = f" ({score_value.get('label', '')})" if score_value.get('label') else ""
                            print(f"    - {score_key}: {score_value['value']:.4f}{label_text}")
            else:
                print("  No toxicity scores")
        # Summary
        print(f"\n{'=' * 80}")
        print("Summary:")
        print(f"  Safe utterances: {safe_count}")
        print(f"  Toxic utterances: {toxic_count}")
        print(f"  Total utterances: {len(test_conversation)}")
        print(f"  Toxicity rate: {toxic_count / len(test_conversation) * 100:.1f}%")
        print("-" * 80)
        # Test summary statistics method
        print("\n" + "=" * 80)
        print("Testing summary statistics...")
        print("-" * 80)

        # Convert result format for summary statistics
        results_for_summary = []
        for i, utt_score in enumerate(result["per_utterance"]):
            row = {
                "index": i,
                "speaker": test_conversation[i]["speaker"],
                "text": test_conversation[i]["text"],
                "toxicity_scores": utt_score["metrics"].get("toxicity", {}),
            }
            results_for_summary.append(row)

        summary = evaluator.get_summary_statistics(results_for_summary)
        print("\nSummary Statistics:")
        print(f"  Total Utterances: {summary['total_utterances']}")
        print(f"  Toxic Utterances: {summary['toxic_utterances']}")
        print(f"  Toxicity Rate: {summary['toxic_percentage']:.1f}%")
        if summary['category_breakdown']:
            print("\n  Category Breakdown:")
            for cat, count in summary['category_breakdown'].items():
                print(f"    - {cat}: {count}")
        if summary['average_scores']:
            print("\n  Average Scores:")
            for metric, avg in summary['average_scores'].items():
                print(f"    - {metric}: {avg:.4f}")
print("\n" + "="*80)
print("✅ Test passed!")
except Exception as e:
print(f"\n✗ Error: {str(e)}")
import traceback
traceback.print_exc()
print("\n" + "=" * 80)
print("Test completed!")
print("=" * 80)
def run_single_utterance(utterance: str):
    """Evaluate a single utterance supplied on the command line.

    Not named with a ``test_`` prefix: it takes a required argument, which
    pytest would otherwise misread as a missing fixture.
    """
    print("=" * 80)
    print("Single Utterance Toxicity Test")
    print("=" * 80)
    print()
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
        print(f"Input: \"{utterance}\"")
        print()

        # Build a single-item conversation
        conversation: list[Utterance] = [{"speaker": "User", "text": utterance}]
        result: EvaluationResult = evaluator.execute(conversation)
        if result["per_utterance"]:
            utt_result = result["per_utterance"][0]
            if "toxicity" in utt_result["metrics"]:
                toxicity_scores = utt_result["metrics"]["toxicity"]
                is_toxic = toxicity_scores.get("is_toxic", {})
                print("Result:")
                print(f"  Assessment: {is_toxic.get('label', 'Unknown')}")
                print(f"  Confidence: {is_toxic.get('confidence', 0):.3f}")
                primary = toxicity_scores.get("primary_category", {})
                if primary:
                    print(f"  Primary Category: {primary.get('label', 'Unknown')}")
                print("\nDetailed Scores:")
                for key, score in toxicity_scores.items():
                    if key not in ["is_toxic", "primary_category"] and score.get('type') == 'numerical':
                        print(f"  - {key}: {score['value']:.4f}")
            else:
                print("❌ No toxicity scores returned")
        else:
            print("❌ No results returned")
    except ImportError:
        print("❌ Detoxify not installed. Run: pip install detoxify")
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
    print()
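# Added hedged example: a minimal determinism check using only the API already
# exercised above (ToxicityEvaluator, execute, the "is_toxic" label). Assumes
# Detoxify inference on CPU is deterministic; relax or remove this check if
# the evaluator ever adds stochastic post-processing.
def test_evaluator_determinism():
    """Two runs over identical input should agree on the toxicity label."""
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
    except ImportError:
        print("Skipping determinism check: detoxify not installed")
        return
    evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
    # Illustrative utterance, not taken from the main fixture
    conversation: list[Utterance] = [{"speaker": "User", "text": "You're an idiot."}]
    first = evaluator.execute(conversation)
    second = evaluator.execute(conversation)
    first_label = first["per_utterance"][0]["metrics"]["toxicity"]["is_toxic"].get("label")
    second_label = second["per_utterance"][0]["metrics"]["toxicity"]["is_toxic"].get("label")
    assert first_label == second_label, \
        f"Non-deterministic labels: {first_label!r} vs {second_label!r}"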
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Evaluate a single utterance from the command line
        utterance = " ".join(sys.argv[1:])
        run_single_utterance(utterance)
    else:
        # Run the full conversation test
        test_toxicity_evaluator()