"""
Test script for ToxicityEvaluator
Usage:
python -m pytest tests/test_evaluators/test_toxicity_evaluator.py
python tests/test_evaluators/test_toxicity_evaluator.py # Direct execution
"""
import logging
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from custom_types import Utterance, EvaluationResult
# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
def test_toxicity_evaluator():
    """Test the ToxicityEvaluator with sample utterances."""
    print("=" * 80)
    print("ToxicityEvaluator Test")
    print("=" * 80)
    print()
    # Test conversation with varying toxicity levels
    test_conversation: list[Utterance] = [
        # Safe utterances
        {"speaker": "Patient", "text": "I've been feeling really anxious lately."},
        {"speaker": "Therapist", "text": "I understand. Can you tell me more about what's causing your anxiety?"},
        {"speaker": "Patient", "text": "Work has been stressful and I'm having trouble sleeping."},
        # Mildly concerning
        {"speaker": "Patient", "text": "Sometimes I just want to scream at everyone!"},
        # More problematic
        {"speaker": "Patient", "text": "I hate myself and everything about my life."},
        # Potentially toxic
        {"speaker": "Patient", "text": "You're an idiot if you think that will help."},
        # Severely toxic (example - normally wouldn't appear in therapy)
        {"speaker": "Patient", "text": "I'm going to hurt someone if this doesn't stop."},
    ]
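    # Hedged expectation (not asserted): with Detoxify's "unbiased" model, the
    # insult and the threat at the end of the list are the utterances most
    # likely to be flagged, while the opening exchange usually scores well
    # below a typical 0.5 threshold. Exact scores depend on the model weights.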
    # Initialize evaluator
    print("Initializing ToxicityEvaluator...")
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
        print("✓ Evaluator initialized\n")
    except ImportError as e:
        print(f"✗ Failed to import: {e}")
        print("\nPlease install detoxify: pip install detoxify")
        return
    except Exception as e:
        print(f"✗ Failed to initialize evaluator: {e}")
        import traceback
        traceback.print_exc()
        return
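    # Smoke check (hedged sketch): run one benign utterance through the
    # evaluator before the full conversation. Assumes execute() accepts any
    # non-empty list of Utterance dicts, exactly as it is called below.
    smoke: EvaluationResult = evaluator.execute([{"speaker": "Test", "text": "Hello."}])
    assert smoke["per_utterance"], "Smoke check returned no per-utterance results"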
    # Test evaluation
    print(f"Testing conversation with {len(test_conversation)} utterances...")
    print("-" * 80)
    try:
        # Call the evaluator with the full conversation
        result: EvaluationResult = evaluator.execute(test_conversation)

        # Verify result structure
        assert result["granularity"] == "utterance", \
            f"Expected granularity 'utterance', got '{result['granularity']}'"
        assert result["per_utterance"] is not None, "Expected per_utterance to be populated"
        assert len(result["per_utterance"]) == len(test_conversation), \
            f"Expected {len(test_conversation)} results, got {len(result['per_utterance'])}"
        print("\n✓ Result structure valid")
        print(f"  Granularity: {result['granularity']}")
        print(f"  Number of utterances: {len(result['per_utterance'])}")
        print()
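        # Hedged content check: the insult (index 5) should normally receive a
        # higher raw "toxicity" score than the benign opener (index 0). This
        # assumes the per-utterance metrics expose a numerical "toxicity" entry
        # like the detailed scores printed below, and it depends on the model
        # weights, so a surprise is logged rather than treated as fatal.
        def raw_toxicity(idx: int) -> float:
            scores = result["per_utterance"][idx]["metrics"].get("toxicity", {})
            entry = scores.get("toxicity", {})
            return entry.get("value", 0.0) if entry.get("type") == "numerical" else 0.0

        if raw_toxicity(5) <= raw_toxicity(0):
            print("⚠ Unexpected ordering: insult did not outscore the benign opener")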
        # Display results
        toxic_count = 0
        safe_count = 0
        for i, utt_score in enumerate(result["per_utterance"]):
            utt = test_conversation[i]
            print(f"\n{'=' * 80}")
            print(f"Utterance {i + 1}:")
            print(f"  Speaker: {utt['speaker']}")
            print(f"  Text: {utt['text']}")
            print(f"{'-' * 80}")
            if "toxicity" in utt_score["metrics"]:
                toxicity_scores = utt_score["metrics"]["toxicity"]

                # Overall assessment
                is_toxic = toxicity_scores.get("is_toxic", {})
                print(f"  Overall: {is_toxic.get('label', 'Unknown')} "
                      f"(confidence: {is_toxic.get('confidence', 0):.3f})")
                if is_toxic.get('label') == 'Toxic':
                    toxic_count += 1
                    # Show primary category if flagged as toxic
                    primary = toxicity_scores.get("primary_category", {})
                    if primary:
                        print(f"  Primary Issue: {primary.get('label', 'Unknown')} "
                              f"(score: {primary.get('confidence', 0):.3f})")
                else:
                    safe_count += 1

                # Show individual scores
                print("\n  Detailed Scores:")
                for score_key, score_value in toxicity_scores.items():
                    if score_key not in ["is_toxic", "primary_category"]:
                        if score_value.get('type') == 'numerical':
                            label_text = f" ({score_value.get('label', '')})" if score_value.get('label') else ""
                            print(f"    - {score_key}: {score_value['value']:.4f}{label_text}")
            else:
                print("  No toxicity scores")
        # Summary
        print(f"\n{'=' * 80}")
        print("Summary:")
        print(f"  Safe utterances: {safe_count}")
        print(f"  Toxic utterances: {toxic_count}")
        print(f"  Total utterances: {len(test_conversation)}")
        print(f"  Toxicity rate: {toxic_count / len(test_conversation) * 100:.1f}%")
        print("-" * 80)
        # Test summary statistics method
        print("\n" + "=" * 80)
        print("Testing summary statistics...")
        print("-" * 80)

        # Convert result format for summary statistics
        results_for_summary = []
        for i, utt_score in enumerate(result["per_utterance"]):
            row = {
                "index": i,
                "speaker": test_conversation[i]["speaker"],
                "text": test_conversation[i]["text"],
                "toxicity_scores": utt_score["metrics"].get("toxicity", {}),
            }
            results_for_summary.append(row)

        summary = evaluator.get_summary_statistics(results_for_summary)
        print("\nSummary Statistics:")
        print(f"  Total Utterances: {summary['total_utterances']}")
        print(f"  Toxic Utterances: {summary['toxic_utterances']}")
        print(f"  Toxicity Rate: {summary['toxic_percentage']:.1f}%")
        if summary['category_breakdown']:
            print("\n  Category Breakdown:")
            for cat, count in summary['category_breakdown'].items():
                print(f"    - {cat}: {count}")
        if summary['average_scores']:
            print("\n  Average Scores:")
            for metric, avg in summary['average_scores'].items():
                print(f"    - {metric}: {avg:.4f}")
print("\n" + "="*80)
print("✅ Test passed!")
except Exception as e:
print(f"\n✗ Error: {str(e)}")
import traceback
traceback.print_exc()
print("\n" + "=" * 80)
print("Test completed!")
print("=" * 80)
def run_single_utterance(utterance: str):
    """Evaluate a single utterance supplied on the command line.

    Not named with a ``test_`` prefix: it takes a required argument, which
    pytest would otherwise misread as a missing fixture.
    """
    print("=" * 80)
    print("Single Utterance Toxicity Test")
    print("=" * 80)
    print()
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
        print(f"Input: \"{utterance}\"")
        print()

        # Build a single-item conversation
        conversation: list[Utterance] = [{"speaker": "User", "text": utterance}]
        result: EvaluationResult = evaluator.execute(conversation)
        if result["per_utterance"]:
            utt_result = result["per_utterance"][0]
            if "toxicity" in utt_result["metrics"]:
                toxicity_scores = utt_result["metrics"]["toxicity"]
                is_toxic = toxicity_scores.get("is_toxic", {})
                print("Result:")
                print(f"  Assessment: {is_toxic.get('label', 'Unknown')}")
                print(f"  Confidence: {is_toxic.get('confidence', 0):.3f}")
                primary = toxicity_scores.get("primary_category", {})
                if primary:
                    print(f"  Primary Category: {primary.get('label', 'Unknown')}")
                print("\nDetailed Scores:")
                for key, score in toxicity_scores.items():
                    if key not in ["is_toxic", "primary_category"] and score.get('type') == 'numerical':
                        print(f"  - {key}: {score['value']:.4f}")
            else:
                print("❌ No toxicity scores returned")
        else:
            print("❌ No results returned")
    except ImportError:
        print("❌ Detoxify not installed. Run: pip install detoxify")
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
    print()
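# Added hedged example: a minimal determinism check using only the API already
# exercised above (ToxicityEvaluator, execute, the "is_toxic" label). Assumes
# Detoxify inference on CPU is deterministic; relax or remove this check if
# the evaluator ever adds stochastic post-processing.
def test_evaluator_determinism():
    """Two runs over identical input should agree on the toxicity label."""
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
    except ImportError:
        print("Skipping determinism check: detoxify not installed")
        return
    evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
    # Illustrative utterance, not taken from the main fixture
    conversation: list[Utterance] = [{"speaker": "User", "text": "You're an idiot."}]
    first = evaluator.execute(conversation)
    second = evaluator.execute(conversation)
    first_label = first["per_utterance"][0]["metrics"]["toxicity"]["is_toxic"].get("label")
    second_label = second["per_utterance"][0]["metrics"]["toxicity"]["is_toxic"].get("label")
    assert first_label == second_label, \
        f"Non-deterministic labels: {first_label!r} vs {second_label!r}"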
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Evaluate a single utterance from the command line
        utterance = " ".join(sys.argv[1:])
        run_single_utterance(utterance)
    else:
        # Run the full conversation test
        test_toxicity_evaluator()