""" Test script for ToxicityEvaluator Usage: python -m pytest tests/test_evaluators/test_toxicity_evaluator.py python tests/test_evaluators/test_toxicity_evaluator.py # Direct execution """ import logging import sys import os # Add parent directory to path for imports sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) from custom_types import Utterance, EvaluationResult # Setup logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) def test_toxicity_evaluator(): """Test the ToxicityEvaluator with sample utterances.""" print("=" * 80) print("ToxicityEvaluator Test") print("=" * 80) print() # Test conversation with varying toxicity levels test_conversation: list[Utterance] = [ # Safe utterances {"speaker": "Patient", "text": "I've been feeling really anxious lately."}, {"speaker": "Therapist", "text": "I understand. Can you tell me more about what's causing your anxiety?"}, {"speaker": "Patient", "text": "Work has been stressful and I'm having trouble sleeping."}, # Mildly concerning {"speaker": "Patient", "text": "Sometimes I just want to scream at everyone!"}, # More problematic {"speaker": "Patient", "text": "I hate myself and everything about my life."}, # Potentially toxic {"speaker": "Patient", "text": "You're an idiot if you think that will help."}, # Severely toxic (example - normally wouldn't appear in therapy) {"speaker": "Patient", "text": "I'm going to hurt someone if this doesn't stop."}, ] # Initialize evaluator print("Initializing ToxicityEvaluator...") try: from evaluators.impl.toxicity_evaluator import ToxicityEvaluator evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu") print("✓ Evaluator initialized\n") except ImportError as e: print(f"✗ Failed to import: {e}") print("\nPlease install detoxify: pip install detoxify") return except Exception as e: print(f"✗ Failed to initialize evaluator: {e}") import traceback traceback.print_exc() return # Test evaluation print(f"Testing conversation with {len(test_conversation)} utterances...") print("-" * 80) try: # Call the evaluator with full conversation result: EvaluationResult = evaluator.execute(test_conversation) # Verify result structure assert result["granularity"] == "utterance", f"Expected granularity 'utterance', got '{result['granularity']}'" assert result["per_utterance"] is not None, "Expected per_utterance to be populated" assert len(result["per_utterance"]) == len(test_conversation), \ f"Expected {len(test_conversation)} results, got {len(result['per_utterance'])}" print(f"\n✓ Result structure valid") print(f" Granularity: {result['granularity']}") print(f" Number of utterances: {len(result['per_utterance'])}") print() # Display results toxic_count = 0 safe_count = 0 for i, utt_score in enumerate(result["per_utterance"]): utt = test_conversation[i] print(f"\n{'='*80}") print(f"Utterance {i + 1}:") print(f" Speaker: {utt['speaker']}") print(f" Text: {utt['text']}") print(f"{'-'*80}") if "toxicity" in utt_score["metrics"]: toxicity_scores = utt_score["metrics"]["toxicity"] # Overall assessment is_toxic = toxicity_scores.get("is_toxic", {}) print(f" Overall: {is_toxic.get('label', 'Unknown')} (confidence: {is_toxic.get('confidence', 0):.3f})") if is_toxic.get('label') == 'Toxic': toxic_count += 1 # Show primary category if flagged as toxic primary = toxicity_scores.get("primary_category", {}) if primary: print(f" Primary Issue: {primary.get('label', 'Unknown')} (score: {primary.get('confidence', 0):.3f})") else: 
        # Verify result structure
        assert result["granularity"] == "utterance", \
            f"Expected granularity 'utterance', got '{result['granularity']}'"
        assert result["per_utterance"] is not None, "Expected per_utterance to be populated"
        assert len(result["per_utterance"]) == len(test_conversation), \
            f"Expected {len(test_conversation)} results, got {len(result['per_utterance'])}"

        print("\n✓ Result structure valid")
        print(f"  Granularity: {result['granularity']}")
        print(f"  Number of utterances: {len(result['per_utterance'])}")
        print()

        # Display results
        toxic_count = 0
        safe_count = 0

        for i, utt_score in enumerate(result["per_utterance"]):
            utt = test_conversation[i]
            print(f"\n{'=' * 80}")
            print(f"Utterance {i + 1}:")
            print(f"  Speaker: {utt['speaker']}")
            print(f"  Text: {utt['text']}")
            print(f"{'-' * 80}")

            if "toxicity" in utt_score["metrics"]:
                toxicity_scores = utt_score["metrics"]["toxicity"]

                # Overall assessment
                is_toxic = toxicity_scores.get("is_toxic", {})
                print(f"  Overall: {is_toxic.get('label', 'Unknown')} "
                      f"(confidence: {is_toxic.get('confidence', 0):.3f})")

                if is_toxic.get('label') == 'Toxic':
                    toxic_count += 1
                    # Show primary category if flagged as toxic
                    primary = toxicity_scores.get("primary_category", {})
                    if primary:
                        print(f"  Primary Issue: {primary.get('label', 'Unknown')} "
                              f"(score: {primary.get('confidence', 0):.3f})")
                else:
                    safe_count += 1

                # Show individual scores
                print("\n  Detailed Scores:")
                for score_key, score_value in toxicity_scores.items():
                    if score_key not in ["is_toxic", "primary_category"]:
                        if score_value.get('type') == 'numerical':
                            label_text = f" ({score_value['label']})" if score_value.get('label') else ""
                            print(f"    - {score_key}: {score_value['value']:.4f}{label_text}")
            else:
                print("  No toxicity scores")

        # Summary
        print(f"\n{'=' * 80}")
        print("Summary:")
        print(f"  Safe utterances: {safe_count}")
        print(f"  Toxic utterances: {toxic_count}")
        print(f"  Total utterances: {len(test_conversation)}")
        print(f"  Toxicity rate: {toxic_count / len(test_conversation) * 100:.1f}%")
        print("-" * 80)

        # Test summary statistics method
        print("\n" + "=" * 80)
        print("Testing summary statistics...")
        print("-" * 80)

        # Convert result format for summary statistics
        results_for_summary = []
        for i, utt_score in enumerate(result["per_utterance"]):
            row = {
                "index": i,
                "speaker": test_conversation[i]["speaker"],
                "text": test_conversation[i]["text"],
                "toxicity_scores": utt_score["metrics"].get("toxicity", {}),
            }
            results_for_summary.append(row)
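        # get_summary_statistics() aggregates the per-utterance scores already
        # computed by execute() above, so no second model pass is needed. The
        # keys read below (total_utterances, toxic_utterances, toxic_percentage,
        # category_breakdown, average_scores) reflect this script's assumptions
        # about its return shape; adjust here if the evaluator differs.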
        summary = evaluator.get_summary_statistics(results_for_summary)

        print("\nSummary Statistics:")
        print(f"  Total Utterances: {summary['total_utterances']}")
        print(f"  Toxic Utterances: {summary['toxic_utterances']}")
        print(f"  Toxicity Rate: {summary['toxic_percentage']:.1f}%")

        if summary['category_breakdown']:
            print("\n  Category Breakdown:")
            for cat, count in summary['category_breakdown'].items():
                print(f"    - {cat}: {count}")

        if summary['average_scores']:
            print("\n  Average Scores:")
            for metric, avg in summary['average_scores'].items():
                print(f"    - {metric}: {avg:.4f}")

        print("\n" + "=" * 80)
        print("✅ Test passed!")

    except Exception as e:
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()

    print("\n" + "=" * 80)
    print("Test completed!")
    print("=" * 80)


def test_single_utterance(utterance: str):
    """Test a single utterance."""
    print("=" * 80)
    print("Single Utterance Toxicity Test")
    print("=" * 80)
    print()

    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")

        print(f'Input: "{utterance}"')
        print()

        # Build a single-item conversation
        conversation: list[Utterance] = [{"speaker": "User", "text": utterance}]
        result: EvaluationResult = evaluator.execute(conversation)

        if result["per_utterance"]:
            utt_result = result["per_utterance"][0]
            if "toxicity" in utt_result["metrics"]:
                toxicity_scores = utt_result["metrics"]["toxicity"]
                is_toxic = toxicity_scores.get("is_toxic", {})
                print("Result:")
                print(f"  Assessment: {is_toxic.get('label', 'Unknown')}")
                print(f"  Confidence: {is_toxic.get('confidence', 0):.3f}")

                primary = toxicity_scores.get("primary_category", {})
                if primary:
                    print(f"  Primary Category: {primary.get('label', 'Unknown')}")

                print("\nDetailed Scores:")
                for key, score in toxicity_scores.items():
                    if key not in ["is_toxic", "primary_category"] and score.get('type') == 'numerical':
                        print(f"  - {key}: {score['value']:.4f}")
            else:
                print("❌ No toxicity scores returned")
        else:
            print("❌ No results returned")

    except ImportError:
        print("❌ Detoxify not installed. Run: pip install detoxify")
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

    print()


if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Test a single utterance from the command line
        utterance = " ".join(sys.argv[1:])
        test_single_utterance(utterance)
    else:
        # Run the full conversation test
        test_toxicity_evaluator()
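
# The check below is a minimal pytest-style sketch, not part of the original
# suite: it assumes the same execute() contract exercised above and skips
# cleanly when detoxify is missing (pytest.importorskip). It is defined after
# the __main__ guard so direct execution is unchanged; pytest still collects
# it by name.
def test_execute_smoke():
    import pytest
    pytest.importorskip("detoxify")
    from evaluators.impl.toxicity_evaluator import ToxicityEvaluator

    evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
    conversation: list[Utterance] = [{"speaker": "User", "text": "Hello there."}]
    result: EvaluationResult = evaluator.execute(conversation)

    # One score entry per input utterance, at utterance granularity
    assert result["granularity"] == "utterance"
    assert result["per_utterance"] is not None
    assert len(result["per_utterance"]) == 1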