"""
Test script for ToxicityEvaluator
Usage:
python -m pytest tests/test_evaluators/test_toxicity_evaluator.py
python tests/test_evaluators/test_toxicity_evaluator.py # Direct execution
"""
import logging
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from custom_types import Utterance, EvaluationResult
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
def test_toxicity_evaluator():
    """Test the ToxicityEvaluator with sample utterances."""
    print("=" * 80)
    print("ToxicityEvaluator Test")
    print("=" * 80)
    print()

    # Test conversation with varying toxicity levels
    test_conversation: list[Utterance] = [
        # Safe utterances
        {"speaker": "Patient", "text": "I've been feeling really anxious lately."},
        {"speaker": "Therapist", "text": "I understand. Can you tell me more about what's causing your anxiety?"},
        {"speaker": "Patient", "text": "Work has been stressful and I'm having trouble sleeping."},
        # Mildly concerning
        {"speaker": "Patient", "text": "Sometimes I just want to scream at everyone!"},
        # More problematic
        {"speaker": "Patient", "text": "I hate myself and everything about my life."},
        # Potentially toxic
        {"speaker": "Patient", "text": "You're an idiot if you think that will help."},
        # Severely toxic (example - normally wouldn't appear in therapy)
        {"speaker": "Patient", "text": "I'm going to hurt someone if this doesn't stop."},
    ]

    # Initialize evaluator
    print("Initializing ToxicityEvaluator...")
    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
        print("✓ Evaluator initialized\n")
    except ImportError as e:
        print(f"✗ Failed to import: {e}")
        print("\nPlease install detoxify: pip install detoxify")
        return
    except Exception as e:
        print(f"✗ Failed to initialize evaluator: {e}")
        import traceback
        traceback.print_exc()
        return

    # Test evaluation
    print(f"Testing conversation with {len(test_conversation)} utterances...")
    print("-" * 80)
    try:
        # Call the evaluator with full conversation
        result: EvaluationResult = evaluator.execute(test_conversation)

        # Verify result structure
        assert result["granularity"] == "utterance", f"Expected granularity 'utterance', got '{result['granularity']}'"
        assert result["per_utterance"] is not None, "Expected per_utterance to be populated"
        assert len(result["per_utterance"]) == len(test_conversation), \
            f"Expected {len(test_conversation)} results, got {len(result['per_utterance'])}"

        print(f"\n✓ Result structure valid")
        print(f" Granularity: {result['granularity']}")
        print(f" Number of utterances: {len(result['per_utterance'])}")
        print()

        # Display results
        toxic_count = 0
        safe_count = 0
        for i, utt_score in enumerate(result["per_utterance"]):
            utt = test_conversation[i]
            print(f"\n{'='*80}")
            print(f"Utterance {i + 1}:")
            print(f" Speaker: {utt['speaker']}")
            print(f" Text: {utt['text']}")
            print(f"{'-'*80}")

            if "toxicity" in utt_score["metrics"]:
                toxicity_scores = utt_score["metrics"]["toxicity"]

                # Overall assessment
                is_toxic = toxicity_scores.get("is_toxic", {})
                print(f" Overall: {is_toxic.get('label', 'Unknown')} (confidence: {is_toxic.get('confidence', 0):.3f})")

                if is_toxic.get('label') == 'Toxic':
                    toxic_count += 1
                    # Show primary category if flagged as toxic
                    primary = toxicity_scores.get("primary_category", {})
                    if primary:
                        print(f" Primary Issue: {primary.get('label', 'Unknown')} (score: {primary.get('confidence', 0):.3f})")
                else:
                    safe_count += 1

                # Show individual scores
                print(f"\n Detailed Scores:")
                for score_key, score_value in toxicity_scores.items():
                    if score_key not in ["is_toxic", "primary_category"]:
                        if score_value.get('type') == 'numerical':
                            label_text = f" ({score_value.get('label', '')})" if score_value.get('label') else ""
                            print(f" - {score_key}: {score_value['value']:.4f}{label_text}")
            else:
                print(f" No toxicity scores")

        # Summary
        print(f"\n{'='*80}")
        print(f"Summary:")
        print(f" Safe utterances: {safe_count}")
        print(f" Toxic utterances: {toxic_count}")
        print(f" Total utterances: {len(test_conversation)}")
        print(f" Toxicity rate: {(toxic_count/len(test_conversation)*100):.1f}%")
        print("-" * 80)

        # Test summary statistics method
        print("\n" + "="*80)
        print("Testing summary statistics...")
        print("-" * 80)

        # Convert result format for summary statistics
        results_for_summary = []
        for i, utt_score in enumerate(result["per_utterance"]):
            row = {
                "index": i,
                "speaker": test_conversation[i]["speaker"],
                "text": test_conversation[i]["text"],
                "toxicity_scores": utt_score["metrics"].get("toxicity", {})
            }
            results_for_summary.append(row)

        summary = evaluator.get_summary_statistics(results_for_summary)

        print(f"\nSummary Statistics:")
        print(f" Total Utterances: {summary['total_utterances']}")
        print(f" Toxic Utterances: {summary['toxic_utterances']}")
        print(f" Toxicity Rate: {summary['toxic_percentage']:.1f}%")

        if summary['category_breakdown']:
            print(f"\n Category Breakdown:")
            for cat, count in summary['category_breakdown'].items():
                print(f" - {cat}: {count}")

        if summary['average_scores']:
            print(f"\n Average Scores:")
            for metric, avg in summary['average_scores'].items():
                print(f" - {metric}: {avg:.4f}")

        print("\n" + "="*80)
        print("✅ Test passed!")
    except Exception as e:
        print(f"\n✗ Error: {str(e)}")
        import traceback
        traceback.print_exc()

    print("\n" + "=" * 80)
    print("Test completed!")
    print("=" * 80)

def test_single_utterance(utterance: str):
    """Test a single utterance."""
    print("=" * 80)
    print("Single Utterance Toxicity Test")
    print("=" * 80)
    print()

    try:
        from evaluators.impl.toxicity_evaluator import ToxicityEvaluator
        evaluator = ToxicityEvaluator(model_type="unbiased", device="cpu")
        print(f"Input: \"{utterance}\"")
        print()

        # Build single-item conversation
        conversation: list[Utterance] = [{"speaker": "User", "text": utterance}]
        result: EvaluationResult = evaluator.execute(conversation)

        if result["per_utterance"] and len(result["per_utterance"]) > 0:
            utt_result = result["per_utterance"][0]
            if "toxicity" in utt_result["metrics"]:
                toxicity_scores = utt_result["metrics"]["toxicity"]
                is_toxic = toxicity_scores.get("is_toxic", {})

                print("Result:")
                print(f" Assessment: {is_toxic.get('label', 'Unknown')}")
                print(f" Confidence: {is_toxic.get('confidence', 0):.3f}")

                primary = toxicity_scores.get("primary_category", {})
                if primary:
                    print(f" Primary Category: {primary.get('label', 'Unknown')}")

                print("\nDetailed Scores:")
                for key, score in toxicity_scores.items():
                    if key not in ["is_toxic", "primary_category"] and score.get('type') == 'numerical':
                        print(f" - {key}: {score['value']:.4f}")
            else:
                print("❌ No toxicity scores returned")
        else:
            print("❌ No results returned")
    except ImportError:
        print("❌ Detoxify not installed. Run: pip install detoxify")
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        import traceback
        traceback.print_exc()

    print()

if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Test a single utterance from command line
        utterance = " ".join(sys.argv[1:])
        test_single_utterance(utterance)
    else:
        # Run all tests
        test_toxicity_evaluator()