#!/usr/bin/env python3
"""
Test script for QuestionEffectivenessValidator functionality.
"""

import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality
from config.prompt_management.data_models import ScenarioType

def _score_line(analysis) -> str:
    """Format the overall effectiveness score and quality level for display."""
    return f"   Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})"


def _check_high_quality_questions(validator) -> None:
    """Score well-formed questions and report whether each reaches 0.6.

    The 0.6 threshold is advisory: a miss prints a warning line but does not
    fail the run.
    """
    print("\n1. Testing high-quality question validation...")

    high_quality_questions = [
        ("You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?", ScenarioType.LOSS_OF_INTEREST),
        ("I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?", ScenarioType.NO_SUPPORT),
        ("I hear that things have been stressful. Can you tell me more about what's been causing that stress?", ScenarioType.VAGUE_STRESS),
        ("Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?", ScenarioType.SLEEP_ISSUES),
    ]

    for question, scenario_type in high_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)

        print(f"   Question: {question[:50]}...")
        print(_score_line(analysis))
        print(
            f"   Targeting: {analysis.targeting_score:.2f}, "
            f"Empathy: {analysis.empathy_score:.2f}, "
            f"Clarity: {analysis.clarity_score:.2f}"
        )

        if analysis.effectiveness_score >= 0.6:
            print("   ✓ High quality achieved")
        else:
            print("   ⚠ Lower than expected quality")

        if analysis.strengths:
            print(f"   Strengths: {len(analysis.strengths)} identified")

        print()


def _check_poor_quality_questions(validator) -> None:
    """Score weak questions and report whether each stays below 0.5.

    The 0.5 threshold is advisory: a miss prints a warning line but does not
    fail the run. Up to two weaknesses and two suggestions are echoed.
    """
    print("2. Testing poor-quality question validation...")

    poor_quality_questions = [
        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
        ("Okay.", ScenarioType.NO_SUPPORT),
        ("Tell me more", ScenarioType.VAGUE_STRESS),
        ("Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?", ScenarioType.SLEEP_ISSUES),
    ]

    for question, scenario_type in poor_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)

        print(f"   Question: {question[:50]}...")
        print(_score_line(analysis))

        if analysis.effectiveness_score < 0.5:
            print("   ✓ Correctly identified as low quality")
        else:
            print("   ⚠ Higher than expected quality")

        if analysis.weaknesses:
            print(f"   Weaknesses: {analysis.weaknesses[:2]}")

        if analysis.suggestions:
            print(f"   Suggestions: {analysis.suggestions[:2]}")

        print()


def _check_component_scores(validator) -> None:
    """Print one component score (targeting, empathy, clarity) per probe question."""
    print("3. Testing component scoring...")

    # (display label, analysis attribute to print, question text, scenario)
    component_cases = [
        (
            "Targeting",
            "targeting_score",
            "Is that something that's been weighing on you emotionally, or is it more about circumstances?",
            ScenarioType.LOSS_OF_INTEREST,
        ),
        (
            "Empathy",
            "empathy_score",
            "I'm sorry for your loss. I understand this must be very difficult for you.",
            ScenarioType.LOSS_OF_LOVED_ONE,
        ),
        (
            "Clarity",
            "clarity_score",
            "What specifically has been causing your sleep problems?",
            ScenarioType.SLEEP_ISSUES,
        ),
    ]

    for label, attribute, question, scenario_type in component_cases:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f"   {label} test: {getattr(analysis, attribute):.2f}")


def _check_batch_validation(validator):
    """Run batch validation on three questions and return the result list.

    The results are returned so the report section can reuse them.
    """
    print("\n4. Testing batch validation...")

    batch_questions = [
        ("You mentioned you can't garden anymore. Is that weighing on you emotionally?", ScenarioType.LOSS_OF_INTEREST),
        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("What's causing your stress?", ScenarioType.VAGUE_STRESS),
    ]

    batch_results = validator.batch_validate_questions(batch_questions)
    print(f"   ✓ Batch validated {len(batch_results)} questions")

    for position, result in enumerate(batch_results, start=1):
        print(f"     Question {position}: {result.effectiveness_score:.2f} ({result.quality_level.value})")

    return batch_results


def _check_effectiveness_report(validator, batch_results) -> None:
    """Generate a report from *batch_results* and print its headline fields."""
    print("\n5. Testing effectiveness report generation...")

    report = validator.generate_effectiveness_report(batch_results)

    print(f"   ✓ Report generated for {report['total_questions']} questions")
    print(f"   Average effectiveness: {report['average_scores']['effectiveness']}")
    print(f"   Quality distribution: {report['quality_distribution']}")

    # Only the first element of the first entry is shown for each list.
    if report['common_strengths']:
        print(f"   Most common strength: {report['common_strengths'][0][0]}")

    if report['common_weaknesses']:
        print(f"   Most common weakness: {report['common_weaknesses'][0][0]}")


def _check_edge_cases(validator) -> bool:
    """Feed unusual inputs to the validator; return False on the first exception."""
    print("\n6. Testing edge cases...")

    edge_cases = [
        ("", None),  # Empty question
        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
        ("A" * 200, ScenarioType.NO_SUPPORT),  # Very long question
    ]

    for question, scenario_type in edge_cases:
        # Deliberately broad: any exception type on these inputs counts as a
        # failure and is reported rather than propagated.
        try:
            analysis = validator.validate_question_effectiveness(question, scenario_type)
            print(f"   ✓ Handled edge case: {len(question)} chars → {analysis.effectiveness_score:.2f}")
        except Exception as e:
            print(f"   ✗ Edge case failed: {e}")
            return False

    return True


def _check_scenario_targeting(validator) -> None:
    """Print the targeting score of one tailored question per scenario type.

    The 0.5 threshold is advisory: a miss prints a warning line but does not
    fail the run.
    """
    print("\n7. Testing scenario-specific validation...")

    scenario_tests = {
        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?",
    }

    for scenario_type, question in scenario_tests.items():
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f"   {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")

        if analysis.targeting_score >= 0.5:
            print("     ✓ Good scenario targeting")
        else:
            print("     ⚠ Weak scenario targeting")


def test_question_validator() -> bool:
    """Exercise QuestionEffectivenessValidator and print the results.

    Runs seven numbered sections in order: high-quality questions,
    poor-quality questions, component scores, batch validation, report
    generation, edge cases, and scenario-specific targeting.

    Score thresholds in sections 1, 2 and 7 are informational only; missing
    one prints a warning line but does not change the return value.

    Returns:
        True when every section ran to completion. False when an edge-case
        input (section 6) made the validator raise, in which case section 7
        is skipped. Exceptions raised in any other section propagate.
    """
    print("Testing QuestionEffectivenessValidator...")

    # Initialize validator
    validator = QuestionEffectivenessValidator()
    print("✓ QuestionEffectivenessValidator initialized")

    _check_high_quality_questions(validator)
    _check_poor_quality_questions(validator)
    _check_component_scores(validator)

    batch_results = _check_batch_validation(validator)
    _check_effectiveness_report(validator, batch_results)

    if not _check_edge_cases(validator):
        return False

    _check_scenario_targeting(validator)

    print("\n✓ All QuestionEffectivenessValidator tests passed!")
    return True

if __name__ == "__main__":
    # Map the boolean outcome onto a process exit status: 0 on success, 1 otherwise.
    passed = test_question_validator()
    exit_status = 1 if not passed else 0
    sys.exit(exit_status)