File size: 8,088 Bytes
24214fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
"""
Test script for QuestionEffectivenessValidator functionality.
"""

import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality
from config.prompt_management.data_models import ScenarioType

def test_question_validator() -> bool:
    """Exercise QuestionEffectivenessValidator and print a readable report.

    The function walks through seven scenarios: high-quality questions,
    poor-quality questions, per-component scores, batch validation, report
    generation, edge cases, and scenario-specific targeting.

    The score thresholds checked along the way (0.6, 0.5) are advisory: a
    miss prints a warning line but does not fail the run. The only condition
    that makes the function fail is an exception raised while validating one
    of the edge-case inputs.

    Returns:
        True when every section ran to completion, False when an edge-case
        input made ``validate_question_effectiveness`` raise.
    """
    print("Testing QuestionEffectivenessValidator...")

    # Initialize validator
    validator = QuestionEffectivenessValidator()
    print("✓ QuestionEffectivenessValidator initialized")

    # Test 1: Validate high-quality questions
    print("\n1. Testing high-quality question validation...")

    high_quality_questions = [
        ("You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?", ScenarioType.LOSS_OF_INTEREST),
        ("I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?", ScenarioType.NO_SUPPORT),
        ("I hear that things have been stressful. Can you tell me more about what's been causing that stress?", ScenarioType.VAGUE_STRESS),
        ("Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?", ScenarioType.SLEEP_ISSUES)
    ]

    for question, scenario_type in high_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)

        print(f"   Question: {question[:50]}...")
        print(f"   Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        print(f"   Targeting: {analysis.targeting_score:.2f}, Empathy: {analysis.empathy_score:.2f}, Clarity: {analysis.clarity_score:.2f}")

        # Advisory check only: a low score is reported, not treated as failure.
        if analysis.effectiveness_score >= 0.6:
            print("   ✓ High quality achieved")
        else:
            print("   ⚠ Lower than expected quality")

        if analysis.strengths:
            print(f"   Strengths: {len(analysis.strengths)} identified")

        print()

    # Test 2: Validate poor-quality questions
    print("2. Testing poor-quality question validation...")

    poor_quality_questions = [
        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
        ("Okay.", ScenarioType.NO_SUPPORT),
        ("Tell me more", ScenarioType.VAGUE_STRESS),
        ("Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?", ScenarioType.SLEEP_ISSUES)
    ]

    for question, scenario_type in poor_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)

        print(f"   Question: {question[:50]}...")
        print(f"   Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")

        # Advisory check only: an unexpectedly high score is just reported.
        if analysis.effectiveness_score < 0.5:
            print("   ✓ Correctly identified as low quality")
        else:
            print("   ⚠ Higher than expected quality")

        # Show at most two entries to keep the console output compact.
        if analysis.weaknesses:
            print(f"   Weaknesses: {analysis.weaknesses[:2]}")

        if analysis.suggestions:
            print(f"   Suggestions: {analysis.suggestions[:2]}")

        print()

    # Test 3: Test component scoring
    print("3. Testing component scoring...")

    # Test targeting score
    targeting_test = "Is that something that's been weighing on you emotionally, or is it more about circumstances?"
    analysis = validator.validate_question_effectiveness(targeting_test, ScenarioType.LOSS_OF_INTEREST)
    print(f"   Targeting test: {analysis.targeting_score:.2f}")

    # Test empathy score
    empathy_test = "I'm sorry for your loss. I understand this must be very difficult for you."
    analysis = validator.validate_question_effectiveness(empathy_test, ScenarioType.LOSS_OF_LOVED_ONE)
    print(f"   Empathy test: {analysis.empathy_score:.2f}")

    # Test clarity score
    clarity_test = "What specifically has been causing your sleep problems?"
    analysis = validator.validate_question_effectiveness(clarity_test, ScenarioType.SLEEP_ISSUES)
    print(f"   Clarity test: {analysis.clarity_score:.2f}")

    # Test 4: Batch validation
    print("\n4. Testing batch validation...")

    batch_questions = [
        ("You mentioned you can't garden anymore. Is that weighing on you emotionally?", ScenarioType.LOSS_OF_INTEREST),
        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("What's causing your stress?", ScenarioType.VAGUE_STRESS)
    ]

    batch_results = validator.batch_validate_questions(batch_questions)
    print(f"   ✓ Batch validated {len(batch_results)} questions")

    for i, result in enumerate(batch_results):
        print(f"     Question {i+1}: {result.effectiveness_score:.2f} ({result.quality_level.value})")

    # Test 5: Generate effectiveness report
    print("\n5. Testing effectiveness report generation...")

    # The report is built from the batch results produced in Test 4.
    report = validator.generate_effectiveness_report(batch_results)

    print(f"   ✓ Report generated for {report['total_questions']} questions")
    print(f"   Average effectiveness: {report['average_scores']['effectiveness']}")
    print(f"   Quality distribution: {report['quality_distribution']}")

    if report['common_strengths']:
        print(f"   Most common strength: {report['common_strengths'][0][0]}")

    if report['common_weaknesses']:
        print(f"   Most common weakness: {report['common_weaknesses'][0][0]}")

    # Test 6: Edge cases
    print("\n6. Testing edge cases...")

    edge_cases = [
        ("", None),  # Empty question
        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
        ("A" * 200, ScenarioType.NO_SUPPORT)  # Very long question
    ]

    for question, scenario_type in edge_cases:
        # Broad catch is deliberate: any exception on these inputs counts as
        # a failed run and is reported instead of aborting with a traceback.
        try:
            analysis = validator.validate_question_effectiveness(question, scenario_type)
            print(f"   ✓ Handled edge case: {len(question)} chars → {analysis.effectiveness_score:.2f}")
        except Exception as e:
            print(f"   ✗ Edge case failed: {e}")
            return False

    # Test 7: Scenario-specific validation
    print("\n7. Testing scenario-specific validation...")

    scenario_tests = {
        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?"
    }

    for scenario_type, question in scenario_tests.items():
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f"   {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")

        # Advisory check only: weak targeting is reported, not failed.
        if analysis.targeting_score >= 0.5:
            print("     ✓ Good scenario targeting")
        else:
            print("     ⚠ Weak scenario targeting")

    print("\n✓ All QuestionEffectivenessValidator tests passed!")
    return True

if __name__ == "__main__":
    # Translate the boolean test outcome into a process exit status:
    # 0 when the run succeeded, 1 otherwise.
    exit_status = 0 if test_question_validator() else 1
    sys.exit(exit_status)