Spaces:
Sleeping
Sleeping
File size: 8,088 Bytes
24214fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
#!/usr/bin/env python3
"""
Test script for QuestionEffectivenessValidator functionality.
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality
from config.prompt_management.data_models import ScenarioType
def test_question_validator() -> bool:
    """Exercise QuestionEffectivenessValidator end to end and print the results.

    Runs seven sections: high-quality questions, poor-quality questions,
    per-component scores, batch validation, report generation, edge cases
    and scenario-specific targeting.

    The score thresholds checked here are advisory: a score on the wrong
    side of a threshold only prints a warning line and does not fail the
    run. The only failing path is an exception raised while validating one
    of the edge-case inputs.

    Returns:
        True when every section ran to completion, False when an edge-case
        input made the validator raise.
    """
    print("Testing QuestionEffectivenessValidator...")

    # Initialize validator
    validator = QuestionEffectivenessValidator()
    print("✓ QuestionEffectivenessValidator initialized")

    # Test 1: Validate high-quality questions
    print("\n1. Testing high-quality question validation...")
    high_quality_questions = [
        ("You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?", ScenarioType.LOSS_OF_INTEREST),
        ("I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?", ScenarioType.NO_SUPPORT),
        ("I hear that things have been stressful. Can you tell me more about what's been causing that stress?", ScenarioType.VAGUE_STRESS),
        ("Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?", ScenarioType.SLEEP_ISSUES),
    ]
    for question, scenario_type in high_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" Question: {question[:50]}...")
        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        print(f" Targeting: {analysis.targeting_score:.2f}, Empathy: {analysis.empathy_score:.2f}, Clarity: {analysis.clarity_score:.2f}")
        # Advisory threshold: a miss is reported but does not fail the run.
        if analysis.effectiveness_score >= 0.6:
            print(" ✓ High quality achieved")
        else:
            print(" ⚠ Lower than expected quality")
        if analysis.strengths:
            print(f" Strengths: {len(analysis.strengths)} identified")
        print()

    # Test 2: Validate poor-quality questions
    print("2. Testing poor-quality question validation...")
    poor_quality_questions = [
        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
        ("Okay.", ScenarioType.NO_SUPPORT),
        ("Tell me more", ScenarioType.VAGUE_STRESS),
        ("Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?", ScenarioType.SLEEP_ISSUES),
    ]
    for question, scenario_type in poor_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" Question: {question[:50]}...")
        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        # Advisory threshold: a miss is reported but does not fail the run.
        if analysis.effectiveness_score < 0.5:
            print(" ✓ Correctly identified as low quality")
        else:
            print(" ⚠ Higher than expected quality")
        # Only the first two entries are shown to keep the output short.
        if analysis.weaknesses:
            print(f" Weaknesses: {analysis.weaknesses[:2]}")
        if analysis.suggestions:
            print(f" Suggestions: {analysis.suggestions[:2]}")
        print()

    # Test 3: Test component scoring
    print("3. Testing component scoring...")

    # Test targeting score
    targeting_test = "Is that something that's been weighing on you emotionally, or is it more about circumstances?"
    analysis = validator.validate_question_effectiveness(targeting_test, ScenarioType.LOSS_OF_INTEREST)
    print(f" Targeting test: {analysis.targeting_score:.2f}")

    # Test empathy score
    empathy_test = "I'm sorry for your loss. I understand this must be very difficult for you."
    analysis = validator.validate_question_effectiveness(empathy_test, ScenarioType.LOSS_OF_LOVED_ONE)
    print(f" Empathy test: {analysis.empathy_score:.2f}")

    # Test clarity score
    clarity_test = "What specifically has been causing your sleep problems?"
    analysis = validator.validate_question_effectiveness(clarity_test, ScenarioType.SLEEP_ISSUES)
    print(f" Clarity test: {analysis.clarity_score:.2f}")

    # Test 4: Batch validation
    print("\n4. Testing batch validation...")
    batch_questions = [
        ("You mentioned you can't garden anymore. Is that weighing on you emotionally?", ScenarioType.LOSS_OF_INTEREST),
        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("What's causing your stress?", ScenarioType.VAGUE_STRESS),
    ]
    batch_results = validator.batch_validate_questions(batch_questions)
    print(f" ✓ Batch validated {len(batch_results)} questions")
    for number, result in enumerate(batch_results, start=1):
        print(f" Question {number}: {result.effectiveness_score:.2f} ({result.quality_level.value})")

    # Test 5: Generate effectiveness report
    print("\n5. Testing effectiveness report generation...")
    report = validator.generate_effectiveness_report(batch_results)
    print(f" ✓ Report generated for {report['total_questions']} questions")
    print(f" Average effectiveness: {report['average_scores']['effectiveness']}")
    print(f" Quality distribution: {report['quality_distribution']}")
    # Each entry is indexed as [0][0]: first entry, first field of that entry.
    if report['common_strengths']:
        print(f" Most common strength: {report['common_strengths'][0][0]}")
    if report['common_weaknesses']:
        print(f" Most common weakness: {report['common_weaknesses'][0][0]}")

    # Test 6: Edge cases
    print("\n6. Testing edge cases...")
    edge_cases = [
        ("", None),  # Empty question
        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
        ("A" * 200, ScenarioType.NO_SUPPORT),  # Very long question
    ]
    for question, scenario_type in edge_cases:
        # Broad catch is deliberate: any exception on a degenerate input is
        # reported and turned into a failing result for the whole script.
        try:
            analysis = validator.validate_question_effectiveness(question, scenario_type)
            print(f" ✓ Handled edge case: {len(question)} chars → {analysis.effectiveness_score:.2f}")
        except Exception as e:
            print(f" ✗ Edge case failed: {e}")
            return False

    # Test 7: Scenario-specific validation
    print("\n7. Testing scenario-specific validation...")
    scenario_tests = {
        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?",
    }
    for scenario_type, question in scenario_tests.items():
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")
        # Advisory threshold: a miss is reported but does not fail the run.
        if analysis.targeting_score >= 0.5:
            print(" ✓ Good scenario targeting")
        else:
            print(" ⚠ Weak scenario targeting")

    print("\n✓ All QuestionEffectivenessValidator tests passed!")
    return True
if __name__ == "__main__":
    # Propagate the result as the process exit status: 0 on success, 1 on failure.
    success = test_question_validator()
    sys.exit(0 if success else 1)