Spiritual_Health_Project / tests /unit /test_question_validator.py
DocUA's picture
feat: Complete prompt optimization system implementation
24214fc
#!/usr/bin/env python3
"""
Test script for QuestionEffectivenessValidator functionality.
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality
from config.prompt_management.data_models import ScenarioType
def _report_high_quality(validator):
    """Score questions expected to rate well and print each score breakdown."""
    print("\n1. Testing high-quality question validation...")
    high_quality_questions = [
        (
            "You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?",
            ScenarioType.LOSS_OF_INTEREST,
        ),
        (
            "I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?",
            ScenarioType.LOSS_OF_LOVED_ONE,
        ),
        (
            "It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?",
            ScenarioType.NO_SUPPORT,
        ),
        (
            "I hear that things have been stressful. Can you tell me more about what's been causing that stress?",
            ScenarioType.VAGUE_STRESS,
        ),
        (
            "Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?",
            ScenarioType.SLEEP_ISSUES,
        ),
    ]
    for question, scenario_type in high_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" Question: {question[:50]}...")
        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        print(
            f" Targeting: {analysis.targeting_score:.2f}, "
            f"Empathy: {analysis.empathy_score:.2f}, "
            f"Clarity: {analysis.clarity_score:.2f}"
        )
        if analysis.effectiveness_score >= 0.6:
            print(" ✓ High quality achieved")
        else:
            print(" ⚠ Lower than expected quality")
        if analysis.strengths:
            print(f" Strengths: {len(analysis.strengths)} identified")
        print()


def _report_poor_quality(validator):
    """Score questions expected to rate poorly and print weaknesses/suggestions."""
    print("2. Testing poor-quality question validation...")
    poor_quality_questions = [
        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
        ("Okay.", ScenarioType.NO_SUPPORT),
        ("Tell me more", ScenarioType.VAGUE_STRESS),
        (
            "Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?",
            ScenarioType.SLEEP_ISSUES,
        ),
    ]
    for question, scenario_type in poor_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" Question: {question[:50]}...")
        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        if analysis.effectiveness_score < 0.5:
            print(" ✓ Correctly identified as low quality")
        else:
            print(" ⚠ Higher than expected quality")
        # Only the first two entries are shown to keep the output short.
        if analysis.weaknesses:
            print(f" Weaknesses: {analysis.weaknesses[:2]}")
        if analysis.suggestions:
            print(f" Suggestions: {analysis.suggestions[:2]}")
        print()


def _report_component_scores(validator):
    """Print one component score (targeting, empathy, clarity) per probe question."""
    print("3. Testing component scoring...")
    # (label, probe question, scenario, attribute read from the analysis)
    component_probes = [
        (
            "Targeting",
            "Is that something that's been weighing on you emotionally, or is it more about circumstances?",
            ScenarioType.LOSS_OF_INTEREST,
            "targeting_score",
        ),
        (
            "Empathy",
            "I'm sorry for your loss. I understand this must be very difficult for you.",
            ScenarioType.LOSS_OF_LOVED_ONE,
            "empathy_score",
        ),
        (
            "Clarity",
            "What specifically has been causing your sleep problems?",
            ScenarioType.SLEEP_ISSUES,
            "clarity_score",
        ),
    ]
    for label, question, scenario_type, score_attr in component_probes:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" {label} test: {getattr(analysis, score_attr):.2f}")


def _run_batch_validation(validator):
    """Validate a small batch in one call, print per-question scores, return the results."""
    print("\n4. Testing batch validation...")
    batch_questions = [
        (
            "You mentioned you can't garden anymore. Is that weighing on you emotionally?",
            ScenarioType.LOSS_OF_INTEREST,
        ),
        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("What's causing your stress?", ScenarioType.VAGUE_STRESS),
    ]
    batch_results = validator.batch_validate_questions(batch_questions)
    print(f" ✓ Batch validated {len(batch_results)} questions")
    for position, result in enumerate(batch_results, start=1):
        print(f" Question {position}: {result.effectiveness_score:.2f} ({result.quality_level.value})")
    return batch_results


def _report_effectiveness_summary(validator, batch_results):
    """Build the effectiveness report for *batch_results* and print its headline fields."""
    print("\n5. Testing effectiveness report generation...")
    report = validator.generate_effectiveness_report(batch_results)
    print(f" ✓ Report generated for {report['total_questions']} questions")
    print(f" Average effectiveness: {report['average_scores']['effectiveness']}")
    print(f" Quality distribution: {report['quality_distribution']}")
    # Each entry is indexed as [0][0]: first entry, first field of that entry.
    if report['common_strengths']:
        print(f" Most common strength: {report['common_strengths'][0][0]}")
    if report['common_weaknesses']:
        print(f" Most common weakness: {report['common_weaknesses'][0][0]}")


def _run_edge_cases(validator):
    """Feed unusual inputs to the validator; return False as soon as one raises."""
    print("\n6. Testing edge cases...")
    edge_cases = [
        ("", None),  # Empty question, no scenario
        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
        ("A" * 200, ScenarioType.NO_SUPPORT),  # Very long question
    ]
    for question, scenario_type in edge_cases:
        # Broad catch is deliberate: any exception on these inputs counts as a failure.
        try:
            analysis = validator.validate_question_effectiveness(question, scenario_type)
            print(f" ✓ Handled edge case: {len(question)} chars → {analysis.effectiveness_score:.2f}")
        except Exception as exc:
            print(f" ✗ Edge case failed: {exc}")
            return False
    return True


def _report_scenario_targeting(validator):
    """Print the targeting score of one tailored question per scenario type."""
    print("\n7. Testing scenario-specific validation...")
    scenario_tests = {
        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?",
    }
    for scenario_type, question in scenario_tests.items():
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")
        if analysis.targeting_score >= 0.5:
            print(" ✓ Good scenario targeting")
        else:
            print(" ⚠ Weak scenario targeting")


def test_question_validator():
    """Exercise QuestionEffectivenessValidator end to end and print the results.

    Runs seven sections in order: high-quality questions, poor-quality
    questions, component scores, batch validation, report generation,
    edge cases, and scenario-specific targeting.

    Returns:
        True when every section ran to completion; False when an edge-case
        input made the validator raise (later sections are then skipped).

    NOTE(review): score thresholds (0.6 / 0.5) are only reported with a
    ✓ / ⚠ marker; they never fail the run. Confirm whether they should
    become assertions.
    """
    print("Testing QuestionEffectivenessValidator...")
    validator = QuestionEffectivenessValidator()
    print("✓ QuestionEffectivenessValidator initialized")

    _report_high_quality(validator)
    _report_poor_quality(validator)
    _report_component_scores(validator)
    batch_results = _run_batch_validation(validator)
    _report_effectiveness_summary(validator, batch_results)
    if not _run_edge_cases(validator):
        return False
    _report_scenario_targeting(validator)

    print("\n✓ All QuestionEffectivenessValidator tests passed!")
    return True
if __name__ == "__main__":
    # Map the boolean outcome onto a process exit status: 0 on success, 1 otherwise.
    exit_status = 1
    if test_question_validator():
        exit_status = 0
    sys.exit(exit_status)