Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test script for QuestionEffectivenessValidator functionality. | |
| """ | |
| import sys | |
| import os | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) | |
| from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality | |
| from config.prompt_management.data_models import ScenarioType | |
# ASCII status markers. The glyphs previously embedded in the messages were
# mis-encoded into one identical character, which made success, warning and
# failure lines indistinguishable in the output.
_OK = "[OK]"
_WARN = "[WARN]"
_FAIL = "[FAIL]"


def _check_high_quality_questions(validator: "QuestionEffectivenessValidator") -> int:
    """Score well-formed questions and return how many scored below 0.6."""
    print("\n1. Testing high-quality question validation...")
    high_quality_questions = [
        ("You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?", ScenarioType.LOSS_OF_INTEREST),
        ("I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?", ScenarioType.NO_SUPPORT),
        ("I hear that things have been stressful. Can you tell me more about what's been causing that stress?", ScenarioType.VAGUE_STRESS),
        ("Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?", ScenarioType.SLEEP_ISSUES),
    ]

    unmet = 0
    for question, scenario_type in high_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" Question: {question[:50]}...")
        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        print(f" Targeting: {analysis.targeting_score:.2f}, Empathy: {analysis.empathy_score:.2f}, Clarity: {analysis.clarity_score:.2f}")
        if analysis.effectiveness_score >= 0.6:
            print(f" {_OK} High quality achieved")
        else:
            # Soft expectation: reported and counted, but not a hard failure.
            unmet += 1
            print(f" {_WARN} Lower than expected quality")
        if analysis.strengths:
            print(f" Strengths: {len(analysis.strengths)} identified")
        print()
    return unmet


def _check_poor_quality_questions(validator: "QuestionEffectivenessValidator") -> int:
    """Score weak questions and return how many scored 0.5 or higher."""
    print("2. Testing poor-quality question validation...")
    poor_quality_questions = [
        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
        ("Okay.", ScenarioType.NO_SUPPORT),
        ("Tell me more", ScenarioType.VAGUE_STRESS),
        ("Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?", ScenarioType.SLEEP_ISSUES),
    ]

    unmet = 0
    for question, scenario_type in poor_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" Question: {question[:50]}...")
        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
        if analysis.effectiveness_score < 0.5:
            print(f" {_OK} Correctly identified as low quality")
        else:
            # Soft expectation: reported and counted, but not a hard failure.
            unmet += 1
            print(f" {_WARN} Higher than expected quality")
        if analysis.weaknesses:
            print(f" Weaknesses: {analysis.weaknesses[:2]}")
        if analysis.suggestions:
            print(f" Suggestions: {analysis.suggestions[:2]}")
        print()
    return unmet


def _check_component_scores(validator: "QuestionEffectivenessValidator") -> None:
    """Print the targeting, empathy and clarity scores for one sample each."""
    print("3. Testing component scoring...")

    targeting_test = "Is that something that's been weighing on you emotionally, or is it more about circumstances?"
    analysis = validator.validate_question_effectiveness(targeting_test, ScenarioType.LOSS_OF_INTEREST)
    print(f" Targeting test: {analysis.targeting_score:.2f}")

    empathy_test = "I'm sorry for your loss. I understand this must be very difficult for you."
    analysis = validator.validate_question_effectiveness(empathy_test, ScenarioType.LOSS_OF_LOVED_ONE)
    print(f" Empathy test: {analysis.empathy_score:.2f}")

    clarity_test = "What specifically has been causing your sleep problems?"
    analysis = validator.validate_question_effectiveness(clarity_test, ScenarioType.SLEEP_ISSUES)
    print(f" Clarity test: {analysis.clarity_score:.2f}")


def _check_batch_and_report(validator: "QuestionEffectivenessValidator") -> None:
    """Batch-validate three questions, then print the report built from them."""
    print("\n4. Testing batch validation...")
    batch_questions = [
        ("You mentioned you can't garden anymore. Is that weighing on you emotionally?", ScenarioType.LOSS_OF_INTEREST),
        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("What's causing your stress?", ScenarioType.VAGUE_STRESS),
    ]
    batch_results = validator.batch_validate_questions(batch_questions)
    print(f" {_OK} Batch validated {len(batch_results)} questions")
    for index, result in enumerate(batch_results, start=1):
        print(f" Question {index}: {result.effectiveness_score:.2f} ({result.quality_level.value})")

    # The report is generated from the same batch results validated above.
    print("\n5. Testing effectiveness report generation...")
    report = validator.generate_effectiveness_report(batch_results)
    print(f" {_OK} Report generated for {report['total_questions']} questions")
    print(f" Average effectiveness: {report['average_scores']['effectiveness']}")
    print(f" Quality distribution: {report['quality_distribution']}")
    if report['common_strengths']:
        print(f" Most common strength: {report['common_strengths'][0][0]}")
    if report['common_weaknesses']:
        print(f" Most common weakness: {report['common_weaknesses'][0][0]}")


def _check_edge_cases(validator: "QuestionEffectivenessValidator") -> bool:
    """Feed unusual inputs to the validator; return False if any call raises."""
    print("\n6. Testing edge cases...")
    edge_cases = [
        ("", None),  # Empty question
        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
        ("A" * 200, ScenarioType.NO_SUPPORT),  # Very long question
    ]
    for question, scenario_type in edge_cases:
        try:
            analysis = validator.validate_question_effectiveness(question, scenario_type)
        except Exception as e:
            # Deliberately broad: any exception on an edge case is the failure
            # being tested for, and it is reported rather than swallowed.
            print(f" {_FAIL} Edge case failed: {e}")
            return False
        print(f" {_OK} Handled edge case: {len(question)} chars -> {analysis.effectiveness_score:.2f}")
    return True


def _check_scenario_targeting(validator: "QuestionEffectivenessValidator") -> int:
    """Score one question per scenario; return how many targeted below 0.5."""
    print("\n7. Testing scenario-specific validation...")
    scenario_tests = {
        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?",
    }

    unmet = 0
    for scenario_type, question in scenario_tests.items():
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")
        if analysis.targeting_score >= 0.5:
            print(f" {_OK} Good scenario targeting")
        else:
            # Soft expectation: reported and counted, but not a hard failure.
            unmet += 1
            print(f" {_WARN} Weak scenario targeting")
    return unmet


def test_question_validator():
    """Exercise QuestionEffectivenessValidator end to end and print the results.

    Runs seven sections in order: high-quality questions, poor-quality
    questions, component scores, batch validation, report generation, edge
    cases and scenario-specific targeting.

    Score thresholds are soft expectations: a miss is printed and counted in
    the closing summary but does not change the return value.

    Returns:
        bool: False if the validator raised on any edge-case input (the run
        stops at that point), True otherwise.
    """
    print("Testing QuestionEffectivenessValidator...")

    validator = QuestionEffectivenessValidator()
    print(f"{_OK} QuestionEffectivenessValidator initialized")

    unmet = _check_high_quality_questions(validator)
    unmet += _check_poor_quality_questions(validator)
    _check_component_scores(validator)
    _check_batch_and_report(validator)
    if not _check_edge_cases(validator):
        return False
    unmet += _check_scenario_targeting(validator)

    if unmet:
        # Do not claim that everything passed when soft expectations were missed.
        print(f"\n{_WARN} QuestionEffectivenessValidator tests completed; {unmet} quality expectation(s) not met")
    else:
        print(f"\n{_OK} All QuestionEffectivenessValidator tests passed!")
    return True
if __name__ == "__main__":
    # Map the boolean test outcome to a process exit status:
    # 0 when the run reports success, 1 otherwise.
    exit_code = 0 if test_question_validator() else 1
    sys.exit(exit_code)