#!/usr/bin/env python3
"""
Test script for QuestionEffectivenessValidator functionality.

Runs a series of smoke checks against the validator and prints a
human-readable transcript. Score thresholds only produce check/warning
marks in the output; the only condition that makes the script exit with a
non-zero status is an exception raised while validating an edge case.
"""

import sys
import os

# The project packages live under <repo>/src, two directories above this
# file; the path must be extended before the project imports below.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality
from config.prompt_management.data_models import ScenarioType


def _print_question_and_score(question, analysis):
    """Print the truncated question text and its overall score line."""
    print(f" Question: {question[:50]}...")
    print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")


def _run_high_quality_checks(validator):
    """Test 1: validate well-formed questions and report their scores."""
    print("\n1. Testing high-quality question validation...")

    high_quality_questions = [
        ("You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?", ScenarioType.LOSS_OF_INTEREST),
        ("I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?", ScenarioType.NO_SUPPORT),
        ("I hear that things have been stressful. Can you tell me more about what's been causing that stress?", ScenarioType.VAGUE_STRESS),
        ("Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?", ScenarioType.SLEEP_ISSUES),
    ]

    for question, scenario_type in high_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        _print_question_and_score(question, analysis)
        print(
            f" Targeting: {analysis.targeting_score:.2f}, "
            f"Empathy: {analysis.empathy_score:.2f}, "
            f"Clarity: {analysis.clarity_score:.2f}"
        )

        # Informational only: a low score is flagged but does not fail the run.
        if analysis.effectiveness_score >= 0.6:
            print(" ✓ High quality achieved")
        else:
            print(" ⚠ Lower than expected quality")

        if analysis.strengths:
            print(f" Strengths: {len(analysis.strengths)} identified")
        print()


def _run_poor_quality_checks(validator):
    """Test 2: validate weak questions and report weaknesses/suggestions."""
    print("2. Testing poor-quality question validation...")

    poor_quality_questions = [
        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
        ("Okay.", ScenarioType.NO_SUPPORT),
        ("Tell me more", ScenarioType.VAGUE_STRESS),
        ("Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?", ScenarioType.SLEEP_ISSUES),
    ]

    for question, scenario_type in poor_quality_questions:
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        _print_question_and_score(question, analysis)

        # Informational only: a high score is flagged but does not fail the run.
        if analysis.effectiveness_score < 0.5:
            print(" ✓ Correctly identified as low quality")
        else:
            print(" ⚠ Higher than expected quality")

        # Only the first two entries are shown to keep the transcript short.
        if analysis.weaknesses:
            print(f" Weaknesses: {analysis.weaknesses[:2]}")
        if analysis.suggestions:
            print(f" Suggestions: {analysis.suggestions[:2]}")
        print()


def _run_component_scoring(validator):
    """Test 3: print the targeting, empathy and clarity component scores."""
    print("3. Testing component scoring...")

    # Test targeting score
    targeting_test = "Is that something that's been weighing on you emotionally, or is it more about circumstances?"
    analysis = validator.validate_question_effectiveness(targeting_test, ScenarioType.LOSS_OF_INTEREST)
    print(f" Targeting test: {analysis.targeting_score:.2f}")

    # Test empathy score
    empathy_test = "I'm sorry for your loss. I understand this must be very difficult for you."
    analysis = validator.validate_question_effectiveness(empathy_test, ScenarioType.LOSS_OF_LOVED_ONE)
    print(f" Empathy test: {analysis.empathy_score:.2f}")

    # Test clarity score
    clarity_test = "What specifically has been causing your sleep problems?"
    analysis = validator.validate_question_effectiveness(clarity_test, ScenarioType.SLEEP_ISSUES)
    print(f" Clarity test: {analysis.clarity_score:.2f}")


def _run_batch_validation(validator):
    """Test 4: validate several questions in one call.

    Returns:
        The results from ``validator.batch_validate_questions``, so the
        report check can reuse them.
    """
    print("\n4. Testing batch validation...")

    batch_questions = [
        ("You mentioned you can't garden anymore. Is that weighing on you emotionally?", ScenarioType.LOSS_OF_INTEREST),
        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
        ("What's causing your stress?", ScenarioType.VAGUE_STRESS),
    ]

    batch_results = validator.batch_validate_questions(batch_questions)
    print(f" ✓ Batch validated {len(batch_results)} questions")

    for number, result in enumerate(batch_results, start=1):
        print(f" Question {number}: {result.effectiveness_score:.2f} ({result.quality_level.value})")

    return batch_results


def _run_report_generation(validator, batch_results):
    """Test 5: build an effectiveness report from batch results and print it."""
    print("\n5. Testing effectiveness report generation...")

    report = validator.generate_effectiveness_report(batch_results)
    print(f" ✓ Report generated for {report['total_questions']} questions")
    print(f" Average effectiveness: {report['average_scores']['effectiveness']}")
    print(f" Quality distribution: {report['quality_distribution']}")

    # Each entry is indexed as [0]; the first element of the first entry
    # is what gets displayed.
    if report['common_strengths']:
        print(f" Most common strength: {report['common_strengths'][0][0]}")
    if report['common_weaknesses']:
        print(f" Most common weakness: {report['common_weaknesses'][0][0]}")


def _run_edge_cases(validator):
    """Test 6: feed unusual inputs to the validator.

    Returns:
        True if every edge case was processed, False as soon as one of
        them raises.
    """
    print("\n6. Testing edge cases...")

    edge_cases = [
        ("", None),  # Empty question
        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
        ("A" * 200, ScenarioType.NO_SUPPORT),  # Very long question
    ]

    for question, scenario_type in edge_cases:
        # Deliberately broad: any failure here (including a malformed
        # result object) is reported as a failed edge case, not a crash.
        try:
            analysis = validator.validate_question_effectiveness(question, scenario_type)
            print(f" ✓ Handled edge case: {len(question)} chars → {analysis.effectiveness_score:.2f}")
        except Exception as e:
            print(f" ✗ Edge case failed: {e}")
            return False

    return True


def _run_scenario_targeting(validator):
    """Test 7: print the targeting score of one question per scenario type."""
    print("\n7. Testing scenario-specific validation...")

    scenario_tests = {
        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?",
    }

    for scenario_type, question in scenario_tests.items():
        analysis = validator.validate_question_effectiveness(question, scenario_type)
        print(f" {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")

        # Informational only: weak targeting is flagged but does not fail the run.
        if analysis.targeting_score >= 0.5:
            print(" ✓ Good scenario targeting")
        else:
            print(" ⚠ Weak scenario targeting")


def test_question_validator():
    """Test QuestionEffectivenessValidator functionality.

    Runs the seven checks in order against a single validator instance and
    prints their output.

    Returns:
        True when all checks ran to completion; False if an edge-case
        input raised an exception (the remaining checks are then skipped).
    """
    print("Testing QuestionEffectivenessValidator...")

    # Initialize validator
    validator = QuestionEffectivenessValidator()
    print("✓ QuestionEffectivenessValidator initialized")

    _run_high_quality_checks(validator)
    _run_poor_quality_checks(validator)
    _run_component_scoring(validator)

    # The report check consumes the results produced by the batch check.
    batch_results = _run_batch_validation(validator)
    _run_report_generation(validator, batch_results)

    if not _run_edge_cases(validator):
        return False

    _run_scenario_targeting(validator)

    print("\n✓ All QuestionEffectivenessValidator tests passed!")
    return True


if __name__ == "__main__":
    success = test_question_validator()
    sys.exit(0 if success else 1)