#!/usr/bin/env python3
"""
Test script for the pattern recognizer and error pattern analysis.

Tests Task 4.4 implementation.
"""

import os
import sys

# Make the project's src/ directory importable when running this script directly.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from config.prompt_management.pattern_recognizer import PatternRecognizer
from config.prompt_management.feedback_system import FeedbackSystem
from config.prompt_management.data_models import (
    ErrorType, ErrorSubcategory, QuestionIssueType,
    ReferralProblemType, ScenarioType
)


def test_pattern_recognizer_initialization():
    """Test that the pattern recognizer initializes correctly."""
    print("Testing pattern recognizer initialization...")

    # Test with default parameters
    recognizer = PatternRecognizer()
    assert recognizer.min_pattern_frequency == 3
    assert recognizer.confidence_threshold == 0.7
    assert hasattr(recognizer, 'analysis_strategies')
    assert hasattr(recognizer, 'suggestion_templates')

    # Test with custom parameters
    custom_recognizer = PatternRecognizer(min_pattern_frequency=5, confidence_threshold=0.8)
    assert custom_recognizer.min_pattern_frequency == 5
    assert custom_recognizer.confidence_threshold == 0.8

    print("✓ Pattern recognizer initializes correctly")
    return True


def test_classification_error_pattern_analysis():
    """Test pattern analysis for classification errors."""
    print("Testing classification error pattern analysis...")

    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Create test classification errors
    test_errors = []

    # Create multiple wrong classification errors
    for i in range(4):
        test_errors.append({
            'error_id': f'error_{i}',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': f'I feel stressed about work {i}',
            'reviewer_comments': f'Test comment {i}',
            'confidence_level': 0.8 + (i * 0.05),
            'timestamp': '2024-12-18T10:00:00',
            'session_id': f'session_{i}',
            'additional_context': {'scenario_type': 'vague_stress'}
        })

    # Create severity misjudgment errors
    for i in range(3):
        test_errors.append({
            'error_id': f'severity_{i}',
            'error_type': 'severity_misjudgment',
            'subcategory': 'underestimated_distress',
            'expected_category': 'RED',
            'actual_category': 'YELLOW',
            'message_content': f'I cannot go on like this {i}',
            'reviewer_comments': f'Severe distress comment {i}',
            'confidence_level': 0.9,
            'timestamp': '2024-12-18T11:00:00',
            'session_id': f'severity_session_{i}',
            'additional_context': {}
        })

    # Analyze patterns
    patterns = recognizer._analyze_classification_error_patterns(test_errors)

    # Verify patterns were identified
    assert len(patterns) > 0, "Should identify patterns in test data"

    # Check for wrong classification pattern
    wrong_classification_patterns = [p for p in patterns if 'wrong_classification' in p.pattern_type]
    assert len(wrong_classification_patterns) > 0, "Should identify wrong classification pattern"

    wrong_pattern = wrong_classification_patterns[0]
    assert wrong_pattern.frequency == 4, "Wrong classification pattern should have frequency 4"
    assert len(wrong_pattern.suggested_improvements) > 0, "Should have improvement suggestions"

    # Check for severity misjudgment pattern
    severity_patterns = [p for p in patterns if 'severity_misjudgment' in p.pattern_type]
    assert len(severity_patterns) > 0, "Should identify severity misjudgment pattern"

    severity_pattern = severity_patterns[0]
    assert severity_pattern.frequency == 3, "Severity pattern should have frequency 3"

    print(f"✓ Identified {len(patterns)} classification error patterns")
    for pattern in patterns[:3]:  # Show first 3 patterns
        print(f"  - {pattern.description} (confidence: {pattern.confidence_score:.2f})")

    return True


def test_question_issue_pattern_analysis():
    """Test pattern analysis for question issues."""
    print("Testing question issue pattern analysis...")

    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Create test question issues
    test_questions = []

    # Create inappropriate question issues
    for i in range(3):
        test_questions.append({
            'issue_id': f'question_{i}',
            'issue_type': 'inappropriate_question',
            'question_content': f'Why are you sad? {i}',
            'scenario_type': 'loss_of_interest',
            'reviewer_comments': f'Too direct question {i}',
            'severity': 'medium',
            'timestamp': '2024-12-18T12:00:00',
            'session_id': f'question_session_{i}',
            'suggested_improvement': f'Better question {i}'
        })

    # Create wrong scenario targeting issues
    for i in range(2):
        test_questions.append({
            'issue_id': f'targeting_{i}',
            'issue_type': 'wrong_scenario_targeting',
            'question_content': f'How does that make you feel? {i}',
            'scenario_type': 'vague_stress',
            'reviewer_comments': f'Wrong targeting comment {i}',
            'severity': 'high',
            'timestamp': '2024-12-18T13:00:00',
            'session_id': f'targeting_session_{i}',
            'suggested_improvement': None
        })

    # Analyze patterns
    patterns = recognizer._analyze_question_issue_patterns(test_questions)

    # Verify patterns were identified
    assert len(patterns) > 0, "Should identify question issue patterns"

    # Check for inappropriate question pattern
    inappropriate_patterns = [p for p in patterns if 'inappropriate_question' in p.pattern_type]
    assert len(inappropriate_patterns) > 0, "Should identify inappropriate question pattern"

    inappropriate_pattern = inappropriate_patterns[0]
    assert inappropriate_pattern.frequency == 3, "Inappropriate question pattern should have frequency 3"

    print(f"✓ Identified {len(patterns)} question issue patterns")
    for pattern in patterns:
        print(f"  - {pattern.description} (confidence: {pattern.confidence_score:.2f})")

    return True


def test_comprehensive_pattern_analysis():
    """Test comprehensive pattern analysis across all feedback types."""
    print("Testing comprehensive pattern analysis...")

    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Create mixed test data
    test_errors = [
        {
            'error_id': 'comp_error_1',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': 'I feel overwhelmed',
            'reviewer_comments': 'Clear distress missed',
            'confidence_level': 0.9,
            'timestamp': '2024-12-18T14:00:00',
            'session_id': 'comp_session_1',
            'additional_context': {}
        },
        {
            'error_id': 'comp_error_2',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': 'Everything is falling apart',
            'reviewer_comments': 'Obvious distress indicators',
            'confidence_level': 0.95,
            'timestamp': '2024-12-18T14:30:00',
            'session_id': 'comp_session_2',
            'additional_context': {}
        }
    ]

    test_questions = [
        {
            'issue_id': 'comp_question_1',
            'issue_type': 'insensitive_language',
            'question_content': 'What is wrong with you?',
            'scenario_type': 'vague_stress',
            'reviewer_comments': 'Harsh language',
            'severity': 'high',
            'timestamp': '2024-12-18T15:00:00',
            'session_id': 'comp_session_1',  # Same session as error
            'suggested_improvement': 'Use gentler language'
        }
    ]

    test_referrals = [
        {
            'problem_id': 'comp_referral_1',
            'problem_type': 'incomplete_summary',
            'referral_content': 'Patient needs help.',
            'reviewer_comments': 'Missing details',
            'severity': 'medium',
            'timestamp': '2024-12-18T16:00:00',
            'session_id': 'comp_session_3',
            'missing_fields': ['distress_indicators', 'urgency_level']
        }
    ]

    # Analyze comprehensive patterns
    patterns = recognizer.analyze_comprehensive_patterns(test_errors, test_questions, test_referrals)

    # Verify patterns were identified
    assert len(patterns) > 0, "Should identify comprehensive patterns"

    # Cross-feedback patterns (same session appearing in both error and question
    # data) may not always be found with such a small dataset, so no assertion
    # is made on 'correlation'-type patterns here.

    print(f"✓ Identified {len(patterns)} comprehensive patterns")
    for pattern in patterns[:5]:  # Show first 5 patterns
        print(f"  - {pattern.description}")
        if pattern.suggested_improvements:
            print(f"    Suggestion: {pattern.suggested_improvements[0]}")

    return True


def test_optimization_report_generation():
    """Test optimization report generation."""
    print("Testing optimization report generation...")

    recognizer = PatternRecognizer(min_pattern_frequency=1)

    # Create test patterns
    from config.prompt_management.data_models import ErrorPattern

    test_patterns = [
        ErrorPattern(
            pattern_id="test_pattern_1",
            pattern_type="error_type_wrong_classification",
            description="Frequent wrong classification errors (5 occurrences)",
            frequency=5,
            affected_scenarios=[ScenarioType.VAGUE_STRESS],
            suggested_improvements=[
                "Review classification criteria",
                "Add more training examples",
                "Improve decision boundaries"
            ],
            confidence_score=0.8
        ),
        ErrorPattern(
            pattern_id="test_pattern_2",
            pattern_type="question_issue_inappropriate_question",
            description="Frequent inappropriate question issues (3 occurrences)",
            frequency=3,
            affected_scenarios=[ScenarioType.LOSS_OF_INTEREST],
            suggested_improvements=[
                "Review question appropriateness",
                "Add sensitivity training"
            ],
            confidence_score=0.6
        )
    ]

    # Generate optimization report
    report = recognizer.generate_optimization_report(test_patterns)

    # Verify report structure
    required_fields = [
        'summary', 'total_patterns', 'recommendations', 'priority_actions',
        'confidence_score', 'most_frequent_pattern', 'affected_scenarios',
        'report_generated'
    ]
    for field in required_fields:
        assert field in report, f"Report missing required field: {field}"

    # Verify report content
    assert report['total_patterns'] == 2, "Should report correct number of patterns"
    assert len(report['recommendations']) > 0, "Should have recommendations"
    assert 0.0 <= report['confidence_score'] <= 1.0, "Confidence score should be valid"
    assert report['most_frequent_pattern']['frequency'] == 5, "Should identify most frequent pattern"

    print("✓ Optimization report generated successfully")
    print(f"  - Total patterns: {report['total_patterns']}")
    print(f"  - Confidence score: {report['confidence_score']:.2f}")
    print(f"  - Top recommendation: {report['recommendations'][0] if report['recommendations'] else 'None'}")

    return True


def test_feedback_system_integration():
    """Test integration with feedback system."""
    print("Testing feedback system integration...")

    # Create feedback system with pattern recognizer
    feedback_system = FeedbackSystem(storage_path=".verification_data/test_pattern_integration")

    # Record multiple similar errors to create patterns
    for i in range(4):
        feedback_system.record_classification_error(
            error_type=ErrorType.WRONG_CLASSIFICATION,
            subcategory=ErrorSubcategory.GREEN_TO_YELLOW,
            expected_category="YELLOW",
            actual_category="GREEN",
            message_content=f"I feel stressed and overwhelmed {i}",
            reviewer_comments=f"Clear distress indicators missed {i}",
            confidence_level=0.85 + (i * 0.02),
            session_id=f"integration_session_{i}",
            additional_context={"scenario_type": "vague_stress"}
        )

    # Record question issues
    for i in range(3):
        feedback_system.record_question_issue(
            issue_type=QuestionIssueType.INAPPROPRIATE_QUESTION,
            question_content=f"What's wrong with you? {i}",
            scenario_type=ScenarioType.VAGUE_STRESS,
            reviewer_comments=f"Too harsh language {i}",
            severity="high",
            session_id=f"integration_session_{i}"
        )

    # Analyze patterns through feedback system
    patterns = feedback_system.analyze_error_patterns(min_frequency=2)

    # Verify patterns were identified
    assert len(patterns) > 0, "Feedback system should identify patterns"

    # Generate optimization report
    report = feedback_system.generate_optimization_report()

    # Verify report
    assert report['total_patterns'] > 0, "Should have patterns in report"
    assert len(report['recommendations']) > 0, "Should have recommendations"

    print(f"✓ Feedback system integration works")
    print(f"  - Patterns identified: {len(patterns)}")
    print(f"  - Report confidence: {report['confidence_score']:.2f}")

    return True


def main():
    """Run all pattern recognizer tests."""
    print("=" * 60)
    print("PATTERN RECOGNIZER TESTS")
    print("=" * 60)

    tests = [
        test_pattern_recognizer_initialization,
        test_classification_error_pattern_analysis,
        test_question_issue_pattern_analysis,
        test_comprehensive_pattern_analysis,
        test_optimization_report_generation,
        test_feedback_system_integration
    ]

    passed = 0
    failed = 0

    for test in tests:
        try:
            print(f"\n{test.__name__.replace('_', ' ').title()}:")
            print("-" * 40)
            result = test()
            if result:
                passed += 1
                print("✓ PASSED")
            else:
                failed += 1
                print("✗ FAILED")
        except Exception as e:
            # Broad catch is deliberate here: one failing test must not abort
            # the remaining tests in this standalone runner.
            failed += 1
            print(f"✗ FAILED: {str(e)}")

    print("\n" + "=" * 60)
    print(f"RESULTS: {passed} passed, {failed} failed")
    print("=" * 60)

    if failed == 0:
        print("🎉 All pattern recognizer tests passed!")
        print("\n**Task 4.4: Error Pattern Analysis**")
        print("✓ COMPLETED: PatternRecognizer for identifying common error types")
        print("✓ COMPLETED: Automated improvement suggestion generation")
        print("✓ COMPLETED: Feedback aggregation and reporting")
        print("✓ COMPLETED: Integration with FeedbackSystem")
        return True
    else:
        print("❌ Some tests failed. Please check the implementation.")
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)