Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test script for the pattern recognizer and error pattern analysis. | |
| Tests Task 4.4 implementation. | |
| """ | |
| import sys | |
| import os | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) | |
| from config.prompt_management.pattern_recognizer import PatternRecognizer | |
| from config.prompt_management.feedback_system import FeedbackSystem | |
| from config.prompt_management.data_models import ( | |
| ErrorType, ErrorSubcategory, QuestionIssueType, ReferralProblemType, ScenarioType | |
| ) | |
def test_pattern_recognizer_initialization():
    """Check that PatternRecognizer exposes its default and custom settings.

    Constructs one recognizer with no arguments and one with explicit
    ``min_pattern_frequency`` / ``confidence_threshold`` values, then verifies
    the attributes read back as expected.

    Returns:
        True when every assertion passes (``main`` counts a truthy result
        as a pass).

    Raises:
        AssertionError: If any attribute is missing or holds another value.
    """
    print("Testing pattern recognizer initialization...")

    # Default construction. Each assert carries a message because the runner
    # in this file reports only the exception text on failure.
    recognizer = PatternRecognizer()
    assert recognizer.min_pattern_frequency == 3, (
        "Default min_pattern_frequency should be 3, "
        f"got {recognizer.min_pattern_frequency!r}"
    )
    assert recognizer.confidence_threshold == 0.7, (
        "Default confidence_threshold should be 0.7, "
        f"got {recognizer.confidence_threshold!r}"
    )
    assert hasattr(recognizer, 'analysis_strategies'), (
        "Recognizer should expose analysis_strategies"
    )
    assert hasattr(recognizer, 'suggestion_templates'), (
        "Recognizer should expose suggestion_templates"
    )

    # Explicit construction.
    custom_recognizer = PatternRecognizer(min_pattern_frequency=5, confidence_threshold=0.8)
    assert custom_recognizer.min_pattern_frequency == 5, (
        "Custom min_pattern_frequency should be 5, "
        f"got {custom_recognizer.min_pattern_frequency!r}"
    )
    assert custom_recognizer.confidence_threshold == 0.8, (
        "Custom confidence_threshold should be 0.8, "
        f"got {custom_recognizer.confidence_threshold!r}"
    )

    print("✓ Pattern recognizer initializes correctly")
    return True
def test_classification_error_pattern_analysis():
    """Check that repeated classification errors are grouped into patterns.

    Builds four ``wrong_classification`` records and three
    ``severity_misjudgment`` records as plain dicts, passes them to
    ``PatternRecognizer._analyze_classification_error_patterns`` (with
    ``min_pattern_frequency=2``) and verifies that a pattern with the matching
    frequency is reported for each error type.

    Returns:
        True when every assertion passes.

    Raises:
        AssertionError: If an expected pattern is missing or has the wrong
            frequency, or if no improvement suggestions are attached.
    """
    print("Testing classification error pattern analysis...")
    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Four wrong-classification errors sharing type and subcategory.
    wrong_classification_errors = [
        {
            'error_id': f'error_{i}',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': f'I feel stressed about work {i}',
            'reviewer_comments': f'Test comment {i}',
            'confidence_level': 0.8 + (i * 0.05),
            'timestamp': '2024-12-18T10:00:00',
            'session_id': f'session_{i}',
            'additional_context': {'scenario_type': 'vague_stress'},
        }
        for i in range(4)
    ]

    # Three severity-misjudgment errors.
    severity_errors = [
        {
            'error_id': f'severity_{i}',
            'error_type': 'severity_misjudgment',
            'subcategory': 'underestimated_distress',
            'expected_category': 'RED',
            'actual_category': 'YELLOW',
            'message_content': f'I cannot go on like this {i}',
            'reviewer_comments': f'Severe distress comment {i}',
            'confidence_level': 0.9,
            'timestamp': '2024-12-18T11:00:00',
            'session_id': f'severity_session_{i}',
            'additional_context': {},
        }
        for i in range(3)
    ]

    test_errors = wrong_classification_errors + severity_errors

    # Analyze patterns
    patterns = recognizer._analyze_classification_error_patterns(test_errors)

    # Verify patterns were identified
    assert len(patterns) > 0, "Should identify patterns in test data"

    # Check for wrong classification pattern
    wrong_classification_patterns = [p for p in patterns if 'wrong_classification' in p.pattern_type]
    assert len(wrong_classification_patterns) > 0, "Should identify wrong classification pattern"
    wrong_pattern = wrong_classification_patterns[0]
    assert wrong_pattern.frequency == 4, "Wrong classification pattern should have frequency 4"
    assert len(wrong_pattern.suggested_improvements) > 0, "Should have improvement suggestions"

    # Check for severity misjudgment pattern
    severity_patterns = [p for p in patterns if 'severity_misjudgment' in p.pattern_type]
    assert len(severity_patterns) > 0, "Should identify severity misjudgment pattern"
    severity_pattern = severity_patterns[0]
    assert severity_pattern.frequency == 3, "Severity pattern should have frequency 3"

    print(f"✓ Identified {len(patterns)} classification error patterns")
    for pattern in patterns[:3]:  # Show first 3 patterns
        print(f" - {pattern.description} (confidence: {pattern.confidence_score:.2f})")
    return True
def test_question_issue_pattern_analysis():
    """Check that repeated question issues are grouped into patterns.

    Builds three ``inappropriate_question`` records and two
    ``wrong_scenario_targeting`` records as plain dicts, passes them to
    ``PatternRecognizer._analyze_question_issue_patterns`` (with
    ``min_pattern_frequency=2``) and verifies that an inappropriate-question
    pattern with frequency 3 is reported.

    Returns:
        True when every assertion passes.

    Raises:
        AssertionError: If no pattern is found or the inappropriate-question
            pattern is missing or has the wrong frequency.
    """
    print("Testing question issue pattern analysis...")
    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Three inappropriate-question issues.
    inappropriate_questions = [
        {
            'issue_id': f'question_{i}',
            'issue_type': 'inappropriate_question',
            'question_content': f'Why are you sad? {i}',
            'scenario_type': 'loss_of_interest',
            'reviewer_comments': f'Too direct question {i}',
            'severity': 'medium',
            'timestamp': '2024-12-18T12:00:00',
            'session_id': f'question_session_{i}',
            'suggested_improvement': f'Better question {i}',
        }
        for i in range(3)
    ]

    # Two wrong-scenario-targeting issues; these carry no suggested improvement.
    targeting_questions = [
        {
            'issue_id': f'targeting_{i}',
            'issue_type': 'wrong_scenario_targeting',
            'question_content': f'How does that make you feel? {i}',
            'scenario_type': 'vague_stress',
            'reviewer_comments': f'Wrong targeting comment {i}',
            'severity': 'high',
            'timestamp': '2024-12-18T13:00:00',
            'session_id': f'targeting_session_{i}',
            'suggested_improvement': None,
        }
        for i in range(2)
    ]

    test_questions = inappropriate_questions + targeting_questions

    # Analyze patterns
    patterns = recognizer._analyze_question_issue_patterns(test_questions)

    # Verify patterns were identified
    assert len(patterns) > 0, "Should identify question issue patterns"

    # Check for inappropriate question pattern
    inappropriate_patterns = [p for p in patterns if 'inappropriate_question' in p.pattern_type]
    assert len(inappropriate_patterns) > 0, "Should identify inappropriate question pattern"
    inappropriate_pattern = inappropriate_patterns[0]
    assert inappropriate_pattern.frequency == 3, "Inappropriate question pattern should have frequency 3"

    print(f"✓ Identified {len(patterns)} question issue patterns")
    for pattern in patterns:
        print(f" - {pattern.description} (confidence: {pattern.confidence_score:.2f})")
    return True
def test_comprehensive_pattern_analysis():
    """Check pattern analysis across errors, question issues and referrals.

    Passes two classification errors, one question issue and one referral
    problem (all plain dicts) to
    ``PatternRecognizer.analyze_comprehensive_patterns`` and verifies that at
    least one pattern comes back. Patterns whose ``pattern_type`` contains
    ``'correlation'`` are counted and printed but not asserted on, since the
    small data set may not produce any.

    Returns:
        True when every assertion passes.

    Raises:
        AssertionError: If no patterns are identified.
    """
    print("Testing comprehensive pattern analysis...")
    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Create mixed test data
    test_errors = [
        {
            'error_id': 'comp_error_1',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': 'I feel overwhelmed',
            'reviewer_comments': 'Clear distress missed',
            'confidence_level': 0.9,
            'timestamp': '2024-12-18T14:00:00',
            'session_id': 'comp_session_1',
            'additional_context': {},
        },
        {
            'error_id': 'comp_error_2',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': 'Everything is falling apart',
            'reviewer_comments': 'Obvious distress indicators',
            'confidence_level': 0.95,
            'timestamp': '2024-12-18T14:30:00',
            'session_id': 'comp_session_2',
            'additional_context': {},
        },
    ]
    test_questions = [
        {
            'issue_id': 'comp_question_1',
            'issue_type': 'insensitive_language',
            'question_content': 'What is wrong with you?',
            'scenario_type': 'vague_stress',
            'reviewer_comments': 'Harsh language',
            'severity': 'high',
            'timestamp': '2024-12-18T15:00:00',
            'session_id': 'comp_session_1',  # Same session as error
            'suggested_improvement': 'Use gentler language',
        },
    ]
    test_referrals = [
        {
            'problem_id': 'comp_referral_1',
            'problem_type': 'incomplete_summary',
            'referral_content': 'Patient needs help.',
            'reviewer_comments': 'Missing details',
            'severity': 'medium',
            'timestamp': '2024-12-18T16:00:00',
            'session_id': 'comp_session_3',
            'missing_fields': ['distress_indicators', 'urgency_level'],
        },
    ]

    # Analyze comprehensive patterns
    patterns = recognizer.analyze_comprehensive_patterns(test_errors, test_questions, test_referrals)

    # Verify patterns were identified
    assert len(patterns) > 0, "Should identify comprehensive patterns"

    # Cross-feedback patterns (same session with error and question).
    # Informational only: may not always find correlation with small test data.
    cross_patterns = [p for p in patterns if 'correlation' in p.pattern_type]

    print(f"✓ Identified {len(patterns)} comprehensive patterns")
    print(f" - Cross-feedback correlation patterns: {len(cross_patterns)}")
    for pattern in patterns[:5]:  # Show first 5 patterns
        print(f" - {pattern.description}")
        if pattern.suggested_improvements:
            print(f" Suggestion: {pattern.suggested_improvements[0]}")
    return True
def test_optimization_report_generation():
    """Check the structure and content of the optimization report.

    Builds two ``ErrorPattern`` objects by hand, passes them to
    ``PatternRecognizer.generate_optimization_report`` and verifies that the
    returned mapping contains every required key, reports two patterns, has a
    confidence score within [0.0, 1.0] and identifies the frequency-5 pattern
    as the most frequent.

    Returns:
        True when every assertion passes.

    Raises:
        AssertionError: If a required field is missing or a value is wrong.
    """
    print("Testing optimization report generation...")
    recognizer = PatternRecognizer(min_pattern_frequency=1)

    # Create test patterns
    from config.prompt_management.data_models import ErrorPattern

    test_patterns = [
        ErrorPattern(
            pattern_id="test_pattern_1",
            pattern_type="error_type_wrong_classification",
            description="Frequent wrong classification errors (5 occurrences)",
            frequency=5,
            affected_scenarios=[ScenarioType.VAGUE_STRESS],
            suggested_improvements=[
                "Review classification criteria",
                "Add more training examples",
                "Improve decision boundaries",
            ],
            confidence_score=0.8,
        ),
        ErrorPattern(
            pattern_id="test_pattern_2",
            pattern_type="question_issue_inappropriate_question",
            description="Frequent inappropriate question issues (3 occurrences)",
            frequency=3,
            affected_scenarios=[ScenarioType.LOSS_OF_INTEREST],
            suggested_improvements=[
                "Review question appropriateness",
                "Add sensitivity training",
            ],
            confidence_score=0.6,
        ),
    ]

    # Generate optimization report
    report = recognizer.generate_optimization_report(test_patterns)

    # Verify report structure
    required_fields = [
        'summary', 'total_patterns', 'recommendations', 'priority_actions',
        'confidence_score', 'most_frequent_pattern', 'affected_scenarios',
        'report_generated',
    ]
    for field in required_fields:
        assert field in report, f"Report missing required field: {field}"

    # Verify report content
    assert report['total_patterns'] == 2, "Should report correct number of patterns"
    assert len(report['recommendations']) > 0, "Should have recommendations"
    assert 0.0 <= report['confidence_score'] <= 1.0, "Confidence score should be valid"
    assert report['most_frequent_pattern']['frequency'] == 5, "Should identify most frequent pattern"

    print("✓ Optimization report generated successfully")
    print(f" - Total patterns: {report['total_patterns']}")
    print(f" - Confidence score: {report['confidence_score']:.2f}")
    print(f" - Top recommendation: {report['recommendations'][0] if report['recommendations'] else 'None'}")
    return True
def test_feedback_system_integration():
    """Check pattern analysis and reporting through FeedbackSystem.

    Records four classification errors and three question issues via a
    ``FeedbackSystem``, then verifies that ``analyze_error_patterns`` returns
    at least one pattern and that ``generate_optimization_report`` reports
    patterns and recommendations.

    The storage directory used by this test is removed first so that records
    left behind by an earlier run cannot satisfy the assertions on their own.

    Returns:
        True when every assertion passes.

    Raises:
        AssertionError: If no patterns or recommendations are produced.
    """
    import shutil

    print("Testing feedback system integration...")

    # Start from an empty store; a missing directory is not an error.
    storage_path = ".verification_data/test_pattern_integration"
    shutil.rmtree(storage_path, ignore_errors=True)

    # Create feedback system with pattern recognizer
    feedback_system = FeedbackSystem(storage_path=storage_path)

    # Record multiple similar errors to create patterns
    for i in range(4):
        feedback_system.record_classification_error(
            error_type=ErrorType.WRONG_CLASSIFICATION,
            subcategory=ErrorSubcategory.GREEN_TO_YELLOW,
            expected_category="YELLOW",
            actual_category="GREEN",
            message_content=f"I feel stressed and overwhelmed {i}",
            reviewer_comments=f"Clear distress indicators missed {i}",
            confidence_level=0.85 + (i * 0.02),
            session_id=f"integration_session_{i}",
            additional_context={"scenario_type": "vague_stress"},
        )

    # Record question issues
    for i in range(3):
        feedback_system.record_question_issue(
            issue_type=QuestionIssueType.INAPPROPRIATE_QUESTION,
            question_content=f"What's wrong with you? {i}",
            scenario_type=ScenarioType.VAGUE_STRESS,
            reviewer_comments=f"Too harsh language {i}",
            severity="high",
            session_id=f"integration_session_{i}",
        )

    # Analyze patterns through feedback system
    patterns = feedback_system.analyze_error_patterns(min_frequency=2)

    # Verify patterns were identified
    assert len(patterns) > 0, "Feedback system should identify patterns"

    # Generate optimization report
    report = feedback_system.generate_optimization_report()

    # Verify report
    assert report['total_patterns'] > 0, "Should have patterns in report"
    assert len(report['recommendations']) > 0, "Should have recommendations"

    print("✓ Feedback system integration works")
    print(f" - Patterns identified: {len(patterns)}")
    print(f" - Report confidence: {report['confidence_score']:.2f}")
    return True
def main():
    """Run all pattern recognizer tests and print a summary.

    Each test function is called in turn. A truthy return value counts as a
    pass; a falsy return value or any raised exception counts as a failure.
    For exceptions the type, message and traceback are printed so that a
    failed assertion can be located.

    Returns:
        True if no test failed, False otherwise.
    """
    import traceback

    print("=" * 60)
    print("PATTERN RECOGNIZER TESTS")
    print("=" * 60)

    tests = [
        test_pattern_recognizer_initialization,
        test_classification_error_pattern_analysis,
        test_question_issue_pattern_analysis,
        test_comprehensive_pattern_analysis,
        test_optimization_report_generation,
        test_feedback_system_integration,
    ]

    passed = 0
    failed = 0
    for test in tests:
        print(f"\n{test.__name__.replace('_', ' ').title()}:")
        print("-" * 40)
        try:
            result = test()
        except Exception as e:
            # Top-level boundary: one broken test must not stop the others.
            failed += 1
            print(f"❌ FAILED: {type(e).__name__}: {e}")
            traceback.print_exc()
            continue

        if result:
            passed += 1
            print("✅ PASSED")
        else:
            failed += 1
            print("❌ FAILED")

    print("\n" + "=" * 60)
    print(f"RESULTS: {passed} passed, {failed} failed")
    print("=" * 60)

    if failed == 0:
        print("🎉 All pattern recognizer tests passed!")
        print("\n**Task 4.4: Error Pattern Analysis**")
        print("✅ COMPLETED: PatternRecognizer for identifying common error types")
        print("✅ COMPLETED: Automated improvement suggestion generation")
        print("✅ COMPLETED: Feedback aggregation and reporting")
        print("✅ COMPLETED: Integration with FeedbackSystem")
        return True

    print("❌ Some tests failed. Please check the implementation.")
    return False
if __name__ == "__main__":
    # Script entry point: exit status 0 when main() reports success, 1 otherwise.
    sys.exit(int(not main()))