#!/usr/bin/env python3
"""
Test script for Context-Aware Classifier implementation.

This script validates the context-aware classification functionality including:
- Context-aware classification with conversation history
- Defensive response pattern detection
- Contextual indicator weighting
- Contextual follow-up question generation
- Medical context integration
"""

import sys
import os

# Extend the import path with ``../../src`` (relative to this file) before the
# project imports below run; the order of these statements matters.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from datetime import datetime, timedelta
from config.prompt_management.context_aware_classifier import ContextAwareClassifier
from config.prompt_management.data_models import ConversationHistory, Message, Classification


def test_context_aware_classifier() -> bool:
    """Test the ContextAwareClassifier implementation.

    Runs seven sequential checks against a single ``ContextAwareClassifier``
    instance: basic classification, historical distress followed by a
    dismissive message, defensive response detection, contextual indicator
    weighting, follow-up question generation, medical context, and
    classification of a fixed set of sample messages.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: if any check fails.
    """
    print("Testing Context-Aware Classifier...")

    classifier = ContextAwareClassifier()

    # Test 1: Basic classification without context
    print("\n1. Testing basic classification...")
    message = "I'm feeling stressed about work"
    empty_history = ConversationHistory(
        messages=[],
        distress_indicators_found=[],
        context_flags=[],
    )

    result = classifier.classify_with_context(message, empty_history)
    print(f" Message: '{message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Reasoning: {result.reasoning}")

    assert result.category in ['GREEN', 'YELLOW', 'RED'], "Invalid category"
    assert 0.0 <= result.confidence <= 1.0, "Invalid confidence"
    print(" ✓ Basic classification works")

    # Test 2: Historical distress with dismissive response
    print("\n2. Testing historical distress with dismissive response...")
    # Capture the clock once so both message timestamps are offsets from the
    # same reference instant.
    now = datetime.now()
    history_with_distress = ConversationHistory(
        messages=[
            Message("I'm really struggling with anxiety", "YELLOW", now - timedelta(hours=1)),
            Message("I feel overwhelmed and sad", "YELLOW", now - timedelta(minutes=30)),
        ],
        distress_indicators_found=['anxiety', 'overwhelmed', 'sad'],
        context_flags=['distress_expressed'],
    )

    dismissive_message = "I'm fine now, everything is okay"
    result = classifier.classify_with_context(dismissive_message, history_with_distress)
    print(f" Message: '{dismissive_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Context factors: {result.context_factors}")
    print(f" Reasoning: {result.reasoning}")

    # Should be YELLOW due to historical context
    assert result.category in ['YELLOW', 'RED'], (
        f"Expected YELLOW/RED with historical distress, got {result.category}"
    )
    reasoning_lower = result.reasoning.lower()
    assert 'historical' in reasoning_lower or 'previous' in reasoning_lower, (
        "Should mention historical context"
    )
    print(" ✓ Historical context influences classification")

    # Test 3: Defensive response detection
    print("\n3. Testing defensive response detection...")
    defensive_responses = [
        "I'm fine",
        "Everything is okay",
        "No problems here",
        "I don't need help",
    ]

    for response in defensive_responses:
        is_defensive = classifier.detect_defensive_responses(response, history_with_distress)
        print(f" '{response}' -> Defensive: {is_defensive}")
        # Plain truthiness check instead of comparing to True with ``==``.
        assert is_defensive, (
            f"Should detect '{response}' as defensive with distress history"
        )

    print(" ✓ Defensive response detection works")

    # Test 4: Contextual indicator weighting
    print("\n4. Testing contextual indicator weighting...")
    context_scenarios = [
        {'historical_mentions': 0, 'recent_mention': False, 'conversation_length': 1},
        {'historical_mentions': 3, 'recent_mention': True, 'conversation_length': 5},
        {'historical_mentions': 1, 'recent_mention': False, 'conversation_length': 2},
    ]

    for i, context in enumerate(context_scenarios):
        weight = classifier.evaluate_contextual_indicators(['stress'], context)
        print(f" Scenario {i+1}: {context} -> Weight: {weight:.2f}")
        assert 0.0 <= weight <= 1.0, "Weight should be between 0 and 1"

        # Higher historical mentions should generally increase weight
        if context['historical_mentions'] >= 2:
            assert weight >= 0.5, "High historical mentions should increase weight"

    print(" ✓ Contextual indicator weighting works")

    # Test 5: Contextual follow-up generation
    print("\n5. Testing contextual follow-up generation...")
    follow_up = classifier.generate_contextual_follow_up(
        "I'm not sure how I feel",
        history_with_distress,
        "YELLOW",
    )
    print(f" Follow-up question: '{follow_up}'")
    assert len(follow_up.strip()) > 0, "Follow-up should not be empty"
    assert '?' in follow_up, "Follow-up should be a question"
    print(" ✓ Contextual follow-up generation works")

    # Test 6: Medical context integration
    print("\n6. Testing medical context integration...")
    medical_history = ConversationHistory(
        messages=[],
        distress_indicators_found=[],
        context_flags=[],
        medical_context={'conditions': ['anxiety disorder'], 'medications': ['SSRI']},
    )

    medical_message = "I'm managing my anxiety with medication but still feel stressed"
    result = classifier.classify_with_context(medical_message, medical_history)
    print(f" Message: '{medical_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Reasoning: {result.reasoning}")

    # Should consider medical context
    assert result.category in ['YELLOW', 'RED'], (
        "Medical context with stress should be YELLOW/RED"
    )
    print(" ✓ Medical context integration works")

    # Test 7: Classification consistency
    print("\n7. Testing classification consistency...")
    test_messages = [
        ("I feel great today", "GREEN"),
        ("I'm worried about my job", "YELLOW"),
        ("I want to end it all", "RED"),
    ]

    for message, expected_category in test_messages:
        result = classifier.classify_with_context(message, empty_history)
        print(f" '{message}' -> {result.category} (expected: {expected_category})")

        # Allow some flexibility in classification: only the RED expectation
        # is enforced; other categories can vary based on context.
        if expected_category == "RED":
            assert result.category == "RED", "RED messages should be classified as RED"

    print(" ✓ Classification consistency maintained")

    return True


def test_data_model_integration() -> bool:
    """Test integration with data models.

    Round-trips ``Message``, ``Classification`` and ``ConversationHistory``
    through their ``to_dict`` / ``from_dict`` methods and compares selected
    fields of the restored objects with the originals.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: if a restored field does not match the original.
    """
    print("\nTesting data model integration...")

    # Test Message serialization
    message = Message(
        content="Test message",
        classification="YELLOW",
        timestamp=datetime.now(),
        confidence=0.8,
    )

    message_dict = message.to_dict()
    restored_message = Message.from_dict(message_dict)

    assert restored_message.content == message.content, "Message content should match"
    assert restored_message.classification == message.classification, (
        "Classification should match"
    )
    print(" ✓ Message serialization works")

    # Test Classification serialization
    classification = Classification(
        category="YELLOW",
        confidence=0.7,
        reasoning="Test reasoning",
        indicators_found=['stress'],
        context_factors=['historical_distress'],
    )

    class_dict = classification.to_dict()
    restored_class = Classification.from_dict(class_dict)

    assert restored_class.category == classification.category, "Category should match"
    assert restored_class.confidence == classification.confidence, "Confidence should match"
    print(" ✓ Classification serialization works")

    # Test ConversationHistory serialization
    history = ConversationHistory(
        messages=[message],
        distress_indicators_found=['stress', 'anxiety'],
        context_flags=['distress_expressed'],
        medical_context={'conditions': ['anxiety'], 'medications': []},
    )

    history_dict = history.to_dict()
    restored_history = ConversationHistory.from_dict(history_dict)

    assert len(restored_history.messages) == 1, "Should have one message"
    assert restored_history.distress_indicators_found == history.distress_indicators_found, (
        "Indicators should match"
    )
    print(" ✓ ConversationHistory serialization works")

    return True


def main() -> bool:
    """Run all tests.

    Executes both test functions in order, printing a banner before and a
    summary after.

    Returns:
        True if both test functions complete without raising, False if any
        exception (including a failed assertion) was raised; in that case the
        error message and traceback are printed.
    """
    print("=" * 60)
    print("CONTEXT-AWARE CLASSIFIER TEST SUITE")
    print("=" * 60)

    try:
        # Run tests
        test_context_aware_classifier()
        test_data_model_integration()

        print("\n" + "=" * 60)
        print("✅ ALL TESTS PASSED!")
        print("Context-Aware Classifier implementation is working correctly.")
        print("=" * 60)

        return True

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # through the return value rather than letting the exception escape.
        print(f"\n❌ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = main()
    # Exit status 0 on success, 1 on failure.
    sys.exit(0 if success else 1)