# NOTE: file-viewer page residue was captured ahead of this script and is not part of the program (Spaces status: Sleeping; file size: 9,288 bytes; identifier 24214fc; line-number gutter 1-234).
#!/usr/bin/env python3
"""
Test script for Context-Aware Classifier implementation.
This script validates the context-aware classification functionality including:
- Context-aware classification with conversation history
- Defensive response pattern detection
- Contextual indicator weighting
- Contextual follow-up question generation
- Medical context integration
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
from datetime import datetime, timedelta
from config.prompt_management.context_aware_classifier import ContextAwareClassifier
from config.prompt_management.data_models import ConversationHistory, Message, Classification
def test_context_aware_classifier():
    """Test the ContextAwareClassifier implementation.

    Runs seven checks in sequence against a single classifier instance:
    basic classification, historical distress followed by a dismissive
    message, defensive response detection, contextual indicator weighting,
    contextual follow-up generation, medical context integration, and
    classification consistency. Progress is printed to stdout.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: If any expectation about the classifier fails.
    """
    print("Testing Context-Aware Classifier...")
    classifier = ContextAwareClassifier()

    # Test 1: Basic classification without context
    print("\n1. Testing basic classification...")
    message = "I'm feeling stressed about work"
    empty_history = ConversationHistory(
        messages=[],
        distress_indicators_found=[],
        context_flags=[],
    )
    result = classifier.classify_with_context(message, empty_history)
    print(f" Message: '{message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Reasoning: {result.reasoning}")
    assert result.category in ['GREEN', 'YELLOW', 'RED'], "Invalid category"
    assert 0.0 <= result.confidence <= 1.0, "Invalid confidence"
    print(" ✓ Basic classification works")

    # Test 2: Historical distress with dismissive response
    print("\n2. Testing historical distress with dismissive response...")
    history_with_distress = ConversationHistory(
        messages=[
            Message(
                "I'm really struggling with anxiety",
                "YELLOW",
                datetime.now() - timedelta(hours=1),
            ),
            Message(
                "I feel overwhelmed and sad",
                "YELLOW",
                datetime.now() - timedelta(minutes=30),
            ),
        ],
        distress_indicators_found=['anxiety', 'overwhelmed', 'sad'],
        context_flags=['distress_expressed'],
    )
    dismissive_message = "I'm fine now, everything is okay"
    result = classifier.classify_with_context(dismissive_message, history_with_distress)
    print(f" Message: '{dismissive_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Context factors: {result.context_factors}")
    print(f" Reasoning: {result.reasoning}")
    # Should be YELLOW due to historical context
    assert result.category in ['YELLOW', 'RED'], (
        f"Expected YELLOW/RED with historical distress, got {result.category}"
    )
    reasoning = result.reasoning.lower()
    assert 'historical' in reasoning or 'previous' in reasoning, (
        "Should mention historical context"
    )
    print(" ✓ Historical context influences classification")

    # Test 3: Defensive response detection
    print("\n3. Testing defensive response detection...")
    defensive_responses = [
        "I'm fine",
        "Everything is okay",
        "No problems here",
        "I don't need help",
    ]
    for response in defensive_responses:
        is_defensive = classifier.detect_defensive_responses(response, history_with_distress)
        print(f" '{response}' -> Defensive: {is_defensive}")
        # Truthiness check instead of comparing against True with `==`.
        assert is_defensive, (
            f"Should detect '{response}' as defensive with distress history"
        )
    print(" ✓ Defensive response detection works")

    # Test 4: Contextual indicator weighting
    print("\n4. Testing contextual indicator weighting...")
    context_scenarios = [
        {'historical_mentions': 0, 'recent_mention': False, 'conversation_length': 1},
        {'historical_mentions': 3, 'recent_mention': True, 'conversation_length': 5},
        {'historical_mentions': 1, 'recent_mention': False, 'conversation_length': 2},
    ]
    for number, context in enumerate(context_scenarios, start=1):
        weight = classifier.evaluate_contextual_indicators(['stress'], context)
        print(f" Scenario {number}: {context} -> Weight: {weight:.2f}")
        assert 0.0 <= weight <= 1.0, "Weight should be between 0 and 1"
        # Higher historical mentions should generally increase weight
        if context['historical_mentions'] >= 2:
            assert weight >= 0.5, "High historical mentions should increase weight"
    print(" ✓ Contextual indicator weighting works")

    # Test 5: Contextual follow-up generation
    print("\n5. Testing contextual follow-up generation...")
    follow_up = classifier.generate_contextual_follow_up(
        "I'm not sure how I feel",
        history_with_distress,
        "YELLOW",
    )
    print(f" Follow-up question: '{follow_up}'")
    assert len(follow_up.strip()) > 0, "Follow-up should not be empty"
    assert '?' in follow_up, "Follow-up should be a question"
    print(" ✓ Contextual follow-up generation works")

    # Test 6: Medical context integration
    print("\n6. Testing medical context integration...")
    medical_history = ConversationHistory(
        messages=[],
        distress_indicators_found=[],
        context_flags=[],
        medical_context={'conditions': ['anxiety disorder'], 'medications': ['SSRI']},
    )
    medical_message = "I'm managing my anxiety with medication but still feel stressed"
    result = classifier.classify_with_context(medical_message, medical_history)
    print(f" Message: '{medical_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Reasoning: {result.reasoning}")
    # Should consider medical context
    assert result.category in ['YELLOW', 'RED'], (
        "Medical context with stress should be YELLOW/RED"
    )
    print(" ✓ Medical context integration works")

    # Test 7: Classification consistency
    print("\n7. Testing classification consistency...")
    test_messages = [
        ("I feel great today", "GREEN"),
        ("I'm worried about my job", "YELLOW"),
        ("I want to end it all", "RED"),
    ]
    for text, expected_category in test_messages:
        result = classifier.classify_with_context(text, empty_history)
        print(f" '{text}' -> {result.category} (expected: {expected_category})")
        # Allow some flexibility in classification: only RED is strict,
        # other categories can have some variation based on context.
        if expected_category == "RED":
            assert result.category == "RED", (
                f"RED messages should be classified as RED, got {result.category}"
            )
    print(" ✓ Classification consistency maintained")
    return True
def test_data_model_integration():
    """Test integration with data models.

    Round-trips a Message, a Classification and a ConversationHistory
    through their ``to_dict`` / ``from_dict`` methods and checks that the
    compared fields survive. Progress is printed to stdout.

    Returns:
        True when every round-trip check passes.

    Raises:
        AssertionError: If a restored object differs from the original in
            one of the compared fields.
    """
    print("\nTesting data model integration...")

    # Test Message serialization
    message = Message(
        content="Test message",
        classification="YELLOW",
        timestamp=datetime.now(),
        confidence=0.8,
    )
    message_dict = message.to_dict()
    restored_message = Message.from_dict(message_dict)
    assert restored_message.content == message.content, "Message content should match"
    assert restored_message.classification == message.classification, (
        "Classification should match"
    )
    print(" ✓ Message serialization works")

    # Test Classification serialization
    classification = Classification(
        category="YELLOW",
        confidence=0.7,
        reasoning="Test reasoning",
        indicators_found=['stress'],
        context_factors=['historical_distress'],
    )
    class_dict = classification.to_dict()
    restored_class = Classification.from_dict(class_dict)
    assert restored_class.category == classification.category, "Category should match"
    assert restored_class.confidence == classification.confidence, (
        "Confidence should match"
    )
    print(" ✓ Classification serialization works")

    # Test ConversationHistory serialization
    history = ConversationHistory(
        messages=[message],
        distress_indicators_found=['stress', 'anxiety'],
        context_flags=['distress_expressed'],
        medical_context={'conditions': ['anxiety'], 'medications': []},
    )
    history_dict = history.to_dict()
    restored_history = ConversationHistory.from_dict(history_dict)
    assert len(restored_history.messages) == 1, "Should have one message"
    assert restored_history.distress_indicators_found == history.distress_indicators_found, (
        "Indicators should match"
    )
    print(" ✓ ConversationHistory serialization works")
    return True
def main():
    """Run all tests.

    Calls the classifier test and the data-model test in order, printing a
    banner before and a summary after.

    Returns:
        True if both test functions complete without raising; False if
        either raises, in which case the error and its traceback are
        printed.
    """
    banner = "=" * 60
    print(banner)
    print("CONTEXT-AWARE CLASSIFIER TEST SUITE")
    print(banner)
    try:
        # Run tests
        test_context_aware_classifier()
        test_data_model_integration()
    except Exception as e:
        # Top-level boundary: report any failure (including AssertionError)
        # and signal it through the return value instead of propagating.
        print(f"\n❌ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        return False
    else:
        print("\n" + banner)
        print("✅ ALL TESTS PASSED!")
        print("Context-Aware Classifier implementation is working correctly.")
        print(banner)
        return True
if __name__ == "__main__":
    # Propagate the suite outcome as the process exit status:
    # 0 when main() reports success, 1 otherwise.
    success = main()
    sys.exit(0 if success else 1)