Spaces:
Sleeping
Sleeping
File size: 9,965 Bytes
24214fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
#!/usr/bin/env python3
"""
Test script for Context-Aware Prompt Integration.
This script validates that the updated spiritual_monitor prompt integrates
properly with the ContextAwareClassifier and maintains all functionality.
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
from datetime import datetime, timedelta
from config.prompt_management.context_aware_classifier import ContextAwareClassifier
from config.prompt_management.data_models import ConversationHistory, Message, Classification
from config.prompt_management.prompt_controller import PromptController
def _resolve_prompt_path():
    """Return the path of the context-aware prompt file.

    The CWD-relative location is tried first so running from the repository
    root behaves as before. Otherwise the path is anchored to this file,
    mirroring the ``<this dir>/../../src`` entry added to ``sys.path`` at the
    top of the module, so the script also works from other directories.
    """
    cwd_relative = 'src/config/prompts/spiritual_monitor_context_aware.txt'
    if os.path.exists(cwd_relative):
        return cwd_relative
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.normpath(os.path.join(here, '..', '..', cwd_relative))


def test_prompt_integration():
    """Test integration between updated prompt and context-aware classifier.

    Runs five checks in order and stops at the first failure:

    1. the context-aware prompt file can be read,
    2. the prompt contains every required section tag,
    3. three classification scenarios (dismissive reply after distress,
       escalating distress, medical context) yield the expected categories,
    4. a contextual follow-up containing a question mark is generated,
    5. short reassuring replies are flagged as defensive given a distress
       history.

    Returns:
        bool: True if every check passed, False otherwise.
    """
    print("Testing Context-Aware Prompt Integration...")

    # Test 1: Verify prompt loading
    print("\n1. Testing prompt loading...")
    # The controller itself is not used below; it is still constructed so a
    # broken PromptController setup surfaces here (presumably the original
    # intent - the instance was never read).
    PromptController()

    # Load the context-aware prompt
    prompt_path = _resolve_prompt_path()
    try:
        # Explicit encoding: do not depend on the platform default.
        with open(prompt_path, 'r', encoding='utf-8') as f:
            prompt_content = f.read()
    except (OSError, UnicodeError) as e:
        print(f" ✗ Failed to load prompt: {e}")
        return False
    print(f" ✓ Context-aware prompt loaded ({len(prompt_content)} characters)")

    # Test 2: Verify prompt structure
    print("\n2. Testing prompt structure...")
    required_sections = [
        '<system_role>',
        '<shared_indicators>',
        '<shared_rules>',
        '<contextual_evaluation_rules>',
        '<context_aware_instructions>',
        '<output_format>',
    ]
    for section in required_sections:
        if section in prompt_content:
            print(f" ✓ Found {section}")
        else:
            print(f" ✗ Missing {section}")
            return False

    # Test 3: Test classifier with context-aware scenarios
    print("\n3. Testing context-aware classification scenarios...")
    classifier = ContextAwareClassifier()
    # One reference instant so all history offsets are relative to the same
    # "now".
    now = datetime.now()

    # Scenario 1: Historical distress with dismissive response
    history = ConversationHistory(
        messages=[
            Message("I'm really struggling with my faith", "YELLOW", now - timedelta(hours=1)),
            Message("I feel like God has abandoned me", "RED", now - timedelta(minutes=30)),
        ],
        distress_indicators_found=['faith_struggle', 'abandonment'],
        context_flags=['spiritual_distress'],
    )
    dismissive_message = "I'm fine now, everything is good"
    result = classifier.classify_with_context(dismissive_message, history)
    print(" Scenario 1 - Historical distress + dismissive response:")
    print(f" Message: '{dismissive_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f" Context factors: {result.context_factors}")
    # Should be YELLOW due to context (RED is also accepted)
    if result.category in ['YELLOW', 'RED']:
        print(" ✓ Correctly identified contextual concern")
    else:
        print(f" ✗ Expected YELLOW/RED, got {result.category}")
        return False

    # Scenario 2: Escalating distress pattern
    escalating_history = ConversationHistory(
        messages=[
            Message("I'm a bit worried about my treatment", "YELLOW", now - timedelta(hours=2)),
            Message("I'm really scared about what's happening", "YELLOW", now - timedelta(hours=1)),
            Message("I don't think I can handle this anymore", "RED", now - timedelta(minutes=30)),
        ],
        distress_indicators_found=['worry', 'fear', 'overwhelmed'],
        context_flags=['escalating_distress'],
    )
    current_message = "I just want it all to stop"
    result = classifier.classify_with_context(current_message, escalating_history)
    print("\n Scenario 2 - Escalating distress pattern:")
    print(f" Message: '{current_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    # Should be RED due to escalation
    if result.category == 'RED':
        print(" ✓ Correctly identified escalating distress")
    else:
        print(f" ✗ Expected RED, got {result.category}")
        return False

    # Scenario 3: Medical context integration
    medical_history = ConversationHistory(
        messages=[
            Message("The doctor said I have depression", "YELLOW", now - timedelta(hours=1)),
        ],
        distress_indicators_found=['depression'],
        context_flags=['medical_diagnosis'],
        medical_context={'conditions': ['depression'], 'medications': ['antidepressant']},
    )
    medical_message = "I'm trying to stay positive but it's hard"
    result = classifier.classify_with_context(medical_message, medical_history)
    print("\n Scenario 3 - Medical context integration:")
    print(f" Message: '{medical_message}'")
    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
    # Should consider medical context
    if result.category in ['YELLOW', 'RED']:
        print(" ✓ Correctly integrated medical context")
    else:
        print(f" ✗ Expected YELLOW/RED with medical context, got {result.category}")
        return False

    # Test 4: Follow-up question generation
    print("\n4. Testing contextual follow-up generation...")
    follow_up = classifier.generate_contextual_follow_up(
        "I'm not sure how I feel",
        history,
        "YELLOW",
    )
    print(f" Generated follow-up: '{follow_up}'")
    # Only the question format is verified here. A missing (None/empty)
    # follow-up is reported as a failed check instead of raising TypeError.
    if follow_up and '?' in follow_up:
        print(" ✓ Generated appropriate follow-up question")
    else:
        print(" ✗ Follow-up question format invalid")
        return False

    # Test 5: Defensive pattern detection
    print("\n5. Testing defensive pattern detection...")
    defensive_responses = [
        "I'm fine",
        "Everything is okay",
        "No problems here",
    ]
    for response in defensive_responses:
        # Reuses the Scenario 1 history, which carries distress indicators.
        is_defensive = classifier.detect_defensive_responses(response, history)
        print(f" '{response}' -> Defensive: {is_defensive}")
        if not is_defensive:
            print(f" ✗ Should detect '{response}' as defensive with distress history")
            return False
    print(" ✓ Defensive pattern detection working correctly")
    return True
def test_prompt_consistency():
    """Test that the updated prompt maintains consistency with shared components.

    Fetches the indicator, rule and template collections from a fresh
    ``PromptController`` and checks that none of them is empty.

    Returns:
        bool: True if all three collections contain at least one entry,
        False otherwise.
    """
    print("\nTesting prompt consistency with shared components...")
    controller = PromptController()

    # Test that shared indicators are accessible
    indicator_count = len(controller.indicator_catalog.get_all_indicators())
    print(f" Available indicators: {indicator_count}")

    # Test that shared rules are accessible
    rule_count = len(controller.rules_catalog.get_all_rules())
    print(f" Available rules: {rule_count}")

    # Test that templates are accessible
    template_count = len(controller.template_catalog.get_all_templates())
    print(f" Available templates: {template_count}")

    # Verify consistency: every catalog must be populated.
    populated = indicator_count > 0 and rule_count > 0 and template_count > 0
    if populated:
        print(" ✓ Shared components accessible and populated")
    else:
        print(" ✗ Shared components not properly accessible")
    return populated
def test_backward_compatibility():
    """Test that context-aware features don't break existing functionality.

    Classifies three messages against an empty conversation history. The
    check is deliberately loose: it fails only when a message expected to be
    RED is not classified RED, or a message expected to be GREEN is
    classified RED. The YELLOW expectation is printed but not enforced.

    Returns:
        bool: True if no critical misclassification occurred, False otherwise.
    """
    print("\nTesting backward compatibility...")
    classifier = ContextAwareClassifier()

    # Test with empty history (should work like before)
    empty_history = ConversationHistory(
        messages=[],
        distress_indicators_found=[],
        context_flags=[],
    )
    test_messages = [
        ("I feel great today", "GREEN"),
        ("I'm worried about my health", "YELLOW"),
        ("I want to end my life", "RED"),
    ]
    for message, expected_category in test_messages:
        result = classifier.classify_with_context(message, empty_history)
        print(f" '{message}' -> {result.category} (expected: {expected_category})")
        # Allow some flexibility but check basic correctness
        if expected_category == "RED" and result.category != "RED":
            print(" ✗ Critical: RED message not classified as RED")
            return False
        if expected_category == "GREEN" and result.category == "RED":
            print(" ✗ Critical: GREEN message classified as RED")
            return False
    print(" ✓ Backward compatibility maintained")
    return True
def main():
    """Run all integration tests.

    Executes the three test groups in order, stopping at the first one that
    reports failure. Any exception raised by a group is caught, reported
    with its traceback, and treated as a failure.

    Returns:
        bool: True if every group passed, False otherwise.
    """
    banner = "=" * 70
    print(banner)
    print("CONTEXT-AWARE PROMPT INTEGRATION TEST SUITE")
    print(banner)
    try:
        # Run tests; later groups are skipped once one fails.
        test_groups = (
            test_prompt_integration,
            test_prompt_consistency,
            test_backward_compatibility,
        )
        for run_group in test_groups:
            if not run_group():
                return False

        print("\n" + banner)
        print("✅ ALL INTEGRATION TESTS PASSED!")
        print("Context-aware prompt integration is working correctly.")
        print("The system now supports:")
        print("- Historical context consideration")
        print("- Defensive response pattern detection")
        print("- Medical context integration")
        print("- Contextual follow-up generation")
        print("- Backward compatibility with existing functionality")
        print(banner)
        return True
    except Exception as e:
        # Top-level boundary: report the error with its traceback and let the
        # caller turn the False result into a non-zero exit status.
        print(f"\n❌ INTEGRATION TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Exit status 0 when the whole suite passed, 1 otherwise.
    success = main()
    sys.exit(0 if success else 1)