#!/usr/bin/env python3
"""
Test script for Context-Aware Prompt Integration.

This script validates that the updated spiritual_monitor prompt integrates
properly with the ContextAwareClassifier and maintains all functionality.
"""

import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from datetime import datetime, timedelta
from config.prompt_management.context_aware_classifier import ContextAwareClassifier
from config.prompt_management.data_models import ConversationHistory, Message, Classification
from config.prompt_management.prompt_controller import PromptController


def _resolve_prompt_path():
    """Return the path from which the context-aware prompt should be read.

    The copy located relative to this script (under the same ``../../src``
    root that the module appends to ``sys.path``) is preferred, so the test
    does not depend on the current working directory. When that file does not
    exist, the original CWD-relative path is returned unchanged so runs from
    the project root keep working and error messages stay familiar.
    """
    legacy_path = 'src/config/prompts/spiritual_monitor_context_aware.txt'
    script_dir = os.path.dirname(os.path.abspath(__file__))
    anchored_path = os.path.join(script_dir, '..', '..', legacy_path)
    return anchored_path if os.path.isfile(anchored_path) else legacy_path


def test_prompt_integration():
    """Test integration between updated prompt and context-aware classifier.

    Runs five checks in order and stops at the first one that fails:

    1. the context-aware prompt file can be read,
    2. the prompt text contains every required section tag,
    3. three classification scenarios yield the expected categories,
    4. a contextual follow-up containing a question mark is generated,
    5. short reassuring replies are flagged as defensive given the
       distress history built for scenario 1.

    Progress and failures are reported on stdout.

    Returns:
        bool: True when every check passes, False as soon as one fails.
    """
    print("Testing Context-Aware Prompt Integration...")

    # Test 1: Verify prompt loading
    print("\n1. Testing prompt loading...")

    # The instance is not used below; the call is kept so a constructor
    # failure still surfaces here.
    # NOTE(review): confirm whether this instantiation is still needed.
    PromptController()

    # Load the context-aware prompt. The encoding is explicit so the result
    # does not depend on the platform's locale default.
    try:
        with open(_resolve_prompt_path(), 'r', encoding='utf-8') as f:
            prompt_content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"   ❌ Failed to load prompt: {e}")
        return False
    print(f"   ✓ Context-aware prompt loaded ({len(prompt_content)} characters)")

    # Test 2: Verify prompt structure
    print("\n2. Testing prompt structure...")
    required_sections = [
        '<system_role>',
        '<shared_indicators>',
        '<shared_rules>',
        '<contextual_evaluation_rules>',
        '<context_aware_instructions>',
        '<output_format>'
    ]

    for section in required_sections:
        if section not in prompt_content:
            print(f"   ❌ Missing {section}")
            return False
        print(f"   ✓ Found {section}")

    # Test 3: Test classifier with context-aware scenarios
    print("\n3. Testing context-aware classification scenarios...")
    classifier = ContextAwareClassifier()

    # One reference instant keeps all message timestamps relative to the
    # same moment instead of drifting between successive now() calls.
    now = datetime.now()

    # Scenario 1: Historical distress with dismissive response
    history = ConversationHistory(
        messages=[
            Message("I'm really struggling with my faith", "YELLOW", now - timedelta(hours=1)),
            Message("I feel like God has abandoned me", "RED", now - timedelta(minutes=30))
        ],
        distress_indicators_found=['faith_struggle', 'abandonment'],
        context_flags=['spiritual_distress']
    )

    dismissive_message = "I'm fine now, everything is good"
    result = classifier.classify_with_context(dismissive_message, history)

    print("   Scenario 1 - Historical distress + dismissive response:")
    print(f"   Message: '{dismissive_message}'")
    print(f"   Classification: {result.category} (confidence: {result.confidence:.2f})")
    print(f"   Context factors: {result.context_factors}")

    # The distress history should keep the result at YELLOW or above.
    if result.category not in ['YELLOW', 'RED']:
        print(f"   ❌ Expected YELLOW/RED, got {result.category}")
        return False
    print("   ✓ Correctly identified contextual concern")

    # Scenario 2: Escalating distress pattern
    escalating_history = ConversationHistory(
        messages=[
            Message("I'm a bit worried about my treatment", "YELLOW", now - timedelta(hours=2)),
            Message("I'm really scared about what's happening", "YELLOW", now - timedelta(hours=1)),
            Message("I don't think I can handle this anymore", "RED", now - timedelta(minutes=30))
        ],
        distress_indicators_found=['worry', 'fear', 'overwhelmed'],
        context_flags=['escalating_distress']
    )

    current_message = "I just want it all to stop"
    result = classifier.classify_with_context(current_message, escalating_history)

    print("\n   Scenario 2 - Escalating distress pattern:")
    print(f"   Message: '{current_message}'")
    print(f"   Classification: {result.category} (confidence: {result.confidence:.2f})")

    # Should be RED due to escalation
    if result.category != 'RED':
        print(f"   ❌ Expected RED, got {result.category}")
        return False
    print("   ✓ Correctly identified escalating distress")

    # Scenario 3: Medical context integration
    medical_history = ConversationHistory(
        messages=[
            Message("The doctor said I have depression", "YELLOW", now - timedelta(hours=1))
        ],
        distress_indicators_found=['depression'],
        context_flags=['medical_diagnosis'],
        medical_context={'conditions': ['depression'], 'medications': ['antidepressant']}
    )

    medical_message = "I'm trying to stay positive but it's hard"
    result = classifier.classify_with_context(medical_message, medical_history)

    print("\n   Scenario 3 - Medical context integration:")
    print(f"   Message: '{medical_message}'")
    print(f"   Classification: {result.category} (confidence: {result.confidence:.2f})")

    # Should consider medical context
    if result.category not in ['YELLOW', 'RED']:
        print(f"   ❌ Expected YELLOW/RED with medical context, got {result.category}")
        return False
    print("   ✓ Correctly integrated medical context")

    # Test 4: Follow-up question generation
    print("\n4. Testing contextual follow-up generation...")

    follow_up = classifier.generate_contextual_follow_up(
        "I'm not sure how I feel",
        history,
        "YELLOW"
    )

    print(f"   Generated follow-up: '{follow_up}'")

    # Only the question format is verified here. The truthiness guard makes a
    # missing or empty follow-up report a failure instead of raising.
    if not follow_up or '?' not in follow_up:
        print("   ❌ Follow-up question format invalid")
        return False
    print("   ✓ Generated appropriate follow-up question")

    # Test 5: Defensive pattern detection
    print("\n5. Testing defensive pattern detection...")

    defensive_responses = [
        "I'm fine",
        "Everything is okay",
        "No problems here"
    ]

    # Each reply is evaluated against the scenario 1 distress history.
    for response in defensive_responses:
        is_defensive = classifier.detect_defensive_responses(response, history)
        print(f"   '{response}' -> Defensive: {is_defensive}")

        if not is_defensive:
            print(f"   ❌ Should detect '{response}' as defensive with distress history")
            return False

    print("   ✓ Defensive pattern detection working correctly")

    return True


def test_prompt_consistency():
    """Check that the prompt controller exposes populated shared catalogs.

    Queries the indicator, rules and template catalogs of a fresh
    ``PromptController`` in that order, printing the size of each result.

    Returns:
        bool: True when all three catalogs return at least one entry,
        False otherwise.
    """
    print("\nTesting prompt consistency with shared components...")

    prompt_controller = PromptController()

    # Shared indicators
    indicator_count = len(prompt_controller.indicator_catalog.get_all_indicators())
    print(f"   Available indicators: {indicator_count}")

    # Shared rules
    rule_count = len(prompt_controller.rules_catalog.get_all_rules())
    print(f"   Available rules: {rule_count}")

    # Shared templates
    template_count = len(prompt_controller.template_catalog.get_all_templates())
    print(f"   Available templates: {template_count}")

    # A single empty catalog is enough to fail the consistency check.
    if min(indicator_count, rule_count, template_count) <= 0:
        print("   ❌ Shared components not properly accessible")
        return False

    print("   ✓ Shared components accessible and populated")
    return True


def test_backward_compatibility():
    """Verify classification still behaves sensibly with no conversation history.

    Classifies three sample messages against an empty ``ConversationHistory``
    and prints each result next to its expected category. Only the two
    critical mismatches fail the test: a message expected to be RED that is
    not classified RED, and a message expected to be GREEN that is classified
    RED. Any other deviation is tolerated.

    Returns:
        bool: True when no critical mismatch occurs, False otherwise.
    """
    print("\nTesting backward compatibility...")

    context_classifier = ContextAwareClassifier()

    # A history with no messages, indicators or flags.
    blank_history = ConversationHistory(
        messages=[],
        distress_indicators_found=[],
        context_flags=[]
    )

    expectations = (
        ("I feel great today", "GREEN"),
        ("I'm worried about my health", "YELLOW"),
        ("I want to end my life", "RED"),
    )

    for text, expected in expectations:
        outcome = context_classifier.classify_with_context(text, blank_history)
        print(f"   '{text}' -> {outcome.category} (expected: {expected})")

        # Deliberately lenient: only the two critical mismatches are fatal.
        if expected == "RED":
            if outcome.category != "RED":
                print("   ❌ Critical: RED message not classified as RED")
                return False
        elif expected == "GREEN":
            if outcome.category == "RED":
                print("   ❌ Critical: GREEN message classified as RED")
                return False

    print("   ✓ Backward compatibility maintained")
    return True


def main():
    """Run the integration tests in sequence and report the overall outcome.

    The three test functions run in a fixed order; the first one that returns
    a falsy value ends the run without printing the success summary. Any
    exception raised along the way is reported together with its traceback.

    Returns:
        bool: True when every test passes, False on the first failing test
        or on any exception.
    """
    banner = "=" * 70
    print(banner)
    print("CONTEXT-AWARE PROMPT INTEGRATION TEST SUITE")
    print(banner)

    try:
        # Short-circuit evaluation stops at the first failing test.
        all_passed = (
            test_prompt_integration()
            and test_prompt_consistency()
            and test_backward_compatibility()
        )
        if not all_passed:
            return False

        summary_lines = (
            "\n" + banner,
            "✅ ALL INTEGRATION TESTS PASSED!",
            "Context-aware prompt integration is working correctly.",
            "The system now supports:",
            "- Historical context consideration",
            "- Defensive response pattern detection",
            "- Medical context integration",
            "- Contextual follow-up generation",
            "- Backward compatibility with existing functionality",
            banner,
        )
        for line in summary_lines:
            print(line)
        return True

    except Exception as e:
        print(f"\n❌ INTEGRATION TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Exit status mirrors the suite outcome: 0 when main() succeeds, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)