Spaces:

DocUA
/

Spiritual_Health_Project

Sleeping

App Files Files Community

Spiritual_Health_Project / tests /unit /test_context_aware_classifier.py

DocUA

feat: Complete prompt optimization system implementation

24214fc 29 days ago

raw

history blame contribute delete

9.29 kB

	#!/usr/bin/env python3
	"""
	Test script for Context-Aware Classifier implementation.

	This script validates the context-aware classification functionality including:
	- Context-aware classification with conversation history
	- Defensive response pattern detection
	- Contextual indicator weighting
	- Contextual follow-up question generation
	- Medical context integration
	"""

	import sys
	import os
	sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

	from datetime import datetime, timedelta
	from config.prompt_management.context_aware_classifier import ContextAwareClassifier
	from config.prompt_management.data_models import ConversationHistory, Message, Classification


	def _check_basic_classification(classifier, empty_history):
	    """Scenario 1: classify a single message against a history with no messages."""
	    print("\n1. Testing basic classification...")
	    message = "I'm feeling stressed about work"

	    result = classifier.classify_with_context(message, empty_history)
	    print(f" Message: '{message}'")
	    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
	    print(f" Reasoning: {result.reasoning}")
	    assert result.category in ['GREEN', 'YELLOW', 'RED'], "Invalid category"
	    assert 0.0 <= result.confidence <= 1.0, "Invalid confidence"
	    print(" ✓ Basic classification works")


	def _build_distress_history():
	    """Build a history holding two earlier YELLOW messages that express distress."""
	    return ConversationHistory(
	        messages=[
	            Message("I'm really struggling with anxiety", "YELLOW", datetime.now() - timedelta(hours=1)),
	            Message("I feel overwhelmed and sad", "YELLOW", datetime.now() - timedelta(minutes=30)),
	        ],
	        distress_indicators_found=['anxiety', 'overwhelmed', 'sad'],
	        context_flags=['distress_expressed'],
	    )


	def _check_historical_distress(classifier, history_with_distress):
	    """Scenario 2: a dismissive reply after recorded distress must rate YELLOW or RED."""
	    print("\n2. Testing historical distress with dismissive response...")
	    dismissive_message = "I'm fine now, everything is okay"

	    result = classifier.classify_with_context(dismissive_message, history_with_distress)
	    print(f" Message: '{dismissive_message}'")
	    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
	    print(f" Context factors: {result.context_factors}")
	    print(f" Reasoning: {result.reasoning}")

	    # The distress recorded in the history should keep the rating above GREEN.
	    assert result.category in ['YELLOW', 'RED'], f"Expected YELLOW/RED with historical distress, got {result.category}"
	    reasoning = result.reasoning.lower()
	    assert 'historical' in reasoning or 'previous' in reasoning, "Should mention historical context"
	    print(" ✓ Historical context influences classification")


	def _check_defensive_detection(classifier, history_with_distress):
	    """Scenario 3: each stock dismissive phrase must be flagged as defensive."""
	    print("\n3. Testing defensive response detection...")
	    defensive_responses = [
	        "I'm fine",
	        "Everything is okay",
	        "No problems here",
	        "I don't need help",
	    ]

	    for response in defensive_responses:
	        is_defensive = classifier.detect_defensive_responses(response, history_with_distress)
	        print(f" '{response}' -> Defensive: {is_defensive}")
	        # Truthiness check instead of comparing to True with ``==``.
	        assert is_defensive, f"Should detect '{response}' as defensive with distress history"

	    print(" ✓ Defensive response detection works")


	def _check_indicator_weighting(classifier):
	    """Scenario 4: indicator weights stay in [0, 1] and reach 0.5 with 2+ historical mentions."""
	    print("\n4. Testing contextual indicator weighting...")
	    context_scenarios = [
	        {'historical_mentions': 0, 'recent_mention': False, 'conversation_length': 1},
	        {'historical_mentions': 3, 'recent_mention': True, 'conversation_length': 5},
	        {'historical_mentions': 1, 'recent_mention': False, 'conversation_length': 2},
	    ]

	    for number, context in enumerate(context_scenarios, start=1):
	        weight = classifier.evaluate_contextual_indicators(['stress'], context)
	        print(f" Scenario {number}: {context} -> Weight: {weight:.2f}")
	        assert 0.0 <= weight <= 1.0, "Weight should be between 0 and 1"

	        # Higher historical mentions should generally increase weight
	        if context['historical_mentions'] >= 2:
	            assert weight >= 0.5, "High historical mentions should increase weight"

	    print(" ✓ Contextual indicator weighting works")


	def _check_follow_up_generation(classifier, history_with_distress):
	    """Scenario 5: the generated follow-up must be non-empty and contain a question mark."""
	    print("\n5. Testing contextual follow-up generation...")
	    follow_up = classifier.generate_contextual_follow_up(
	        "I'm not sure how I feel",
	        history_with_distress,
	        "YELLOW",
	    )
	    print(f" Follow-up question: '{follow_up}'")
	    assert len(follow_up.strip()) > 0, "Follow-up should not be empty"
	    assert '?' in follow_up, "Follow-up should be a question"
	    print(" ✓ Contextual follow-up generation works")


	def _check_medical_context(classifier):
	    """Scenario 6: a stressed message with medical context attached must rate YELLOW or RED."""
	    print("\n6. Testing medical context integration...")
	    medical_history = ConversationHistory(
	        messages=[],
	        distress_indicators_found=[],
	        context_flags=[],
	        medical_context={'conditions': ['anxiety disorder'], 'medications': ['SSRI']},
	    )

	    medical_message = "I'm managing my anxiety with medication but still feel stressed"
	    result = classifier.classify_with_context(medical_message, medical_history)
	    print(f" Message: '{medical_message}'")
	    print(f" Classification: {result.category} (confidence: {result.confidence:.2f})")
	    print(f" Reasoning: {result.reasoning}")

	    # Should consider medical context
	    assert result.category in ['YELLOW', 'RED'], "Medical context with stress should be YELLOW/RED"
	    print(" ✓ Medical context integration works")


	def _check_classification_consistency(classifier, empty_history):
	    """Scenario 7: print results for sample messages; only the RED expectation is enforced."""
	    print("\n7. Testing classification consistency...")
	    test_messages = [
	        ("I feel great today", "GREEN"),
	        ("I'm worried about my job", "YELLOW"),
	        ("I want to end it all", "RED"),
	    ]

	    for message, expected_category in test_messages:
	        result = classifier.classify_with_context(message, empty_history)
	        print(f" '{message}' -> {result.category} (expected: {expected_category})")
	        # GREEN and YELLOW expectations are printed only; they may vary with
	        # context, so just the RED case is asserted.
	        if expected_category == "RED":
	            assert result.category == "RED", "RED messages should be classified as RED"

	    print(" ✓ Classification consistency maintained")


	def test_context_aware_classifier():
	    """Test the ContextAwareClassifier implementation.

	    Runs seven scenarios in order against one shared classifier instance:
	    basic classification, historical distress, defensive response
	    detection, indicator weighting, follow-up generation, medical context
	    and classification consistency. The same empty history is used by
	    scenarios 1 and 7, and the same distress history by scenarios 2, 3 and 5.

	    Returns:
	        True when every scenario passes.

	    Raises:
	        AssertionError: if any scenario's expectation is not met.
	    """
	    print("Testing Context-Aware Classifier...")

	    classifier = ContextAwareClassifier()

	    empty_history = ConversationHistory(
	        messages=[],
	        distress_indicators_found=[],
	        context_flags=[],
	    )
	    _check_basic_classification(classifier, empty_history)

	    history_with_distress = _build_distress_history()
	    _check_historical_distress(classifier, history_with_distress)
	    _check_defensive_detection(classifier, history_with_distress)
	    _check_indicator_weighting(classifier)
	    _check_follow_up_generation(classifier, history_with_distress)
	    _check_medical_context(classifier)
	    _check_classification_consistency(classifier, empty_history)

	    # NOTE(review): pytest warns when a test function returns a non-None
	    # value; the return is kept so existing callers that read it still work.
	    return True


	def test_data_model_integration():
	    """Round-trip each data model through ``to_dict`` / ``from_dict``.

	    Message, Classification and ConversationHistory are each serialized to
	    a dict and rebuilt, in that order, and selected fields of the rebuilt
	    object are compared with the original.

	    Returns:
	        True once all assertions have passed.

	    Raises:
	        AssertionError: if a compared field differs after the round trip.
	    """
	    print("\nTesting data model integration...")

	    # --- Message: content and classification must survive the round trip ---
	    original_message = Message(
	        content="Test message",
	        classification="YELLOW",
	        timestamp=datetime.now(),
	        confidence=0.8,
	    )
	    message_copy = Message.from_dict(original_message.to_dict())

	    assert message_copy.content == original_message.content, "Message content should match"
	    assert message_copy.classification == original_message.classification, "Classification should match"
	    print(" ✓ Message serialization works")

	    # --- Classification: category and confidence must survive ---
	    original_classification = Classification(
	        category="YELLOW",
	        confidence=0.7,
	        reasoning="Test reasoning",
	        indicators_found=['stress'],
	        context_factors=['historical_distress'],
	    )
	    classification_copy = Classification.from_dict(original_classification.to_dict())

	    assert classification_copy.category == original_classification.category, "Category should match"
	    assert classification_copy.confidence == original_classification.confidence, "Confidence should match"
	    print(" ✓ Classification serialization works")

	    # --- ConversationHistory: message count and indicator list must survive ---
	    original_history = ConversationHistory(
	        messages=[original_message],
	        distress_indicators_found=['stress', 'anxiety'],
	        context_flags=['distress_expressed'],
	        medical_context={'conditions': ['anxiety'], 'medications': []},
	    )
	    history_copy = ConversationHistory.from_dict(original_history.to_dict())

	    assert len(history_copy.messages) == 1, "Should have one message"
	    assert history_copy.distress_indicators_found == original_history.distress_indicators_found, "Indicators should match"
	    print(" ✓ ConversationHistory serialization works")

	    return True


	def main():
	    """Run both test functions and print a pass/fail summary.

	    Returns:
	        True if both test functions finish without raising; False if any
	        exception (a failed assertion included) is raised, in which case
	        the message and traceback are printed.
	    """
	    banner = "=" * 60
	    print(banner)
	    print("CONTEXT-AWARE CLASSIFIER TEST SUITE")
	    print(banner)

	    passed = True
	    try:
	        # Classifier scenarios first, then the data-model round trips.
	        test_context_aware_classifier()
	        test_data_model_integration()

	        print("\n" + banner)
	        print("✅ ALL TESTS PASSED!")
	        print("Context-Aware Classifier implementation is working correctly.")
	        print(banner)
	    except Exception as error:
	        print(f"\n❌ TEST FAILED: {error}")
	        import traceback
	        traceback.print_exc()
	        passed = False

	    return passed


	if __name__ == "__main__":
	    # Exit status 0 when main() reports success, 1 otherwise.
	    exit_code = 0 if main() else 1
	    sys.exit(exit_code)