#!/usr/bin/env python3
"""
Test script for the structured feedback system.
Tests Task 4.1 and 4.2 implementation.
"""
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

from config.prompt_management.feedback_system import FeedbackSystem
from config.prompt_management.data_models import (
    ErrorType, ErrorSubcategory, QuestionIssueType, ReferralProblemType, ScenarioType
)
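

# NOTE: All tests below share one on-disk storage path, so records accumulate
# across runs. The helper sketched here assumes FeedbackSystem keeps all of
# its state under storage_path (an assumption about the implementation, not a
# documented contract); calling it before main() would make the count-style
# assertions deterministic between runs.
import shutil

TEST_STORAGE_PATH = ".verification_data/test_feedback"


def reset_test_storage(path: str = TEST_STORAGE_PATH) -> None:
    """Remove feedback data left over from a previous run (hypothetical helper)."""
    shutil.rmtree(path, ignore_errors=True)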


def test_classification_error_recording():
    """Test recording classification errors with all required fields."""
    print("Testing classification error recording...")
    feedback_system = FeedbackSystem(storage_path=".verification_data/test_feedback")

    # Record a classification error
    error_id = feedback_system.record_classification_error(
        error_type=ErrorType.WRONG_CLASSIFICATION,
        subcategory=ErrorSubcategory.GREEN_TO_YELLOW,
        expected_category="YELLOW",
        actual_category="GREEN",
        message_content="I feel a bit stressed about work lately",
        reviewer_comments="Patient expressed stress but system classified as GREEN. Should be YELLOW for follow-up.",
        confidence_level=0.85,
        session_id="test_session_001",
        additional_context={"reviewer_id": "reviewer_123", "review_date": "2024-12-18"}
    )
    print(f"✓ Recorded classification error with ID: {error_id}")

    # Verify the error was stored correctly
    errors = feedback_system._load_errors()
    assert len(errors) >= 1, "Error should be stored"
    latest_error = errors[-1]
    assert latest_error['error_id'] == error_id
    assert latest_error['error_type'] == 'wrong_classification'
    assert latest_error['subcategory'] == 'green_to_yellow'
    assert latest_error['expected_category'] == 'YELLOW'
    assert latest_error['actual_category'] == 'GREEN'
    assert latest_error['confidence_level'] == 0.85
    print("✓ Classification error stored with all required fields")
    return True
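

# For reference, a record stored by test_classification_error_recording() is
# expected to look roughly like the JSON object below. Field names are taken
# from the assertions above; the exact storage layout is an assumption about
# FeedbackSystem, not verified here.
#
#   {
#     "error_id": "<generated id>",
#     "error_type": "wrong_classification",
#     "subcategory": "green_to_yellow",
#     "expected_category": "YELLOW",
#     "actual_category": "GREEN",
#     "confidence_level": 0.85,
#     "session_id": "test_session_001"
#   }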


def test_question_issue_recording():
    """Test recording question issues."""
    print("Testing question issue recording...")
    feedback_system = FeedbackSystem(storage_path=".verification_data/test_feedback")

    # Record a question issue
    issue_id = feedback_system.record_question_issue(
        issue_type=QuestionIssueType.INAPPROPRIATE_QUESTION,
        question_content="Why are you feeling sad?",
        scenario_type=ScenarioType.LOSS_OF_INTEREST,
        reviewer_comments="Question is too direct and assumes emotional state. Should ask about impact instead.",
        severity="medium",
        session_id="test_session_002",
        suggested_improvement="Ask: 'Is that something that's been weighing on you emotionally?'"
    )
    print(f"✓ Recorded question issue with ID: {issue_id}")

    # Verify the issue was stored correctly
    issues = feedback_system._load_question_issues()
    assert len(issues) >= 1, "Issue should be stored"
    latest_issue = issues[-1]
    assert latest_issue['issue_id'] == issue_id
    assert latest_issue['issue_type'] == 'inappropriate_question'
    assert latest_issue['scenario_type'] == 'loss_of_interest'
    assert latest_issue['severity'] == 'medium'
    print("✓ Question issue stored with all required fields")
    return True
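

# Note: severity is passed as a free-form string ("medium" here, "high" in the
# referral test below), and the assertions suggest it is stored verbatim.
# Treating these as a fixed scale of named levels is an assumption of this
# test file, not something the data models are known to enforce.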


def test_referral_problem_recording():
    """Test recording referral problems."""
    print("Testing referral problem recording...")
    feedback_system = FeedbackSystem(storage_path=".verification_data/test_feedback")

    # Record a referral problem
    problem_id = feedback_system.record_referral_problem(
        problem_type=ReferralProblemType.INCOMPLETE_SUMMARY,
        referral_content="Patient needs spiritual care support.",
        reviewer_comments="Summary lacks specific distress indicators and conversation context.",
        severity="high",
        session_id="test_session_003",
        missing_fields=["distress_indicators", "conversation_context", "urgency_level"]
    )
    print(f"✓ Recorded referral problem with ID: {problem_id}")

    # Verify the problem was stored correctly
    problems = feedback_system._load_referral_problems()
    assert len(problems) >= 1, "Problem should be stored"
    latest_problem = problems[-1]
    assert latest_problem['problem_id'] == problem_id
    assert latest_problem['problem_type'] == 'incomplete_summary'
    assert latest_problem['severity'] == 'high'
    assert len(latest_problem['missing_fields']) == 3
    print("✓ Referral problem stored with all required fields")
    return True


def test_error_pattern_analysis():
    """Test error pattern analysis functionality."""
    print("Testing error pattern analysis...")
    feedback_system = FeedbackSystem(storage_path=".verification_data/test_feedback")

    # Record multiple similar errors to create a pattern
    for i in range(4):
        feedback_system.record_classification_error(
            error_type=ErrorType.WRONG_CLASSIFICATION,
            subcategory=ErrorSubcategory.GREEN_TO_YELLOW,
            expected_category="YELLOW",
            actual_category="GREEN",
            message_content=f"Test message {i} about stress",
            reviewer_comments=f"Test comment {i}",
            confidence_level=0.8 + (i * 0.05),
            session_id=f"pattern_test_{i}"
        )

    # Analyze patterns
    patterns = feedback_system.analyze_error_patterns(min_frequency=3)
    print(f"✓ Identified {len(patterns)} error patterns")

    # Verify pattern structure
    for pattern in patterns:
        assert hasattr(pattern, 'pattern_id')
        assert hasattr(pattern, 'frequency')
        assert hasattr(pattern, 'suggested_improvements')
        assert pattern.frequency >= 3
        assert len(pattern.suggested_improvements) > 0
        print(f"  - Pattern: {pattern.pattern_type} (frequency: {pattern.frequency})")
        for suggestion in pattern.suggested_improvements[:2]:  # Show first 2 suggestions
            print(f"    Suggestion: {suggestion}")
    return True
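

# analyze_error_patterns() is exercised as a black box above. For illustration
# only (this is not FeedbackSystem's actual algorithm), a naive version of the
# same idea groups errors by (error_type, subcategory) and keeps groups that
# meet the frequency threshold:
def _naive_pattern_count(errors, min_frequency=3):
    """Illustrative sketch: count (error_type, subcategory) pairs meeting min_frequency."""
    from collections import Counter
    counts = Counter((e['error_type'], e['subcategory']) for e in errors)
    return {key: n for key, n in counts.items() if n >= min_frequency}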


def test_feedback_summary():
    """Test comprehensive feedback summary generation."""
    print("Testing feedback summary generation...")
    feedback_system = FeedbackSystem(storage_path=".verification_data/test_feedback")

    # Get comprehensive summary
    summary = feedback_system.get_feedback_summary()

    # Verify summary structure
    required_fields = [
        'total_errors', 'total_question_issues', 'total_referral_problems',
        'error_types', 'error_subcategories', 'question_issue_types',
        'referral_problem_types', 'average_confidence', 'recent_errors',
        'improvement_suggestions'
    ]
    for field in required_fields:
        assert field in summary, f"Summary missing required field: {field}"
    print("✓ Summary contains all required fields")
    print(f"  - Total errors: {summary['total_errors']}")
    print(f"  - Total question issues: {summary['total_question_issues']}")
    print(f"  - Total referral problems: {summary['total_referral_problems']}")
    print(f"  - Average confidence: {summary['average_confidence']:.2f}")
    print(f"  - Recent errors: {summary['recent_errors']}")

    # Show improvement suggestions
    print("  - Top improvement suggestions:")
    for i, suggestion in enumerate(summary['improvement_suggestions'][:3], 1):
        print(f"    {i}. {suggestion}")
    return True
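

# The summary fields are only checked for presence above. 'recent_errors' is
# printed raw; whether it holds a count or a list of recent records is up to
# FeedbackSystem (assumed here to be a simple count over a recent window).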


def test_data_model_serialization():
    """Test that data models serialize and deserialize correctly."""
    print("Testing data model serialization...")
    from config.prompt_management.data_models import ClassificationError
    from datetime import datetime

    # Create a classification error
    error = ClassificationError(
        error_id="test_error_123",
        error_type=ErrorType.SEVERITY_MISJUDGMENT,
        subcategory=ErrorSubcategory.UNDERESTIMATED_DISTRESS,
        expected_category="RED",
        actual_category="YELLOW",
        message_content="I don't think I can go on like this anymore",
        reviewer_comments="Clear indication of severe distress, should be RED not YELLOW",
        confidence_level=0.95,
        timestamp=datetime.now(),
        session_id="serialization_test",
        additional_context={"test": True}
    )

    # Test serialization
    error_dict = error.to_dict()
    assert isinstance(error_dict, dict)
    assert error_dict['error_id'] == "test_error_123"
    assert error_dict['error_type'] == 'severity_misjudgment'

    # Test deserialization
    reconstructed_error = ClassificationError.from_dict(error_dict)
    assert reconstructed_error.error_id == error.error_id
    assert reconstructed_error.error_type == error.error_type
    assert reconstructed_error.confidence_level == error.confidence_level
    print("✓ Data model serialization works correctly")
    return True
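

# The round-trip above compares the enum and float fields but not timestamp.
# A common convention (assumed, not verified against the data models) is to
# serialize datetimes as ISO 8601 strings, which round-trip losslessly:
#
#   datetime.fromisoformat(datetime.now().isoformat())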


def main():
    """Run all feedback system tests."""
    print("=" * 60)
    print("STRUCTURED FEEDBACK SYSTEM TESTS")
    print("=" * 60)

    tests = [
        test_classification_error_recording,
        test_question_issue_recording,
        test_referral_problem_recording,
        test_error_pattern_analysis,
        test_feedback_summary,
        test_data_model_serialization
    ]

    passed = 0
    failed = 0
    for test in tests:
        try:
            print(f"\n{test.__name__.replace('_', ' ').title()}:")
            print("-" * 40)
            result = test()
            if result:
                passed += 1
                print("✓ PASSED")
            else:
                failed += 1
                print("✗ FAILED")
        except Exception as e:
            failed += 1
            print(f"✗ FAILED: {e}")

    print("\n" + "=" * 60)
    print(f"RESULTS: {passed} passed, {failed} failed")
    print("=" * 60)
    if failed == 0:
        print("🎉 All feedback system tests passed!")
        print("\n**Feature: prompt-optimization, Property 3: Structured Feedback Data Capture**")
        print("✓ VALIDATED: Requirements 3.1, 3.2, 3.3, 3.4, 3.5")
        return True
    else:
        print("❌ Some tests failed. Please check the implementation.")
        return False
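

# Usage note: the sys.path entry at the top of this file is derived from
# __file__, so the script can be launched from any working directory, e.g.:
#   python tests/unit/test_feedback_system.py
# It exits 0 when every test passes and 1 otherwise.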
if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)