# test_properties_classification_flow.py
"""
Property-based tests for Classification Flow Manager.

Tests universal properties that should hold across all inputs for
RED/YELLOW/GREEN classification flows.
"""

import pytest
from hypothesis import given, strategies as st

from src.core.classification_flow_manager import ClassificationFlowManager
from src.core.content_generator import ContentGenerator
from src.core.chaplain_models import DistressIndicator
from tests.chaplain_feedback.conftest import distress_indicator_strategy


# Shared Hypothesis strategies. Every flow test draws the same kind of
# message and confidence; only the indicator-list bounds differ per flow.
_MESSAGES = st.text(min_size=1, max_size=500)
_CONFIDENCES = st.floats(min_value=0.0, max_value=1.0, allow_nan=False)
_DISTRESS_INDICATORS = st.lists(
    distress_indicator_strategy(), min_size=1, max_size=5
)
# GREEN should have few/no indicators.
_GREEN_INDICATORS = st.lists(distress_indicator_strategy(), max_size=2)

# Simulated patient replies that contain escalation keywords.
_ESCALATION_RESPONSES = st.lists(
    st.sampled_from([
        "I feel hopeless about everything",
        "I feel worthless and can't go on",
        "There's no point in anything anymore",
        "I want to give up completely",
        "This is unbearable, I can't take it",
    ]),
    min_size=1,
    max_size=3,
)

# Simulated patient replies that contain downgrade keywords.
_DOWNGRADE_RESPONSES = st.lists(
    st.sampled_from([
        "I'm feeling better now",
        "Everything is okay",
        "I have good support from my family",
        "I'm not worried about it",
        "I'm managing well",
        "I feel hopeful about the future",
    ]),
    min_size=1,
    max_size=3,
)

# Phrases accepted (case-insensitively) as evidence that an explanation
# differentiates its classification from the neighbouring ones.
_YELLOW_NOT_RED_PHRASES = [
    "why not red",
    "not red",
    "not meet the threshold",
    "do not meet",
    "further clarification",
    "not severe",
]
_YELLOW_NOT_GREEN_PHRASES = [
    "why not green",
    "not green",
    "indicators",
    "concerns",
    "warrant follow-up",
    "suggest possible",
]
_GREEN_NO_DISTRESS_PHRASES = [
    "no spiritual distress",
    "no indicators",
    "not suggest spiritual",
    "no spiritual concerns",
    "no further steps",
]
_GREEN_NOT_RED_OR_YELLOW_PHRASES = [
    "why not red",
    "why not yellow",
    "not contain",
    "does not suggest",
]


def _assert_non_blank(value, label):
    """Assert that *value* is not None and is not empty after stripping.

    Args:
        value: The text field under test (may be None).
        label: Field name used in the failure message.
    """
    assert value is not None and value.strip() != "", (
        f"{label} must be a non-empty string, got {value!r}"
    )


def _mentions_any(text, phrases):
    """Return True when at least one of *phrases* is a substring of *text*."""
    return any(phrase in text for phrase in phrases)


class TestClassificationFlowProperties:
    """Property-based tests for ClassificationFlowManager."""

    def setup_method(self):
        """Set up test fixtures."""
        # NOTE(review): pytest calls setup_method once per test function, so
        # all Hypothesis examples of a test share this manager instance.
        # Presumably the manager keeps no per-call state -- confirm.
        self.content_generator = ContentGenerator()
        self.flow_manager = ClassificationFlowManager(self.content_generator)

    @given(
        message=_MESSAGES,
        confidence=_CONFIDENCES,
        indicators=_DISTRESS_INDICATORS,
        consent_status=st.sampled_from(["granted", "declined"]),
    )
    def test_property_4_red_flow_displays_all_content(
        self, message, confidence, indicators, consent_status
    ):
        """
        **Feature: chaplain-feedback-system, Property 4: RED Flow Displays All Content**
        **Validates: Requirements 1.5**

        For any RED classification result, the UI should display all three
        content types: explanation, permission check message, and referral
        message (if consent granted).
        """
        # Execute RED flow
        result = self.flow_manager.execute_red_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            consent_status=consent_status,
        )

        # Verify all required content is present
        assert result.classification == "red"
        _assert_non_blank(result.explanation, "explanation")
        _assert_non_blank(
            result.permission_check_message, "permission_check_message"
        )
        assert result.consent_status == consent_status

        if consent_status == "granted":
            # If consent granted, referral message should be present
            _assert_non_blank(result.referral_message, "referral_message")
        else:
            # If consent declined, referral message should be None
            assert result.referral_message is None

        # Verify inputs are preserved on the result
        assert result.indicators == indicators
        assert result.confidence == confidence

    @given(
        message=_MESSAGES,
        confidence=_CONFIDENCES,
        indicators=_DISTRESS_INDICATORS,
    )
    def test_property_5_yellow_explanation_differentiates(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 5: YELLOW Explanation Differentiates**
        **Validates: Requirements 2.1**

        For any YELLOW classification, the explanation should contain
        reasoning for why it's not RED and why it's not GREEN.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Check presence before dereferencing, so a missing explanation fails
        # as an assertion rather than an AttributeError on .lower().
        _assert_non_blank(result.explanation, "explanation")
        explanation = result.explanation.lower()

        # Should explain why not RED
        assert _mentions_any(explanation, _YELLOW_NOT_RED_PHRASES), (
            f"Explanation should explain why not RED: {result.explanation}"
        )

        # Should explain why not GREEN
        assert _mentions_any(explanation, _YELLOW_NOT_GREEN_PHRASES), (
            f"Explanation should explain why not GREEN: {result.explanation}"
        )

        # Verify other YELLOW flow properties
        assert result.classification == "yellow"
        assert 2 <= len(result.follow_up_questions) <= 3

    @given(
        message=_MESSAGES,
        confidence=_CONFIDENCES,
        indicators=_DISTRESS_INDICATORS,
    )
    def test_property_6_yellow_generates_2_3_questions(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
        **Validates: Requirements 2.2**

        For any YELLOW classification, the system should generate between 2
        and 3 follow-up questions, each containing 1-2 clarifying questions.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Verify question count
        assert 2 <= len(result.follow_up_questions) <= 3, (
            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
        )

        # Verify each question has required fields
        for question in result.follow_up_questions:
            _assert_non_blank(question.question_id, "question_id")
            _assert_non_blank(question.question_text, "question_text")
            _assert_non_blank(question.purpose, "purpose")

            # Each follow-up should contain 1-2 clarifying questions; the
            # number of question marks is used as the proxy for that count.
            question_marks = question.question_text.count("?")
            assert 1 <= question_marks <= 2, (
                f"Expected 1-2 questions per follow-up, got {question_marks} in: {question.question_text}"
            )

    @given(
        message=_MESSAGES,
        confidence=_CONFIDENCES,
        indicators=_GREEN_INDICATORS,
    )
    def test_property_9_green_explanation_generated(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 9: GREEN Explanation Generated**
        **Validates: Requirements 3.1, 3.2**

        For any GREEN classification, an explanation should be generated
        explaining why no spiritual indicators were found.
        """
        # Execute GREEN flow
        result = self.flow_manager.execute_green_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Verify explanation is generated
        assert result.classification == "green"
        _assert_non_blank(result.explanation, "explanation")

        # Explanation should mention no indicators or no distress
        explanation = result.explanation.lower()
        assert _mentions_any(explanation, _GREEN_NO_DISTRESS_PHRASES), (
            f"GREEN explanation should mention no distress: {result.explanation}"
        )

        # Should explain why not RED or YELLOW
        assert _mentions_any(explanation, _GREEN_NOT_RED_OR_YELLOW_PHRASES), (
            f"GREEN explanation should differentiate from RED/YELLOW: {result.explanation}"
        )

        # GREEN flow should not have RED/YELLOW specific content
        assert result.permission_check_message is None
        assert result.referral_message is None
        assert result.consent_status is None
        assert len(result.follow_up_questions) == 0
        assert len(result.patient_responses) == 0
        assert result.re_evaluation_result is None

    @given(
        message=_MESSAGES,
        confidence=_CONFIDENCES,
        indicators=_DISTRESS_INDICATORS,
        escalation_responses=_ESCALATION_RESPONSES,
    )
    def test_property_7_yellow_escalation_to_red(
        self, message, confidence, indicators, escalation_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 7: YELLOW Escalation to RED**
        **Validates: Requirements 2.4**

        For any YELLOW classification where simulated patient responses
        indicate distress, the system should transition to RED FLAG flow.
        """
        # Execute YELLOW flow with escalation responses
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=escalation_responses,
        )

        # Verify escalation occurred
        assert result.re_evaluation_result == "red", (
            f"Expected escalation to RED, got {result.re_evaluation_result} "
            f"for responses: {escalation_responses}"
        )

        # Test the escalation method
        escalated_result = self.flow_manager.escalate_yellow_to_red(
            result, message
        )

        # Verify escalated result is RED
        assert escalated_result.classification == "red"
        assert escalated_result.explanation is not None
        assert escalated_result.permission_check_message is not None
        assert escalated_result.referral_message is not None  # Should have consent granted
        assert escalated_result.consent_status == "granted"

    @given(
        message=_MESSAGES,
        confidence=_CONFIDENCES,
        indicators=_DISTRESS_INDICATORS,
        downgrade_responses=_DOWNGRADE_RESPONSES,
    )
    def test_property_8_yellow_downgrade_to_green(
        self, message, confidence, indicators, downgrade_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 8: YELLOW Downgrade to GREEN**
        **Validates: Requirements 2.5**

        For any YELLOW classification where simulated patient responses
        indicate no distress, the system should transition to GREEN status.
        """
        # Execute YELLOW flow with downgrade responses
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=downgrade_responses,
        )

        # Verify downgrade occurred
        assert result.re_evaluation_result == "green", (
            f"Expected downgrade to GREEN, got {result.re_evaluation_result} "
            f"for responses: {downgrade_responses}"
        )

        # Test the downgrade method
        downgraded_result = self.flow_manager.downgrade_yellow_to_green(
            result, message
        )

        # Verify downgraded result is GREEN
        assert downgraded_result.classification == "green"
        assert downgraded_result.explanation is not None
        assert downgraded_result.permission_check_message is None
        assert downgraded_result.referral_message is None
        assert downgraded_result.consent_status is None
        assert len(downgraded_result.follow_up_questions) == 0