Spaces:

DocUA
/

Spiritual_Health_Project

Sleeping

File size: 12,739 Bytes

ab93d81

# test_properties_classification_flow.py
"""
Property-based tests for Classification Flow Manager.

Tests universal properties that should hold across all inputs for
RED/YELLOW/GREEN classification flows.
"""

import pytest
from hypothesis import given, strategies as st

from src.core.classification_flow_manager import ClassificationFlowManager
from src.core.content_generator import ContentGenerator
from src.core.chaplain_models import DistressIndicator
from tests.chaplain_feedback.conftest import distress_indicator_strategy


class TestClassificationFlowProperties:
    """Property-based tests for ClassificationFlowManager."""
    
    def setup_method(self):
        """Create the content generator and the flow manager under test.

        The same ContentGenerator instance is stored on the test object and
        handed to the ClassificationFlowManager.
        """
        generator = ContentGenerator()
        self.content_generator = generator
        self.flow_manager = ClassificationFlowManager(generator)
    
    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        consent_status=st.sampled_from(["granted", "declined"]),
    )
    def test_property_4_red_flow_displays_all_content(
        self, message, confidence, indicators, consent_status
    ):
        """
        **Feature: chaplain-feedback-system, Property 4: RED Flow Displays All Content**
        **Validates: Requirements 1.5**

        Every RED flow result must carry a non-blank explanation and a
        non-blank permission check message. A referral message must be
        present (non-blank) when consent is "granted" and must be None
        otherwise. The inputs (indicators, confidence, consent status) must
        be echoed back unchanged on the result.
        """
        flow_inputs = {
            "message": message,
            "confidence": confidence,
            "indicators": indicators,
            "consent_status": consent_status,
        }
        outcome = self.flow_manager.execute_red_flow(**flow_inputs)

        assert outcome.classification == "red"

        # Explanation and permission check are required regardless of consent.
        for required_text in (outcome.explanation, outcome.permission_check_message):
            assert required_text is not None and required_text.strip() != ""

        assert outcome.consent_status == consent_status

        # The referral message exists only when the patient consented.
        referral = outcome.referral_message
        if consent_status != "granted":
            assert referral is None
        else:
            assert referral is not None and referral.strip() != ""

        # Inputs must pass through the flow untouched.
        assert outcome.indicators == indicators
        assert outcome.confidence == confidence
    
    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5)
    )
    def test_property_5_yellow_explanation_differentiates(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 5: YELLOW Explanation Differentiates**
        **Validates: Requirements 2.1**

        For any YELLOW classification, the explanation should contain reasoning
        for why it's not RED and why it's not GREEN.

        Args:
            message: Generated non-empty message text (1-500 characters).
            confidence: Generated confidence value in [0.0, 1.0].
            indicators: Generated list of 1-5 distress indicators.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators
        )

        # Validate the basic shape first: a missing or blank explanation must
        # fail as a plain assertion, not as an AttributeError from .lower().
        assert result.classification == "yellow"
        assert result.explanation is not None and result.explanation.strip() != ""

        # Phrase matching below is a case-insensitive substring search.
        explanation = result.explanation.lower()

        # Should explain why not RED
        assert any(phrase in explanation for phrase in [
            "why not red", "not red", "not meet the threshold",
            "do not meet", "further clarification", "not severe"
        ]), f"Explanation should explain why not RED: {result.explanation}"

        # Should explain why not GREEN
        assert any(phrase in explanation for phrase in [
            "why not green", "not green", "indicators", "concerns",
            "warrant follow-up", "suggest possible"
        ]), f"Explanation should explain why not GREEN: {result.explanation}"

        # YELLOW flow must also produce 2-3 follow-up questions.
        assert 2 <= len(result.follow_up_questions) <= 3, (
            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
        )
    
    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
    )
    def test_property_6_yellow_generates_2_3_questions(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
        **Validates: Requirements 2.2**

        A YELLOW flow result must hold two or three follow-up questions.
        Each one needs a non-blank id, text and purpose, and its text must
        contain one or two question marks (used here as the proxy for
        "1-2 clarifying questions").
        """
        result = self.flow_manager.execute_yellow_flow(
            message=message, confidence=confidence, indicators=indicators
        )

        # Overall question count.
        assert 2 <= len(result.follow_up_questions) <= 3, (
            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
        )

        required_fields = ("question_id", "question_text", "purpose")
        for question in result.follow_up_questions:
            # Every required field must be present and non-blank.
            for field_name in required_fields:
                field_value = getattr(question, field_name)
                assert field_value is not None and field_value.strip() != ""

            # Count '?' characters to estimate how many questions are asked.
            question_marks = question.question_text.count("?")
            assert 1 <= question_marks <= 2, (
                f"Expected 1-2 questions per follow-up, got {question_marks} in: {question.question_text}"
            )
    
    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        # GREEN should have few/no indicators, so the list may be empty.
        indicators=st.lists(distress_indicator_strategy(), max_size=2),
    )
    def test_property_9_green_explanation_generated(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 9: GREEN Explanation Generated**
        **Validates: Requirements 3.1, 3.2**

        A GREEN flow result must carry a non-blank explanation that states
        no spiritual distress/indicators were found and that sets the case
        apart from RED/YELLOW. None of the RED- or YELLOW-specific fields
        may be populated.
        """
        no_distress_phrases = (
            "no spiritual distress", "no indicators", "not suggest spiritual",
            "no spiritual concerns", "no further steps",
        )
        differentiation_phrases = (
            "why not red", "why not yellow", "not contain", "does not suggest",
        )

        result = self.flow_manager.execute_green_flow(
            message=message, confidence=confidence, indicators=indicators
        )

        assert result.classification == "green"
        assert result.explanation is not None and result.explanation.strip() != ""

        # Phrase checks are case-insensitive substring matches.
        explanation = result.explanation.lower()

        mentions_no_distress = any(
            phrase in explanation for phrase in no_distress_phrases
        )
        assert mentions_no_distress, (
            f"GREEN explanation should mention no distress: {result.explanation}"
        )

        differentiates = any(
            phrase in explanation for phrase in differentiation_phrases
        )
        assert differentiates, (
            f"GREEN explanation should differentiate from RED/YELLOW: {result.explanation}"
        )

        # RED/YELLOW-only scalar fields must stay unset on a GREEN result.
        for unset_attr in (
            "permission_check_message",
            "referral_message",
            "consent_status",
            "re_evaluation_result",
        ):
            assert getattr(result, unset_attr) is None

        # RED/YELLOW-only collections must stay empty on a GREEN result.
        for empty_attr in ("follow_up_questions", "patient_responses"):
            assert len(getattr(result, empty_attr)) == 0
    
    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        # One to three canned replies, each worded to signal distress.
        escalation_responses=st.lists(
            st.sampled_from([
                "I feel hopeless about everything",
                "I feel worthless and can't go on",
                "There's no point in anything anymore",
                "I want to give up completely",
                "This is unbearable, I can't take it",
            ]),
            min_size=1,
            max_size=3,
        ),
    )
    def test_property_7_yellow_escalation_to_red(
        self, message, confidence, indicators, escalation_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 7: YELLOW Escalation to RED**
        **Validates: Requirements 2.4**

        When the simulated patient responses signal distress, the YELLOW
        flow must re-evaluate to "red", and escalating that result must
        yield a RED result with explanation, permission check, referral
        message and a consent status of "granted".
        """
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=escalation_responses,
        )

        # The re-evaluation must have flagged the case as RED.
        assert result.re_evaluation_result == "red", (
            f"Expected escalation to RED, got {result.re_evaluation_result} "
            f"for responses: {escalation_responses}"
        )

        # Exercise the explicit escalation step on the YELLOW result.
        red_result = self.flow_manager.escalate_yellow_to_red(result, message)

        assert red_result.classification == "red"
        # All three RED content pieces must be populated after escalation;
        # the referral is expected because consent is asserted as granted.
        for populated in (
            red_result.explanation,
            red_result.permission_check_message,
            red_result.referral_message,
        ):
            assert populated is not None
        assert red_result.consent_status == "granted"
    
    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        # One to three canned replies, each worded to signal no distress.
        downgrade_responses=st.lists(
            st.sampled_from([
                "I'm feeling better now",
                "Everything is okay",
                "I have good support from my family",
                "I'm not worried about it",
                "I'm managing well",
                "I feel hopeful about the future",
            ]),
            min_size=1,
            max_size=3,
        ),
    )
    def test_property_8_yellow_downgrade_to_green(
        self, message, confidence, indicators, downgrade_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 8: YELLOW Downgrade to GREEN**
        **Validates: Requirements 2.5**

        When the simulated patient responses signal no distress, the YELLOW
        flow must re-evaluate to "green", and downgrading that result must
        yield a GREEN result with an explanation and none of the RED/YELLOW
        specific content.
        """
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=downgrade_responses,
        )

        # The re-evaluation must have cleared the case to GREEN.
        assert result.re_evaluation_result == "green", (
            f"Expected downgrade to GREEN, got {result.re_evaluation_result} "
            f"for responses: {downgrade_responses}"
        )

        # Exercise the explicit downgrade step on the YELLOW result.
        green_result = self.flow_manager.downgrade_yellow_to_green(result, message)

        assert green_result.classification == "green"
        assert green_result.explanation is not None

        # RED-only fields must be cleared on the downgraded result.
        for cleared_attr in (
            "permission_check_message",
            "referral_message",
            "consent_status",
        ):
            assert getattr(green_result, cleared_attr) is None

        # No follow-up questions remain once the case is GREEN.
        assert len(green_result.follow_up_questions) == 0