# Path: Spiritual_Health_Project/tests/chaplain_feedback/test_properties_classification_flow.py
# Last commit ab93d81 (DocUA): Fix CSV download button for Hugging Face Spaces -
# use DownloadButton for direct file download
# test_properties_classification_flow.py
"""
Property-based tests for Classification Flow Manager.
Tests universal properties that should hold across all inputs for
RED/YELLOW/GREEN classification flows.
"""
import pytest
from hypothesis import given, strategies as st
from src.core.classification_flow_manager import ClassificationFlowManager
from src.core.content_generator import ContentGenerator
from src.core.chaplain_models import DistressIndicator
from tests.chaplain_feedback.conftest import distress_indicator_strategy
class TestClassificationFlowProperties:
    """Property-based tests for ClassificationFlowManager.

    Each test drives one of the manager's flow entry points
    (``execute_red_flow``, ``execute_yellow_flow``, ``execute_green_flow``,
    ``escalate_yellow_to_red``, ``downgrade_yellow_to_green``) with
    Hypothesis-generated inputs and asserts invariants on the returned result.
    """

    def setup_method(self):
        """Set up test fixtures.

        NOTE(review): Hypothesis invokes ``setup_method`` once per test
        function, not once per generated example, so the same manager
        instance is reused across all examples of a test. Confirm that
        ``ClassificationFlowManager`` keeps no per-call state that could leak
        between examples.
        """
        self.content_generator = ContentGenerator()
        self.flow_manager = ClassificationFlowManager(self.content_generator)

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        consent_status=st.sampled_from(["granted", "declined"]),
    )
    def test_property_4_red_flow_displays_all_content(
        self, message, confidence, indicators, consent_status
    ):
        """
        **Feature: chaplain-feedback-system, Property 4: RED Flow Displays All Content**
        **Validates: Requirements 1.5**

        For any RED classification result, the UI should display all three content types:
        explanation, permission check message, and referral message (if consent granted).
        """
        # Execute RED flow
        result = self.flow_manager.execute_red_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            consent_status=consent_status,
        )

        # Verify all required content is present
        assert result.classification == "red"
        assert result.explanation is not None and result.explanation.strip() != ""
        assert (
            result.permission_check_message is not None
            and result.permission_check_message.strip() != ""
        )
        assert result.consent_status == consent_status

        if consent_status == "granted":
            # If consent granted, referral message should be present
            assert (
                result.referral_message is not None
                and result.referral_message.strip() != ""
            )
        else:
            # If consent declined, referral message should be None
            assert result.referral_message is None

        # Verify inputs are passed through to the result unchanged
        assert result.indicators == indicators
        assert result.confidence == confidence

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
    )
    def test_property_5_yellow_explanation_differentiates(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 5: YELLOW Explanation Differentiates**
        **Validates: Requirements 2.1**

        For any YELLOW classification, the explanation should contain reasoning
        for why it's not RED and why it's not GREEN.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Check the basic YELLOW invariants first so that a missing explanation
        # fails with a clear assertion instead of an AttributeError on .lower().
        assert result.classification == "yellow"
        assert result.explanation is not None and result.explanation.strip() != ""

        # Phrase matching below is case-insensitive
        explanation = result.explanation.lower()

        # Should explain why not RED
        assert any(phrase in explanation for phrase in [
            "why not red", "not red", "not meet the threshold",
            "do not meet", "further clarification", "not severe"
        ]), f"Explanation should explain why not RED: {result.explanation}"

        # Should explain why not GREEN
        assert any(phrase in explanation for phrase in [
            "why not green", "not green", "indicators", "concerns",
            "warrant follow-up", "suggest possible"
        ]), f"Explanation should explain why not GREEN: {result.explanation}"

        # YELLOW flow must produce 2-3 follow-up questions
        assert 2 <= len(result.follow_up_questions) <= 3

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
    )
    def test_property_6_yellow_generates_2_3_questions(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
        **Validates: Requirements 2.2**

        For any YELLOW classification, the system should generate between 2 and 3
        follow-up questions, each containing 1-2 clarifying questions.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Verify question count
        assert 2 <= len(result.follow_up_questions) <= 3, (
            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
        )

        # Verify each question has required fields
        for question in result.follow_up_questions:
            assert question.question_id is not None and question.question_id.strip() != ""
            assert question.question_text is not None and question.question_text.strip() != ""
            assert question.purpose is not None and question.purpose.strip() != ""

            # Each question should contain 1-2 clarifying questions; the number
            # of question marks is used as a proxy for the number of questions.
            question_marks = question.question_text.count("?")
            assert 1 <= question_marks <= 2, (
                f"Expected 1-2 questions per follow-up, got {question_marks} in: {question.question_text}"
            )

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        # GREEN should have few/no indicators
        indicators=st.lists(distress_indicator_strategy(), max_size=2),
    )
    def test_property_9_green_explanation_generated(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 9: GREEN Explanation Generated**
        **Validates: Requirements 3.1, 3.2**

        For any GREEN classification, an explanation should be generated explaining
        why no spiritual indicators were found.
        """
        # Execute GREEN flow
        result = self.flow_manager.execute_green_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Verify explanation is generated
        assert result.classification == "green"
        assert result.explanation is not None and result.explanation.strip() != ""

        # Phrase matching below is case-insensitive
        explanation = result.explanation.lower()

        # Explanation should mention no indicators or no distress
        assert any(phrase in explanation for phrase in [
            "no spiritual distress", "no indicators", "not suggest spiritual",
            "no spiritual concerns", "no further steps"
        ]), f"GREEN explanation should mention no distress: {result.explanation}"

        # Should explain why not RED or YELLOW
        assert any(phrase in explanation for phrase in [
            "why not red", "why not yellow", "not contain", "does not suggest"
        ]), f"GREEN explanation should differentiate from RED/YELLOW: {result.explanation}"

        # GREEN flow should not have RED/YELLOW specific content
        assert result.permission_check_message is None
        assert result.referral_message is None
        assert result.consent_status is None
        assert len(result.follow_up_questions) == 0
        assert len(result.patient_responses) == 0
        assert result.re_evaluation_result is None

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        # Generate responses that contain escalation keywords
        escalation_responses=st.lists(
            st.sampled_from([
                "I feel hopeless about everything",
                "I feel worthless and can't go on",
                "There's no point in anything anymore",
                "I want to give up completely",
                "This is unbearable, I can't take it"
            ]),
            min_size=1,
            max_size=3,
        ),
    )
    def test_property_7_yellow_escalation_to_red(
        self, message, confidence, indicators, escalation_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 7: YELLOW Escalation to RED**
        **Validates: Requirements 2.4**

        For any YELLOW classification where simulated patient responses indicate distress,
        the system should transition to RED FLAG flow.
        """
        # Execute YELLOW flow with escalation responses
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=escalation_responses,
        )

        # Verify escalation occurred
        assert result.re_evaluation_result == "red", (
            f"Expected escalation to RED, got {result.re_evaluation_result} "
            f"for responses: {escalation_responses}"
        )

        # Test the escalation method
        escalated_result = self.flow_manager.escalate_yellow_to_red(result, message)

        # Verify escalated result is RED
        assert escalated_result.classification == "red"
        assert escalated_result.explanation is not None
        assert escalated_result.permission_check_message is not None
        assert escalated_result.referral_message is not None  # Should have consent granted
        assert escalated_result.consent_status == "granted"

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        # Generate responses that contain downgrade keywords
        downgrade_responses=st.lists(
            st.sampled_from([
                "I'm feeling better now",
                "Everything is okay",
                "I have good support from my family",
                "I'm not worried about it",
                "I'm managing well",
                "I feel hopeful about the future"
            ]),
            min_size=1,
            max_size=3,
        ),
    )
    def test_property_8_yellow_downgrade_to_green(
        self, message, confidence, indicators, downgrade_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 8: YELLOW Downgrade to GREEN**
        **Validates: Requirements 2.5**

        For any YELLOW classification where simulated patient responses indicate no distress,
        the system should transition to GREEN status.
        """
        # Execute YELLOW flow with downgrade responses
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=downgrade_responses,
        )

        # Verify downgrade occurred
        assert result.re_evaluation_result == "green", (
            f"Expected downgrade to GREEN, got {result.re_evaluation_result} "
            f"for responses: {downgrade_responses}"
        )

        # Test the downgrade method
        downgraded_result = self.flow_manager.downgrade_yellow_to_green(result, message)

        # Verify downgraded result is GREEN
        assert downgraded_result.classification == "green"
        assert downgraded_result.explanation is not None
        assert downgraded_result.permission_check_message is None
        assert downgraded_result.referral_message is None
        assert downgraded_result.consent_status is None
        assert len(downgraded_result.follow_up_questions) == 0