Spaces:
Sleeping
Sleeping
File size: 12,739 Bytes
ab93d81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
# test_properties_classification_flow.py
"""
Property-based tests for Classification Flow Manager.
Tests universal properties that should hold across all inputs for
RED/YELLOW/GREEN classification flows.
"""
import pytest
from hypothesis import given, strategies as st
from src.core.classification_flow_manager import ClassificationFlowManager
from src.core.content_generator import ContentGenerator
from src.core.chaplain_models import DistressIndicator
from tests.chaplain_feedback.conftest import distress_indicator_strategy
class TestClassificationFlowProperties:
    """Property-based tests for ClassificationFlowManager.

    Every test feeds Hypothesis-generated inputs into one of the manager's
    RED / YELLOW / GREEN flow entry points and asserts on the fields of the
    returned result object.
    """

    def setup_method(self):
        """Set up test fixtures.

        NOTE(review): pytest invokes this once per test method, not once per
        generated Hypothesis example, so every example inside a test shares
        the same manager instance. Presumably the manager keeps no per-call
        state that could leak between examples -- TODO confirm.
        """
        self.content_generator = ContentGenerator()
        self.flow_manager = ClassificationFlowManager(self.content_generator)

    @staticmethod
    def _is_non_blank(text):
        """Return True when *text* is not None and has non-whitespace content."""
        return text is not None and text.strip() != ""

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        consent_status=st.sampled_from(["granted", "declined"]),
    )
    def test_property_4_red_flow_displays_all_content(
        self, message, confidence, indicators, consent_status
    ):
        """
        **Feature: chaplain-feedback-system, Property 4: RED Flow Displays All Content**
        **Validates: Requirements 1.5**

        For any RED classification result, the UI should display all three content types:
        explanation, permission check message, and referral message (if consent granted).
        """
        # Execute RED flow
        result = self.flow_manager.execute_red_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            consent_status=consent_status,
        )

        # Verify all required content is present
        assert result.classification == "red"
        assert self._is_non_blank(result.explanation)
        assert self._is_non_blank(result.permission_check_message)
        assert result.consent_status == consent_status

        if consent_status == "granted":
            # If consent granted, referral message should be present
            assert self._is_non_blank(result.referral_message)
        else:
            # If consent declined, referral message should be None
            assert result.referral_message is None

        # Verify indicators are preserved
        assert result.indicators == indicators
        assert result.confidence == confidence

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
    )
    def test_property_5_yellow_explanation_differentiates(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 5: YELLOW Explanation Differentiates**
        **Validates: Requirements 2.1**

        For any YELLOW classification, the explanation should contain reasoning
        for why it's not RED and why it's not GREEN.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Check the basics first: the explanation must exist before it is
        # dereferenced below, so a missing explanation is reported as a failed
        # assertion rather than an AttributeError on None.
        assert result.classification == "yellow"
        assert self._is_non_blank(result.explanation)

        # Verify explanation differentiates from RED and GREEN
        explanation = result.explanation.lower()

        # Should explain why not RED
        assert any(phrase in explanation for phrase in [
            "why not red", "not red", "not meet the threshold",
            "do not meet", "further clarification", "not severe"
        ]), f"Explanation should explain why not RED: {result.explanation}"

        # Should explain why not GREEN
        assert any(phrase in explanation for phrase in [
            "why not green", "not green", "indicators", "concerns",
            "warrant follow-up", "suggest possible"
        ]), f"Explanation should explain why not GREEN: {result.explanation}"

        # Verify other YELLOW flow properties
        assert 2 <= len(result.follow_up_questions) <= 3

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
    )
    def test_property_6_yellow_generates_2_3_questions(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
        **Validates: Requirements 2.2**

        For any YELLOW classification, the system should generate between 2 and 3
        follow-up questions, each containing 1-2 clarifying questions.
        """
        # Execute YELLOW flow
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Verify question count
        assert 2 <= len(result.follow_up_questions) <= 3, (
            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
        )

        # Verify each question has required fields
        for question in result.follow_up_questions:
            assert self._is_non_blank(question.question_id)
            assert self._is_non_blank(question.question_text)
            assert self._is_non_blank(question.purpose)

            # Each question should contain 1-2 clarifying questions
            # (the number of question marks is used as the proxy).
            question_marks = question.question_text.count("?")
            assert 1 <= question_marks <= 2, (
                f"Expected 1-2 questions per follow-up, got {question_marks} in: {question.question_text}"
            )

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        # GREEN should have few/no indicators
        indicators=st.lists(distress_indicator_strategy(), max_size=2),
    )
    def test_property_9_green_explanation_generated(
        self, message, confidence, indicators
    ):
        """
        **Feature: chaplain-feedback-system, Property 9: GREEN Explanation Generated**
        **Validates: Requirements 3.1, 3.2**

        For any GREEN classification, an explanation should be generated explaining
        why no spiritual indicators were found.
        """
        # Execute GREEN flow
        result = self.flow_manager.execute_green_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
        )

        # Verify explanation is generated
        assert result.classification == "green"
        assert self._is_non_blank(result.explanation)

        # Explanation should mention no indicators or no distress
        explanation = result.explanation.lower()
        assert any(phrase in explanation for phrase in [
            "no spiritual distress", "no indicators", "not suggest spiritual",
            "no spiritual concerns", "no further steps"
        ]), f"GREEN explanation should mention no distress: {result.explanation}"

        # Should explain why not RED or YELLOW
        assert any(phrase in explanation for phrase in [
            "why not red", "why not yellow", "not contain", "does not suggest"
        ]), f"GREEN explanation should differentiate from RED/YELLOW: {result.explanation}"

        # GREEN flow should not have RED/YELLOW specific content
        assert result.permission_check_message is None
        assert result.referral_message is None
        assert result.consent_status is None
        assert len(result.follow_up_questions) == 0
        assert len(result.patient_responses) == 0
        assert result.re_evaluation_result is None

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        # Generate responses that contain escalation keywords
        escalation_responses=st.lists(
            st.sampled_from([
                "I feel hopeless about everything",
                "I feel worthless and can't go on",
                "There's no point in anything anymore",
                "I want to give up completely",
                "This is unbearable, I can't take it"
            ]),
            min_size=1,
            max_size=3,
        ),
    )
    def test_property_7_yellow_escalation_to_red(
        self, message, confidence, indicators, escalation_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 7: YELLOW Escalation to RED**
        **Validates: Requirements 2.4**

        For any YELLOW classification where simulated patient responses indicate distress,
        the system should transition to RED FLAG flow.
        """
        # Execute YELLOW flow with escalation responses
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=escalation_responses,
        )

        # Verify escalation occurred
        assert result.re_evaluation_result == "red", (
            f"Expected escalation to RED, got {result.re_evaluation_result} "
            f"for responses: {escalation_responses}"
        )

        # Test the escalation method
        escalated_result = self.flow_manager.escalate_yellow_to_red(result, message)

        # Verify escalated result is RED
        assert escalated_result.classification == "red"
        assert escalated_result.explanation is not None
        assert escalated_result.permission_check_message is not None
        assert escalated_result.referral_message is not None  # Should have consent granted
        assert escalated_result.consent_status == "granted"

    @given(
        message=st.text(min_size=1, max_size=500),
        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
        # Generate responses that contain downgrade keywords
        downgrade_responses=st.lists(
            st.sampled_from([
                "I'm feeling better now",
                "Everything is okay",
                "I have good support from my family",
                "I'm not worried about it",
                "I'm managing well",
                "I feel hopeful about the future"
            ]),
            min_size=1,
            max_size=3,
        ),
    )
    def test_property_8_yellow_downgrade_to_green(
        self, message, confidence, indicators, downgrade_responses
    ):
        """
        **Feature: chaplain-feedback-system, Property 8: YELLOW Downgrade to GREEN**
        **Validates: Requirements 2.5**

        For any YELLOW classification where simulated patient responses indicate no distress,
        the system should transition to GREEN status.
        """
        # Execute YELLOW flow with downgrade responses
        result = self.flow_manager.execute_yellow_flow(
            message=message,
            confidence=confidence,
            indicators=indicators,
            patient_responses=downgrade_responses,
        )

        # Verify downgrade occurred
        assert result.re_evaluation_result == "green", (
            f"Expected downgrade to GREEN, got {result.re_evaluation_result} "
            f"for responses: {downgrade_responses}"
        )

        # Test the downgrade method
        downgraded_result = self.flow_manager.downgrade_yellow_to_green(result, message)

        # Verify downgraded result is GREEN
        assert downgraded_result.classification == "green"
        assert downgraded_result.explanation is not None
        assert downgraded_result.permission_check_message is None
        assert downgraded_result.referral_message is None
        assert downgraded_result.consent_status is None
        assert len(downgraded_result.follow_up_questions) == 0