Spaces:

DocUA
/

Spiritual_Health_Project

Sleeping

App Files Files Community

Spiritual_Health_Project / tests /chaplain_feedback /test_properties_classification_flow.py

DocUA

Fix CSV download button for Hugging Face Spaces - use DownloadButton for direct file download

ab93d81 about 1 month ago

raw

history blame contribute delete

12.7 kB

	# test_properties_classification_flow.py
	"""
	Property-based tests for Classification Flow Manager.

	Tests universal properties that should hold across all inputs for
	RED/YELLOW/GREEN classification flows.
	"""

	import pytest
	from hypothesis import given, strategies as st

	from src.core.classification_flow_manager import ClassificationFlowManager
	from src.core.content_generator import ContentGenerator
	from src.core.chaplain_models import DistressIndicator
	from tests.chaplain_feedback.conftest import distress_indicator_strategy


	class TestClassificationFlowProperties:
	    """Property-based tests for ClassificationFlowManager.

	    Each test drives one of the manager's RED / YELLOW / GREEN flow methods
	    with Hypothesis-generated inputs and asserts on the fields of the
	    returned result object.
	    """

	    # Input strategies shared by several tests. ``@given`` decorators are
	    # evaluated in class scope, so they can reference these names directly.
	    _MESSAGES = st.text(min_size=1, max_size=500)
	    _CONFIDENCES = st.floats(min_value=0.0, max_value=1.0, allow_nan=False)
	    _INDICATORS = st.lists(distress_indicator_strategy(), min_size=1, max_size=5)

	    @staticmethod
	    def _is_non_blank(text):
	        """Return True when *text* is not None and is not only whitespace."""
	        return text is not None and text.strip() != ""

	    def setup_method(self):
	        """Build a fresh ContentGenerator and the flow manager under test."""
	        self.content_generator = ContentGenerator()
	        self.flow_manager = ClassificationFlowManager(self.content_generator)

	    @given(
	        message=_MESSAGES,
	        confidence=_CONFIDENCES,
	        indicators=_INDICATORS,
	        consent_status=st.sampled_from(["granted", "declined"]),
	    )
	    def test_property_4_red_flow_displays_all_content(
	        self, message, confidence, indicators, consent_status
	    ):
	        """
	        Feature: chaplain-feedback-system, Property 4: RED Flow Displays All Content
	        Validates: Requirements 1.5

	        For any RED classification result, the UI should display all three content types:
	        explanation, permission check message, and referral message (if consent granted).
	        """
	        # Execute RED flow
	        result = self.flow_manager.execute_red_flow(
	            message=message,
	            confidence=confidence,
	            indicators=indicators,
	            consent_status=consent_status,
	        )

	        # Verify all required content is present
	        assert result.classification == "red"
	        assert self._is_non_blank(result.explanation)
	        assert self._is_non_blank(result.permission_check_message)
	        assert result.consent_status == consent_status

	        if consent_status == "granted":
	            # Consent granted: a referral message must be present
	            assert self._is_non_blank(result.referral_message)
	        else:
	            # Consent declined: no referral message at all
	            assert result.referral_message is None

	        # Verify the inputs are carried through unchanged
	        assert result.indicators == indicators
	        assert result.confidence == confidence

	    @given(
	        message=_MESSAGES,
	        confidence=_CONFIDENCES,
	        indicators=_INDICATORS,
	    )
	    def test_property_5_yellow_explanation_differentiates(
	        self, message, confidence, indicators
	    ):
	        """
	        Feature: chaplain-feedback-system, Property 5: YELLOW Explanation Differentiates
	        Validates: Requirements 2.1

	        For any YELLOW classification, the explanation should contain reasoning
	        for why it's not RED and why it's not GREEN.
	        """
	        # Execute YELLOW flow
	        result = self.flow_manager.execute_yellow_flow(
	            message=message,
	            confidence=confidence,
	            indicators=indicators,
	        )

	        # Check the basic YELLOW properties first, so a missing explanation
	        # fails as an assertion rather than an AttributeError on .lower().
	        assert result.classification == "yellow"
	        assert self._is_non_blank(result.explanation)
	        assert 2 <= len(result.follow_up_questions) <= 3

	        explanation = result.explanation.lower()

	        # Should explain why not RED
	        not_red_phrases = (
	            "why not red", "not red", "not meet the threshold",
	            "do not meet", "further clarification", "not severe",
	        )
	        assert any(phrase in explanation for phrase in not_red_phrases), (
	            f"Explanation should explain why not RED: {result.explanation}"
	        )

	        # Should explain why not GREEN
	        not_green_phrases = (
	            "why not green", "not green", "indicators", "concerns",
	            "warrant follow-up", "suggest possible",
	        )
	        assert any(phrase in explanation for phrase in not_green_phrases), (
	            f"Explanation should explain why not GREEN: {result.explanation}"
	        )

	    @given(
	        message=_MESSAGES,
	        confidence=_CONFIDENCES,
	        indicators=_INDICATORS,
	    )
	    def test_property_6_yellow_generates_2_3_questions(
	        self, message, confidence, indicators
	    ):
	        """
	        Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions
	        Validates: Requirements 2.2

	        For any YELLOW classification, the system should generate between 2 and 3
	        follow-up questions, each containing 1-2 clarifying questions.
	        """
	        # Execute YELLOW flow
	        result = self.flow_manager.execute_yellow_flow(
	            message=message,
	            confidence=confidence,
	            indicators=indicators,
	        )

	        # Verify question count
	        assert 2 <= len(result.follow_up_questions) <= 3, (
	            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
	        )

	        for question in result.follow_up_questions:
	            # Verify each question has its required fields populated
	            assert self._is_non_blank(question.question_id)
	            assert self._is_non_blank(question.question_text)
	            assert self._is_non_blank(question.purpose)

	            # Question marks are used as a proxy for the number of
	            # clarifying questions contained in one follow-up.
	            question_marks = question.question_text.count("?")
	            assert 1 <= question_marks <= 2, (
	                f"Expected 1-2 questions per follow-up, got {question_marks} in: {question.question_text}"
	            )

	    @given(
	        message=_MESSAGES,
	        confidence=_CONFIDENCES,
	        # GREEN should have few/no indicators
	        indicators=st.lists(distress_indicator_strategy(), max_size=2),
	    )
	    def test_property_9_green_explanation_generated(
	        self, message, confidence, indicators
	    ):
	        """
	        Feature: chaplain-feedback-system, Property 9: GREEN Explanation Generated
	        Validates: Requirements 3.1, 3.2

	        For any GREEN classification, an explanation should be generated explaining
	        why no spiritual indicators were found.
	        """
	        # Execute GREEN flow
	        result = self.flow_manager.execute_green_flow(
	            message=message,
	            confidence=confidence,
	            indicators=indicators,
	        )

	        # Verify explanation is generated
	        assert result.classification == "green"
	        assert self._is_non_blank(result.explanation)

	        explanation = result.explanation.lower()

	        # Explanation should mention no indicators or no distress
	        no_distress_phrases = (
	            "no spiritual distress", "no indicators", "not suggest spiritual",
	            "no spiritual concerns", "no further steps",
	        )
	        assert any(phrase in explanation for phrase in no_distress_phrases), (
	            f"GREEN explanation should mention no distress: {result.explanation}"
	        )

	        # Should explain why not RED or YELLOW
	        differentiating_phrases = (
	            "why not red", "why not yellow", "not contain", "does not suggest",
	        )
	        assert any(phrase in explanation for phrase in differentiating_phrases), (
	            f"GREEN explanation should differentiate from RED/YELLOW: {result.explanation}"
	        )

	        # GREEN flow should not have RED/YELLOW specific content
	        assert result.permission_check_message is None
	        assert result.referral_message is None
	        assert result.consent_status is None
	        assert len(result.follow_up_questions) == 0
	        assert len(result.patient_responses) == 0
	        assert result.re_evaluation_result is None

	    @given(
	        message=_MESSAGES,
	        confidence=_CONFIDENCES,
	        indicators=_INDICATORS,
	        # Generate responses that contain escalation keywords
	        escalation_responses=st.lists(
	            st.sampled_from([
	                "I feel hopeless about everything",
	                "I feel worthless and can't go on",
	                "There's no point in anything anymore",
	                "I want to give up completely",
	                "This is unbearable, I can't take it",
	            ]),
	            min_size=1,
	            max_size=3,
	        ),
	    )
	    def test_property_7_yellow_escalation_to_red(
	        self, message, confidence, indicators, escalation_responses
	    ):
	        """
	        Feature: chaplain-feedback-system, Property 7: YELLOW Escalation to RED
	        Validates: Requirements 2.4

	        For any YELLOW classification where simulated patient responses indicate distress,
	        the system should transition to RED FLAG flow.
	        """
	        # Execute YELLOW flow with escalation responses
	        result = self.flow_manager.execute_yellow_flow(
	            message=message,
	            confidence=confidence,
	            indicators=indicators,
	            patient_responses=escalation_responses,
	        )

	        # Verify escalation occurred
	        assert result.re_evaluation_result == "red", (
	            f"Expected escalation to RED, got {result.re_evaluation_result} "
	            f"for responses: {escalation_responses}"
	        )

	        # Test the escalation method
	        escalated_result = self.flow_manager.escalate_yellow_to_red(result, message)

	        # Verify escalated result is RED
	        assert escalated_result.classification == "red"
	        assert escalated_result.explanation is not None
	        assert escalated_result.permission_check_message is not None
	        # Should have consent granted, hence a referral message
	        assert escalated_result.referral_message is not None
	        assert escalated_result.consent_status == "granted"

	    @given(
	        message=_MESSAGES,
	        confidence=_CONFIDENCES,
	        indicators=_INDICATORS,
	        # Generate responses that contain downgrade keywords
	        downgrade_responses=st.lists(
	            st.sampled_from([
	                "I'm feeling better now",
	                "Everything is okay",
	                "I have good support from my family",
	                "I'm not worried about it",
	                "I'm managing well",
	                "I feel hopeful about the future",
	            ]),
	            min_size=1,
	            max_size=3,
	        ),
	    )
	    def test_property_8_yellow_downgrade_to_green(
	        self, message, confidence, indicators, downgrade_responses
	    ):
	        """
	        Feature: chaplain-feedback-system, Property 8: YELLOW Downgrade to GREEN
	        Validates: Requirements 2.5

	        For any YELLOW classification where simulated patient responses indicate no distress,
	        the system should transition to GREEN status.
	        """
	        # Execute YELLOW flow with downgrade responses
	        result = self.flow_manager.execute_yellow_flow(
	            message=message,
	            confidence=confidence,
	            indicators=indicators,
	            patient_responses=downgrade_responses,
	        )

	        # Verify downgrade occurred
	        assert result.re_evaluation_result == "green", (
	            f"Expected downgrade to GREEN, got {result.re_evaluation_result} "
	            f"for responses: {downgrade_responses}"
	        )

	        # Test the downgrade method
	        downgraded_result = self.flow_manager.downgrade_yellow_to_green(result, message)

	        # Verify downgraded result is GREEN
	        assert downgraded_result.classification == "green"
	        assert downgraded_result.explanation is not None
	        assert downgraded_result.permission_check_message is None
	        assert downgraded_result.referral_message is None
	        assert downgraded_result.consent_status is None
	        assert len(downgraded_result.follow_up_questions) == 0