Spaces:

DocUA
/

Spiritual_Health_Project

Running

App Files Files Community

DocUA commited on Dec 11, 2025

Commit

ab93d81

1 Parent(s): a3934b1

Fix CSV download button for Hugging Face Spaces - use DownloadButton for direct file download

Browse files

Files changed (19) hide show

exports/verification_results_2025-12-10.csv +18 -0
src/core/chaplain_models.py +745 -0
src/core/classification_flow_manager.py +310 -0
src/core/content_generator.py +346 -0
src/core/error_pattern_analyzer.py +283 -0
src/core/interaction_logger.py +258 -0
src/core/tagging_service.py +528 -0
src/core/verification_csv_exporter.py +213 -2
src/interface/chaplain_feedback_ui.py +450 -0
src/interface/simplified_gradio_app.py +129 -19
tests/chaplain_feedback/__init__.py +2 -0
tests/chaplain_feedback/conftest.py +145 -0
tests/chaplain_feedback/test_properties_classification_flow.py +297 -0
tests/chaplain_feedback/test_properties_content_generator.py +399 -0
tests/chaplain_feedback/test_properties_csv_export.py +290 -0
tests/chaplain_feedback/test_properties_data_models.py +250 -0
tests/chaplain_feedback/test_properties_error_pattern_analyzer.py +194 -0
tests/chaplain_feedback/test_properties_interaction_logging.py +705 -0
tests/chaplain_feedback/test_properties_tagging_service.py +223 -0

exports/verification_results_2025-12-10.csv ADDED Viewed

	@@ -0,0 +1,18 @@

+VERIFICATION SUMMARY
+Total Messages,11
+Correct,10
+Incorrect,1
+Accuracy %,90.9
+Patient Message,Classifier Said,You Said,Notes,Date
+I don't see any point in living anymore. Everything feels hopeless and I can't go on.,RED,RED,,2025-12-10 18:47:25
+I've been thinking about ending it all. I have a plan and I'm ready.,RED,RED,,2025-12-10 18:47:26
+My family would be better off without me. I'm just a burden to everyone.,RED,RED,,2025-12-10 18:47:27
+I've already said goodbye to my loved ones. I'm going to do it tonight.,RED,RED,,2025-12-10 18:47:27
+Nothing matters anymore. I'm tired of fighting. I just want the pain to stop.,RED,YELLOW,івкрврврарчарвар,2025-12-10 18:47:36
+I've been researching ways to hurt myself. I think I'm ready to try.,RED,RED,,2025-12-10 18:47:38
+Everyone would be happier if I was gone. I'm a failure and a disappointment.,RED,RED,,2025-12-10 18:47:38
+I can't take this anymore. The pain is unbearable and I see no way out.,RED,RED,,2025-12-10 18:47:39
+I've written a note. I'm going to do it. Please don't try to stop me.,RED,RED,,2025-12-10 18:47:39
+I'm standing on the edge right now. I'm about to jump.,RED,RED,,2025-12-10 18:47:40
+I have the pills in my hand. This is it. Goodbye.,RED,RED,,2025-12-10 18:47:41

src/core/chaplain_models.py ADDED Viewed

	@@ -0,0 +1,745 @@

+# chaplain_models.py
+"""
+Data models for Chaplain Feedback & Tagging System.
+Defines core data structures for classification flows, tagging records,
+distress indicators, and interaction logging.
+"""
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+from datetime import datetime
+# =============================================================================
+# INDICATOR DEFINITIONS - Based on Spiritual Distress Definitions Document
+# =============================================================================
+# Mapping of all indicators from the definitions document with their categories,
+# subcategories, severity (red/yellow), and definition references.
+# RED (#ea9999): Severe distress - requires immediate attention
+# YELLOW (#ffe599): Potential distress - requires clarification
+INDICATOR_DEFINITIONS: Dict[str, Dict[str, Any]] = {
+    # Section II.A - Emotional expressions
+    "crying": {
+        "category": "Emotional",
+        "subcategory": "Crying",
+        "severity": "red",
+        "definition_reference": "II.A",
+        "description": "Crying as expression of spiritual distress"
+    },
+    "dysomnias": {
+        "category": "Emotional",
+        "subcategory": "Dysomnias/Difficulty sleeping",
+        "severity": "yellow",
+        "definition_reference": "II.A",
+        "description": "Sleep disturbances related to spiritual distress"
+    },
+    "fatigue": {
+        "category": "Emotional",
+        "subcategory": "Fatigue, emotional exhaustion",
+        "severity": "yellow",
+        "definition_reference": "II.A",
+        "description": "Fatigue and emotional exhaustion"
+    },
+    "anxiety": {
+        "category": "Emotional",
+        "subcategory": "Anxiety",
+        "severity": "yellow",
+        "definition_reference": "II.A",
+        "description": "Anxiety as expression of spiritual distress"
+    },
+    "fear": {
+        "category": "Emotional",
+        "subcategory": "Fear",
+        "severity": "yellow",
+        "definition_reference": "II.A",
+        "description": "Fear as expression of spiritual distress"
+    },
+    "anger": {
+        "category": "Emotional",
+        "subcategory": "Anger",
+        "severity": "red",
+        "definition_reference": "II.A",
+        "description": "Anger as expression of spiritual distress"
+    },
+    "depressive_symptoms": {
+        "category": "Emotional",
+        "subcategory": "Depressive symptoms",
+        "severity": "yellow",
+        "definition_reference": "II.A",
+        "description": "Depressive symptoms"
+    },
+    # Section II.B - Decreased engagement
+    "decreased_engagement": {
+        "category": "Engagement",
+        "subcategory": "Decreased engagement with hobbies",
+        "severity": "yellow",
+        "definition_reference": "II.B",
+        "description": "Decreased engagement with hobbies, creative expression, and personal interests"
+    },
+    # Section II.C - Disinterest in nature
+    "disinterest_nature": {
+        "category": "Engagement",
+        "subcategory": "Disinterest in nature",
+        "severity": "yellow",
+        "definition_reference": "II.C",
+        "description": "Disinterest in nature due to spiritual, emotional and physical limitations"
+    },
+    # Section II.D - Excessive guilt
+    "excessive_guilt": {
+        "category": "Guilt",
+        "subcategory": "Excessive guilt",
+        "severity": "red",
+        "definition_reference": "II.D",
+        "description": "Excessive guilt - existential, religious, or relational"
+    },
+    # Section II.E - Anger behaviors of spiritual nature
+    "anger_spiritual": {
+        "category": "Anger",
+        "subcategory": "Anger behaviors of a spiritual nature",
+        "severity": "red",
+        "definition_reference": "II.E",
+        "description": "Anger toward power greater than self"
+    },
+    # Section II.F - Grief types
+    "anticipatory_grieving": {
+        "category": "Grief",
+        "subcategory": "Anticipatory grieving",
+        "severity": "red",
+        "definition_reference": "II.F",
+        "description": "Emotional response to anticipated death"
+    },
+    "disenfranchised_grief": {
+        "category": "Grief",
+        "subcategory": "Disenfranchised grief",
+        "severity": "red",
+        "definition_reference": "II.F",
+        "description": "Grief unacknowledged or unsupported by society"
+    },
+    "life_review_grieving": {
+        "category": "Grief",
+        "subcategory": "Grieving in the setting of life review",
+        "severity": "yellow",
+        "definition_reference": "II.F",
+        "description": "Grieving during life review process"
+    },
+    "maladaptive_grieving": {
+        "category": "Grief",
+        "subcategory": "Maladaptive grieving",
+        "severity": "red",
+        "definition_reference": "II.F",
+        "description": "Prolonged grief disorder"
+    },
+    "complicated_grief": {
+        "category": "Grief",
+        "subcategory": "Complicated grief",
+        "severity": "red",
+        "definition_reference": "II.F",
+        "description": "Persistent, intense grief disrupting daily life"
+    },
+    "loss_loved_one": {
+        "category": "Grief",
+        "subcategory": "Loss of a loved one",
+        "severity": "red",
+        "definition_reference": "II.F",
+        "description": "Loss of family member or friend"
+    },
+    # Section II.G - Expressions of Spiritual Distress
+    "expresses_alienation": {
+        "category": "Expressions",
+        "subcategory": "Expresses alienation",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Feeling separation, isolation, disconnection"
+    },
+    "concern_beliefs": {
+        "category": "Expressions",
+        "subcategory": "Expresses concern about beliefs",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Questions or struggles with spiritual/religious beliefs"
+    },
+    "concern_future": {
+        "category": "Expressions",
+        "subcategory": "Expresses concern about the future",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Anxious, fearful, or uncertain about what lies ahead"
+    },
+    "concern_values": {
+        "category": "Expressions",
+        "subcategory": "Expresses concern about values system",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Conflicted about moral or ethical principles"
+    },
+    "concern_family": {
+        "category": "Expressions",
+        "subcategory": "Expresses concerns about family",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Distressed about family well-being or relationships"
+    },
+    "feeling_emptiness": {
+        "category": "Expressions",
+        "subcategory": "Expresses feeling of emptiness",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Deep inner void or lack of meaning"
+    },
+    "feeling_unloved": {
+        "category": "Expressions",
+        "subcategory": "Expresses feeling unloved",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Feels unworthy of love or disconnected from caring relationships"
+    },
+    "feeling_worthless": {
+        "category": "Expressions",
+        "subcategory": "Expresses feeling worthless",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Perceives themselves as having little or no value"
+    },
+    "insufficient_courage": {
+        "category": "Expressions",
+        "subcategory": "Expresses insufficient courage",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Fear or lack of strength to face suffering"
+    },
+    "loss_confidence": {
+        "category": "Expressions",
+        "subcategory": "Expresses loss of confidence",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Diminished trust in themselves or abilities"
+    },
+    "loss_control": {
+        "category": "Expressions",
+        "subcategory": "Expresses loss of control",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Feels powerless over life circumstances"
+    },
+    "loss_hope": {
+        "category": "Expressions",
+        "subcategory": "Expresses loss of hope",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Feels despair or believes future holds no possibility"
+    },
+    "loss_serenity": {
+        "category": "Expressions",
+        "subcategory": "Expresses loss of serenity",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Inner turmoil, anxiety, or restlessness"
+    },
+    "need_forgiveness": {
+        "category": "Expressions",
+        "subcategory": "Expresses need for forgiveness",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Feels guilt or remorse and desires reconciliation"
+    },
+    "expresses_regret": {
+        "category": "Expressions",
+        "subcategory": "Expresses regret",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Sorrow over past actions or missed opportunities"
+    },
+    "expresses_suffering": {
+        "category": "Expressions",
+        "subcategory": "Expresses suffering",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Deep physical, emotional, or spiritual pain"
+    },
+    "concern_medical_treatment": {
+        "category": "Medical",
+        "subcategory": "Expresses concern about medical treatment",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Concern about treatment or medical team"
+    },
+    "unfinished_business": {
+        "category": "Expressions",
+        "subcategory": "Expresses feeling of having unfinished business",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Important matters remain unresolved"
+    },
+    "desire_share_spiritual": {
+        "category": "Spiritual",
+        "subcategory": "Expresses desire to share intense spiritual experiences",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Wants to share intense spiritual/religious experiences"
+    },
+    "inability_transcendence": {
+        "category": "Spiritual",
+        "subcategory": "Inability to experience transcendence",
+        "severity": "red",
+        "definition_reference": "II.G",
+        "description": "Cannot experience supportive forces larger than oneself"
+    },
+    "impaired_introspection": {
+        "category": "Spiritual",
+        "subcategory": "Impaired ability for introspection",
+        "severity": "yellow",
+        "definition_reference": "II.G",
+        "description": "Impaired ability for self-reflection"
+    },
+    # Section II.H - Existential questioning
+    "questioning_identity": {
+        "category": "Existential",
+        "subcategory": "Questioning one's identity",
+        "severity": "yellow",
+        "definition_reference": "II.H",
+        "description": "Confused about identity when illness takes away roles"
+    },
+    "questioning_meaning_life": {
+        "category": "Existential",
+        "subcategory": "Questioning the meaning of life",
+        "severity": "red",
+        "definition_reference": "II.H",
+        "description": "Grapples with fundamental questions about existence"
+    },
+    "questioning_meaning_suffering": {
+        "category": "Existential",
+        "subcategory": "Questioning the meaning of suffering",
+        "severity": "red",
+        "definition_reference": "II.H",
+        "description": "Struggles to understand if pain has purpose"
+    },
+    "questioning_dignity": {
+        "category": "Existential",
+        "subcategory": "Questioning one's own dignity",
+        "severity": "red",
+        "definition_reference": "II.H",
+        "description": "Questions inherent worth and value as person"
+    },
+    # Section II.I - Social isolation
+    "social_isolation": {
+        "category": "Social",
+        "subcategory": "Social isolation expressions",
+        "severity": "yellow",
+        "definition_reference": "II.I",
+        "description": "Avoids interaction, estrangement, loneliness"
+    },
+    # Section II.J - Changes in spiritual/religious practices
+    "altered_religious_ritual": {
+        "category": "Spiritual",
+        "subcategory": "Altered religious ritual",
+        "severity": "yellow",
+        "definition_reference": "II.J.a",
+        "description": "Disruption to religious practices"
+    },
+    "altered_spiritual_practice": {
+        "category": "Spiritual",
+        "subcategory": "Altered spiritual practice",
+        "severity": "yellow",
+        "definition_reference": "II.J.b",
+        "description": "Disruption to personal spiritual activities"
+    },
+    # Section II.K - Cultural conflict
+    "cultural_conflict": {
+        "category": "Cultural",
+        "subcategory": "Cultural conflict",
+        "severity": "yellow",
+        "definition_reference": "II.K",
+        "description": "Clash between cultural beliefs and healthcare culture"
+    },
+    # Section II.L - Sociocultural deprivation
+    "sociocultural_deprivation": {
+        "category": "Cultural",
+        "subcategory": "Sociocultural deprivation",
+        "severity": "yellow",
+        "definition_reference": "II.L",
+        "description": "Separated from cultural community"
+    },
+    # Section II.M - Difficulty accepting aging
+    "difficulty_accepting_aging": {
+        "category": "Aging",
+        "subcategory": "Difficulty accepting aging",
+        "severity": "yellow",
+        "definition_reference": "II.M",
+        "description": "Grief over lost abilities, resistance to mortality"
+    },
+    # Section II.N - Inadequate environmental control
+    "inadequate_environmental_control": {
+        "category": "Environment",
+        "subcategory": "Inadequate environmental control",
+        "severity": "yellow",
+        "definition_reference": "II.N",
+        "description": "Unable to shape surroundings for spiritual needs"
+    },
+    # Section II.O - Loss of independence
+    "loss_independence": {
+        "category": "Independence",
+        "subcategory": "Loss of independence",
+        "severity": "yellow",
+        "definition_reference": "II.O",
+        "description": "Dependency threatens personal and spiritual agency"
+    },
+    # Section II.P - Uncontrolled pain
+    "uncontrolled_pain": {
+        "category": "Medical",
+        "subcategory": "Uncontrolled pain",
+        "severity": "red",
+        "definition_reference": "II.P",
+        "description": "Persistent physical pain causing existential distress"
+    },
+    # Section II.Q - Spiritual pain
+    "spiritual_pain": {
+        "category": "Spiritual",
+        "subcategory": "Spiritual pain",
+        "severity": "red",
+        "definition_reference": "II.Q",
+        "description": "Soul-level suffering beyond physical symptoms"
+    },
+}
+# =============================================================================
+# DATA MODELS
+# =============================================================================
+@dataclass
+class DistressIndicator:
+    """
+    Detected distress indicator with category and severity.
+    Based on the Spiritual Distress Definitions document with color coding:
+    - RED (#ea9999): Severe distress - requires immediate attention
+    - YELLOW (#ffe599): Potential distress - requires clarification
+    """
+    indicator_text: str
+    category: str  # "Emotional", "Grief", "Existential", "Expressions", "Spiritual", "Medical", "Social", "Cultural"
+    subcategory: str  # Specific indicator name from definitions document
+    severity: str  # "red" or "yellow" - based on color coding in definitions document
+    confidence: float  # 0.0-1.0
+    definition_reference: str = ""  # Section reference (e.g., "II.D", "II.G")
+    def __post_init__(self):
+        """Validate severity value."""
+        if self.severity not in ("red", "yellow"):
+            raise ValueError(f"Severity must be 'red' or 'yellow', got '{self.severity}'")
+        if not 0.0 <= self.confidence <= 1.0:
+            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
+    def to_dict(self) -> dict:
+        """Convert indicator to dictionary for serialization."""
+        return {
+            "indicator_text": self.indicator_text,
+            "category": self.category,
+            "subcategory": self.subcategory,
+            "severity": self.severity,
+            "confidence": self.confidence,
+            "definition_reference": self.definition_reference,
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "DistressIndicator":
+        """Create indicator from dictionary."""
+        return cls(**data)
+    @classmethod
+    def from_definition(cls, indicator_key: str, indicator_text: str, confidence: float) -> "DistressIndicator":
+        """
+        Create indicator from INDICATOR_DEFINITIONS constant.
+        Args:
+            indicator_key: Key in INDICATOR_DEFINITIONS (e.g., "excessive_guilt")
+            indicator_text: The actual text that triggered this indicator
+            confidence: Confidence score 0.0-1.0
+        Returns:
+            DistressIndicator with category, subcategory, severity from definitions
+        Raises:
+            KeyError: If indicator_key not found in INDICATOR_DEFINITIONS
+        """
+        if indicator_key not in INDICATOR_DEFINITIONS:
+            raise KeyError(f"Unknown indicator key: {indicator_key}")
+        defn = INDICATOR_DEFINITIONS[indicator_key]
+        return cls(
+            indicator_text=indicator_text,
+            category=defn["category"],
+            subcategory=defn["subcategory"],
+            severity=defn["severity"],
+            confidence=confidence,
+            definition_reference=defn["definition_reference"],
+        )
+@dataclass
+class FollowUpQuestion:
+    """
+    Generated follow-up question for YELLOW cases.
+    Contains 1-2 short, sensitive clarifying questions with purpose explanation.
+    """
+    question_id: str
+    question_text: str
+    purpose: str  # Why this question is being asked
+    def to_dict(self) -> dict:
+        """Convert question to dictionary for serialization."""
+        return {
+            "question_id": self.question_id,
+            "question_text": self.question_text,
+            "purpose": self.purpose,
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "FollowUpQuestion":
+        """Create question from dictionary."""
+        return cls(**data)
+@dataclass
+class ClassificationFlowResult:
+    """
+    Complete result of classification flow.
+    Contains all flow-specific fields for RED/YELLOW/GREEN classifications.
+    """
+    classification: str  # "red", "yellow", "green"
+    confidence: float  # 0.0-1.0
+    indicators: List[DistressIndicator] = field(default_factory=list)
+    explanation: str = ""
+    # RED-specific fields
+    permission_check_message: Optional[str] = None
+    referral_message: Optional[str] = None
+    consent_status: Optional[str] = None  # "granted", "declined", None
+    # YELLOW-specific fields
+    follow_up_questions: List[FollowUpQuestion] = field(default_factory=list)
+    patient_responses: List[str] = field(default_factory=list)
+    re_evaluation_result: Optional[str] = None  # "red", "green", None
+    def __post_init__(self):
+        """Validate classification value."""
+        if self.classification not in ("red", "yellow", "green"):
+            raise ValueError(f"Classification must be 'red', 'yellow', or 'green', got '{self.classification}'")
+        if not 0.0 <= self.confidence <= 1.0:
+            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
+    def to_dict(self) -> dict:
+        """Convert result to dictionary for serialization."""
+        return {
+            "classification": self.classification,
+            "confidence": self.confidence,
+            "indicators": [i.to_dict() for i in self.indicators],
+            "explanation": self.explanation,
+            "permission_check_message": self.permission_check_message,
+            "referral_message": self.referral_message,
+            "consent_status": self.consent_status,
+            "follow_up_questions": [q.to_dict() for q in self.follow_up_questions],
+            "patient_responses": self.patient_responses,
+            "re_evaluation_result": self.re_evaluation_result,
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "ClassificationFlowResult":
+        """Create result from dictionary."""
+        data_copy = data.copy()
+        # Convert nested indicators
+        indicators_data = data_copy.pop("indicators", [])
+        indicators = [DistressIndicator.from_dict(i) for i in indicators_data]
+        # Convert nested follow-up questions
+        questions_data = data_copy.pop("follow_up_questions", [])
+        questions = [FollowUpQuestion.from_dict(q) for q in questions_data]
+        result = cls(**data_copy)
+        result.indicators = indicators
+        result.follow_up_questions = questions
+        return result
+# Tagging category constants
+CLASSIFICATION_SUBCATEGORIES = [
+    "missed_indicators",  # Missed key distress indicators
+    "false_positive",     # Overly sensitive (false-positive flag)
+    "missed_distress",    # Not sensitive enough (missed distress)
+]
+QUESTION_ISSUE_TYPES = [
+    "inappropriate",      # Question is inappropriate or intrusive
+    "not_relevant",       # Question is not spiritually relevant
+    "too_leading",        # Question is too leading or assumptive
+    "unclear",            # Question is unclear or confusing
+    "tone_clinical",      # Tone too clinical
+    "tone_religious",     # Tone too religious
+    "tone_casual",        # Tone too casual
+]
+REFERRAL_ISSUE_TYPES = [
+    "incomplete_summary",    # Incorrect or incomplete summary
+    "misrepresentation",     # Misrepresentation of patient message
+    "inappropriate_tone",    # Tone inappropriate for spiritual care team
+]
+@dataclass
+class TaggingRecord:
+    """
+    Structured tagging feedback from chaplain.
+    Supports multi-select for question and referral issues.
+    """
+    record_id: str
+    message_id: str
+    # Classification feedback
+    is_classification_correct: bool = True
+    classification_subcategory: Optional[str] = None  # "missed_indicators", "false_positive", "missed_distress"
+    correct_classification: Optional[str] = None  # "red", "yellow", "green"
+    # Follow-up question feedback (YELLOW only)
+    question_issues: List[str] = field(default_factory=list)  # Multi-select from QUESTION_ISSUE_TYPES
+    question_comments: Optional[str] = None
+    # Referral message feedback (RED only)
+    referral_issues: List[str] = field(default_factory=list)  # Multi-select from REFERRAL_ISSUE_TYPES
+    referral_comments: Optional[str] = None
+    # Indicator feedback
+    indicator_issues: List[str] = field(default_factory=list)  # List of incorrectly identified indicator IDs
+    indicator_comments: Optional[str] = None
+    # General
+    general_notes: str = ""
+    timestamp: datetime = field(default_factory=datetime.now)
+    def __post_init__(self):
+        """Validate tagging values."""
+        if self.classification_subcategory and self.classification_subcategory not in CLASSIFICATION_SUBCATEGORIES:
+            raise ValueError(f"Invalid classification subcategory: {self.classification_subcategory}")
+        if self.correct_classification and self.correct_classification not in ("red", "yellow", "green"):
+            raise ValueError(f"Invalid correct_classification: {self.correct_classification}")
+        for issue in self.question_issues:
+            if issue not in QUESTION_ISSUE_TYPES:
+                raise ValueError(f"Invalid question issue type: {issue}")
+        for issue in self.referral_issues:
+            if issue not in REFERRAL_ISSUE_TYPES:
+                raise ValueError(f"Invalid referral issue type: {issue}")
+    def to_dict(self) -> dict:
+        """Convert record to dictionary for serialization."""
+        return {
+            "record_id": self.record_id,
+            "message_id": self.message_id,
+            "is_classification_correct": self.is_classification_correct,
+            "classification_subcategory": self.classification_subcategory,
+            "correct_classification": self.correct_classification,
+            "question_issues": self.question_issues,
+            "question_comments": self.question_comments,
+            "referral_issues": self.referral_issues,
+            "referral_comments": self.referral_comments,
+            "indicator_issues": self.indicator_issues,
+            "indicator_comments": self.indicator_comments,
+            "general_notes": self.general_notes,
+            "timestamp": self.timestamp.isoformat(),
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "TaggingRecord":
+        """Create record from dictionary."""
+        data_copy = data.copy()
+        if isinstance(data_copy.get("timestamp"), str):
+            data_copy["timestamp"] = datetime.fromisoformat(data_copy["timestamp"])
+        return cls(**data_copy)
+# Interaction step types
+INTERACTION_STEP_TYPES = [
+    "classification",      # Initial classification
+    "explanation",         # Explanation generation
+    "permission_check",    # Patient consent request
+    "follow_up",           # Follow-up questions
+    "referral",            # Referral message generation
+    "feedback",            # Chaplain feedback
+]
+@dataclass
+class InteractionStepLog:
+    """
+    Log entry for a single interaction step.
+    Records all interaction steps with input/output for analysis.
+    """
+    step_id: str
+    session_id: str
+    message_id: str
+    step_type: str  # "classification", "explanation", "permission_check", "follow_up", "referral", "feedback"
+    input_text: str
+    model_output: str
+    approval_status: Optional[str] = None  # "approved", "disapproved", None
+    tagging_data: Optional[TaggingRecord] = None
+    timestamp: datetime = field(default_factory=datetime.now)
+    def __post_init__(self):
+        """Validate step type."""
+        if self.step_type not in INTERACTION_STEP_TYPES:
+            raise ValueError(f"Invalid step type: {self.step_type}")
+        if self.approval_status and self.approval_status not in ("approved", "disapproved"):
+            raise ValueError(f"Invalid approval status: {self.approval_status}")
+    def to_dict(self) -> dict:
+        """Convert log entry to dictionary for serialization."""
+        return {
+            "step_id": self.step_id,
+            "session_id": self.session_id,
+            "message_id": self.message_id,
+            "step_type": self.step_type,
+            "input_text": self.input_text,
+            "model_output": self.model_output,
+            "approval_status": self.approval_status,
+            "tagging_data": self.tagging_data.to_dict() if self.tagging_data else None,
+            "timestamp": self.timestamp.isoformat(),
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "InteractionStepLog":
+        """Create log entry from dictionary."""
+        data_copy = data.copy()
+        if isinstance(data_copy.get("timestamp"), str):
+            data_copy["timestamp"] = datetime.fromisoformat(data_copy["timestamp"])
+        # Convert nested tagging data
+        tagging_data = data_copy.pop("tagging_data", None)
+        if tagging_data:
+            tagging_data = TaggingRecord.from_dict(tagging_data)
+        log = cls(**data_copy)
+        log.tagging_data = tagging_data
+        return log

src/core/classification_flow_manager.py ADDED Viewed

	@@ -0,0 +1,310 @@

+# classification_flow_manager.py
+"""
+Classification Flow Manager for Chaplain Feedback System.
+Orchestrates RED/YELLOW/GREEN classification flows and integrates with ContentGenerator
+to produce complete classification results with appropriate content.
+"""
+from typing import List, Optional
+import uuid
+from datetime import datetime
+from src.core.chaplain_models import (
+    DistressIndicator,
+    FollowUpQuestion,
+    ClassificationFlowResult,
+)
+from src.core.content_generator import ContentGenerator
+class ClassificationFlowManager:
+    """
+    Orchestrates RED/YELLOW/GREEN classification flows.
+    Integrates with ContentGenerator to produce complete classification results
+    with explanations, permission checks, referral messages, and follow-up questions.
+    """
+    def __init__(self, content_generator: Optional[ContentGenerator] = None):
+        """
+        Initialize flow manager.
+        Args:
+            content_generator: ContentGenerator instance, creates new one if None
+        """
+        self.content_generator = content_generator or ContentGenerator()
+    def execute_classification_flow(
+        self,
+        message: str,
+        classification: str,
+        confidence: float,
+        indicators: List[DistressIndicator]
+    ) -> ClassificationFlowResult:
+        """
+        Execute complete classification flow based on classification type.
+        Args:
+            message: Original patient message
+            classification: "red", "yellow", or "green"
+            confidence: Classification confidence (0.0-1.0)
+            indicators: List of detected distress indicators
+        Returns:
+            Complete ClassificationFlowResult with all generated content
+        """
+        if classification == "red":
+            return self.execute_red_flow(message, confidence, indicators)
+        elif classification == "yellow":
+            return self.execute_yellow_flow(message, confidence, indicators)
+        elif classification == "green":
+            return self.execute_green_flow(message, confidence, indicators)
+        else:
+            raise ValueError(f"Invalid classification: {classification}")
+    def execute_red_flow(
+        self,
+        message: str,
+        confidence: float,
+        indicators: List[DistressIndicator],
+        consent_status: Optional[str] = None
+    ) -> ClassificationFlowResult:
+        """
+        Execute RED flag flow.
+        Generates explanation, permission check, and referral message.
+        Handles consent granted/declined states.
+        Args:
+            message: Original patient message
+            confidence: Classification confidence
+            indicators: List of detected distress indicators
+            consent_status: "granted", "declined", or None for simulation
+        Returns:
+            ClassificationFlowResult with RED flow content
+        """
+        # Generate explanation
+        explanation = self.content_generator.generate_explanation(
+            "red", indicators, message
+        )
+        # Generate permission check message
+        permission_check = self.content_generator.generate_permission_check(indicators)
+        # Simulate consent if not provided
+        if consent_status is None:
+            # For testing/demo purposes, simulate consent as granted
+            # In real implementation, this would come from user interaction
+            consent_status = "granted"
+        # Generate referral message if consent granted
+        referral_message = None
+        if consent_status == "granted":
+            referral_message = self.content_generator.generate_referral_message(
+                message, indicators, explanation
+            )
+        return ClassificationFlowResult(
+            classification="red",
+            confidence=confidence,
+            indicators=indicators,
+            explanation=explanation,
+            permission_check_message=permission_check,
+            referral_message=referral_message,
+            consent_status=consent_status,
+        )
+    def execute_yellow_flow(
+        self,
+        message: str,
+        confidence: float,
+        indicators: List[DistressIndicator],
+        patient_responses: Optional[List[str]] = None
+    ) -> ClassificationFlowResult:
+        """
+        Execute YELLOW flag flow.
+        Generates explanation and follow-up questions.
+        Handles re-evaluation based on responses.
+        Args:
+            message: Original patient message
+            confidence: Classification confidence
+            indicators: List of detected distress indicators
+            patient_responses: Simulated patient responses to follow-up questions
+        Returns:
+            ClassificationFlowResult with YELLOW flow content
+        """
+        # Generate explanation
+        explanation = self.content_generator.generate_explanation(
+            "yellow", indicators, message
+        )
+        # Generate follow-up questions
+        follow_up_questions = self.content_generator.generate_follow_up_questions(
+            message, indicators
+        )
+        # Handle patient responses and re-evaluation
+        re_evaluation_result = None
+        if patient_responses is None:
+            # Simulate patient responses for demo/testing
+            patient_responses = self._simulate_patient_responses(follow_up_questions)
+        if patient_responses:
+            re_evaluation_result = self._evaluate_patient_responses(patient_responses)
+        return ClassificationFlowResult(
+            classification="yellow",
+            confidence=confidence,
+            indicators=indicators,
+            explanation=explanation,
+            follow_up_questions=follow_up_questions,
+            patient_responses=patient_responses,
+            re_evaluation_result=re_evaluation_result,
+        )
+    def execute_green_flow(
+        self,
+        message: str,
+        confidence: float,
+        indicators: List[DistressIndicator]
+    ) -> ClassificationFlowResult:
+        """
+        Execute GREEN flag flow.
+        Generates explanation for no indicators.
+        Displays "No further steps" status.
+        Args:
+            message: Original patient message
+            confidence: Classification confidence
+            indicators: List of detected distress indicators (should be empty)
+        Returns:
+            ClassificationFlowResult with GREEN flow content
+        """
+        # Generate explanation
+        explanation = self.content_generator.generate_explanation(
+            "green", indicators, message
+        )
+        return ClassificationFlowResult(
+            classification="green",
+            confidence=confidence,
+            indicators=indicators,
+            explanation=explanation,
+        )
+    def escalate_yellow_to_red(
+        self,
+        yellow_result: ClassificationFlowResult,
+        message: str
+    ) -> ClassificationFlowResult:
+        """
+        Escalate YELLOW classification to RED based on patient responses.
+        Args:
+            yellow_result: Original YELLOW classification result
+            message: Original patient message
+        Returns:
+            New RED ClassificationFlowResult
+        """
+        # Create new RED indicators based on escalation
+        escalated_indicators = yellow_result.indicators.copy()
+        # Execute RED flow with escalated indicators
+        return self.execute_red_flow(
+            message,
+            confidence=0.85,  # High confidence for escalated case
+            indicators=escalated_indicators,
+            consent_status="granted"  # Assume consent for escalated cases
+        )
+    def downgrade_yellow_to_green(
+        self,
+        yellow_result: ClassificationFlowResult,
+        message: str
+    ) -> ClassificationFlowResult:
+        """
+        Downgrade YELLOW classification to GREEN based on patient responses.
+        Args:
+            yellow_result: Original YELLOW classification result
+            message: Original patient message
+        Returns:
+            New GREEN ClassificationFlowResult
+        """
+        # Execute GREEN flow
+        return self.execute_green_flow(
+            message,
+            confidence=0.80,  # High confidence for downgraded case
+            indicators=[]  # No indicators for GREEN
+        )
+    def _simulate_patient_responses(
+        self,
+        questions: List[FollowUpQuestion]
+    ) -> List[str]:
+        """
+        Simulate patient responses to follow-up questions for demo/testing.
+        Args:
+            questions: List of follow-up questions
+        Returns:
+            List of simulated patient responses
+        """
+        # Simple simulation - in real implementation, these would come from user
+        responses = [
+            "I've been feeling okay, just worried about my treatment.",
+            "I have my family to talk to, but sometimes I feel alone.",
+            "I think I'd like to talk to someone from the care team."
+        ]
+        # Return responses matching the number of questions
+        return responses[:len(questions)]
+    def _evaluate_patient_responses(
+        self,
+        responses: List[str]
+    ) -> str:
+        """
+        Evaluate patient responses to determine if escalation or downgrade needed.
+        Args:
+            responses: List of patient responses
+        Returns:
+            "red" for escalation, "green" for downgrade, None for no change
+        """
+        # Simple evaluation logic for demo/testing
+        # In real implementation, this would use more sophisticated analysis
+        combined_responses = " ".join(responses).lower()
+        # Check for escalation keywords (distress indicators)
+        escalation_keywords = [
+            "hopeless", "worthless", "can't go on", "want to die",
+            "no point", "give up", "unbearable", "can't take it"
+        ]
+        if any(keyword in combined_responses for keyword in escalation_keywords):
+            return "red"
+        # Check for downgrade keywords (positive indicators)
+        downgrade_keywords = [
+            "feeling better", "okay", "fine", "good support",
+            "not worried", "managing well", "hopeful"
+        ]
+        if any(keyword in combined_responses for keyword in downgrade_keywords):
+            return "green"
+        # No clear indication - remain YELLOW
+        return None

src/core/content_generator.py ADDED Viewed

	@@ -0,0 +1,346 @@

+# content_generator.py
+"""
+Content Generation Service for Chaplain Feedback System.
+Generates explanations, permission checks, referral messages, and follow-up questions
+for RED/YELLOW/GREEN classification flows.
+"""
+from typing import List
+import uuid
+from src.core.chaplain_models import (
+    DistressIndicator,
+    FollowUpQuestion,
+)
+class ContentGenerator:
+    """
+    Generates content for classification flows.
+    Provides methods to generate:
+    - Explanations for RED/YELLOW/GREEN classifications
+    - Permission check messages for RED cases
+    - Referral messages for spiritual care team
+    - Follow-up questions for YELLOW cases
+    """
+    def generate_explanation(
+        self,
+        classification: str,
+        indicators: List[DistressIndicator],
+        message: str
+    ) -> str:
+        """
+        Generate explanation for classification.
+        Args:
+            classification: "red", "yellow", or "green"
+            indicators: List of detected distress indicators
+            message: Original patient message
+        Returns:
+            Explanation text referencing distress indicators
+        """
+        if classification == "red":
+            return self._generate_red_explanation(indicators, message)
+        elif classification == "yellow":
+            return self._generate_yellow_explanation(indicators, message)
+        else:
+            return self._generate_green_explanation(message)
+    def _generate_red_explanation(
+        self,
+        indicators: List[DistressIndicator],
+        message: str
+    ) -> str:
+        """Generate explanation for RED classification."""
+        explanation_parts = [
+            "This message has been classified as RED FLAG (severe spiritual distress) "
+            "requiring immediate attention from the spiritual care team."
+        ]
+        if indicators:
+            explanation_parts.append("\n\nDetected distress indicators:")
+            for indicator in indicators:
+                indicator_line = (
+                    f"\n- {indicator.subcategory} ({indicator.category}): "
+                    f"'{indicator.indicator_text}' "
+                    f"[Ref: {indicator.definition_reference}, Confidence: {indicator.confidence:.0%}]"
+                )
+                explanation_parts.append(indicator_line)
+        explanation_parts.append(
+            "\n\nThis classification indicates severe spiritual distress that requires "
+            "immediate referral to the spiritual health team. The indicators suggest "
+            "the patient may benefit from professional spiritual care support."
+        )
+        return "".join(explanation_parts)
+    def _generate_yellow_explanation(
+        self,
+        indicators: List[DistressIndicator],
+        message: str
+    ) -> str:
+        """Generate explanation for YELLOW classification."""
+        explanation_parts = [
+            "This message has been classified as YELLOW FLAG (potential spiritual distress) "
+            "requiring clarifying questions."
+        ]
+        if indicators:
+            explanation_parts.append("\n\nDetected potential distress indicators:")
+            for indicator in indicators:
+                indicator_line = (
+                    f"\n- {indicator.subcategory} ({indicator.category}): "
+                    f"'{indicator.indicator_text}' "
+                    f"[Ref: {indicator.definition_reference}, Confidence: {indicator.confidence:.0%}]"
+                )
+                explanation_parts.append(indicator_line)
+        # Explain why not RED
+        explanation_parts.append(
+            "\n\nWhy not RED: The indicators detected suggest potential distress but "
+            "do not meet the threshold for severe spiritual distress requiring immediate "
+            "referral. Further clarification is needed to determine the severity."
+        )
+        # Explain why not GREEN
+        explanation_parts.append(
+            "\n\nWhy not GREEN: The message contains indicators that suggest possible "
+            "spiritual concerns that warrant follow-up questions to better understand "
+            "the patient's spiritual state."
+        )
+        return "".join(explanation_parts)
+    def _generate_green_explanation(self, message: str) -> str:
+        """Generate explanation for GREEN classification."""
+        explanation_parts = [
+            "This message has been classified as GREEN (no spiritual distress indicators detected)."
+        ]
+        explanation_parts.append(
+            "\n\nNo spiritual distress indicators were found in this message. "
+            "The content does not suggest spiritual concerns that require follow-up "
+            "or referral to the spiritual care team."
+        )
+        # Explain why not RED or YELLOW
+        explanation_parts.append(
+            "\n\nWhy not RED or YELLOW: The message does not contain expressions of "
+            "spiritual distress, grief, existential questioning, or other indicators "
+            "defined in the spiritual distress definitions document."
+        )
+        explanation_parts.append("\n\nNo further steps required.")
+        return "".join(explanation_parts)
+    def generate_permission_check(
+        self,
+        indicators: List[DistressIndicator]
+    ) -> str:
+        """
+        Generate patient consent request message for RED cases.
+        Args:
+            indicators: List of detected distress indicators
+        Returns:
+            Permission check message with spiritual support and consent language
+        """
+        message_parts = [
+            "We noticed some things in your message that suggest you might be going "
+            "through a difficult time spiritually or emotionally."
+        ]
+        message_parts.append(
+            "\n\nOur hospital has a spiritual care team that provides support to "
+            "patients who are experiencing spiritual distress. They can offer "
+            "compassionate listening, spiritual guidance, and emotional support."
+        )
+        message_parts.append(
+            "\n\nWould you like us to connect you with a member of our spiritual "
+            "care team? Your consent is important to us, and this referral is "
+            "entirely voluntary."
+        )
+        message_parts.append(
+            "\n\nPlease let us know if you would like spiritual support, or if you "
+            "prefer not to be contacted by the spiritual care team at this time."
+        )
+        return "".join(message_parts)
+    def generate_referral_message(
+        self,
+        message: str,
+        indicators: List[DistressIndicator],
+        explanation: str
+    ) -> str:
+        """
+        Generate referral message for spiritual care team.
+        Args:
+            message: Original patient message
+            indicators: List of detected distress indicators
+            explanation: Classification explanation
+        Returns:
+            Referral message with background, indicators, and justification
+        """
+        referral_parts = ["SPIRITUAL CARE TEAM REFERRAL"]
+        referral_parts.append("\n" + "=" * 40)
+        # Background section
+        referral_parts.append("\n\nBACKGROUND:")
+        referral_parts.append(
+            f"\nPatient message excerpt: \"{message[:200]}{'...' if len(message) > 200 else ''}\""
+        )
+        # Indicators section
+        referral_parts.append("\n\nINDICATORS DETECTED:")
+        if indicators:
+            for indicator in indicators:
+                referral_parts.append(
+                    f"\n- {indicator.subcategory} ({indicator.category})"
+                )
+                referral_parts.append(f"\n  Severity: {indicator.severity.upper()}")
+                referral_parts.append(f"\n  Reference: {indicator.definition_reference}")
+                referral_parts.append(f"\n  Confidence: {indicator.confidence:.0%}")
+                referral_parts.append(f"\n  Text: \"{indicator.indicator_text}\"")
+        else:
+            referral_parts.append("\n- No specific indicators (general distress detected)")
+        # Justification section
+        referral_parts.append("\n\nJUSTIFICATION FOR RED FLAG:")
+        referral_parts.append(
+            "\nThis patient has been flagged for immediate spiritual care attention "
+            "based on the severity of distress indicators detected in their message. "
+        )
+        if indicators:
+            red_indicators = [i for i in indicators if i.severity == "red"]
+            if red_indicators:
+                referral_parts.append(
+                    f"\n\nThe following severe (RED) indicators were identified: "
+                    f"{', '.join(i.subcategory for i in red_indicators)}."
+                )
+        referral_parts.append(
+            "\n\nRecommended action: Please reach out to this patient at your "
+            "earliest convenience to provide spiritual support and assessment."
+        )
+        referral_parts.append("\n\n" + "=" * 40)
+        referral_parts.append("\nPatient has provided consent for this referral.")
+        return "".join(referral_parts)
+    def generate_follow_up_questions(
+        self,
+        message: str,
+        indicators: List[DistressIndicator]
+    ) -> List[FollowUpQuestion]:
+        """
+        Generate 2-3 clarifying questions for YELLOW cases.
+        Each question contains 1-2 short, sensitive clarifying questions
+        with a purpose explanation.
+        Args:
+            message: Original patient message
+            indicators: List of detected distress indicators
+        Returns:
+            List of 2-3 FollowUpQuestion instances
+        """
+        questions = []
+        # Generate questions based on indicator categories
+        categories = set(i.category for i in indicators) if indicators else set()
+        # Question 1: General well-being check
+        questions.append(FollowUpQuestion(
+            question_id=str(uuid.uuid4())[:8],
+            question_text=(
+                "How have you been feeling overall lately? "
+                "Is there anything specific that's been on your mind?"
+            ),
+            purpose=(
+                "To understand the patient's general emotional and spiritual state "
+                "and identify any underlying concerns."
+            )
+        ))
+        # Question 2: Based on detected categories or general spiritual inquiry
+        if "Grief" in categories:
+            questions.append(FollowUpQuestion(
+                question_id=str(uuid.uuid4())[:8],
+                question_text=(
+                    "It sounds like you may be dealing with some difficult feelings. "
+                    "Would you like to share more about what you're experiencing?"
+                ),
+                purpose=(
+                    "To explore potential grief-related concerns and provide "
+                    "opportunity for the patient to express their feelings."
+                )
+            ))
+        elif "Existential" in categories:
+            questions.append(FollowUpQuestion(
+                question_id=str(uuid.uuid4())[:8],
+                question_text=(
+                    "Sometimes when we're going through health challenges, we find "
+                    "ourselves thinking about bigger questions. Is that something "
+                    "you'd like to talk about?"
+                ),
+                purpose=(
+                    "To explore existential concerns and meaning-making in the "
+                    "context of the patient's health situation."
+                )
+            ))
+        elif "Spiritual" in categories:
+            questions.append(FollowUpQuestion(
+                question_id=str(uuid.uuid4())[:8],
+                question_text=(
+                    "Do you have any spiritual or religious practices that are "
+                    "important to you? How has your current situation affected them?"
+                ),
+                purpose=(
+                    "To understand the patient's spiritual background and how "
+                    "their current situation may be impacting their spiritual life."
+                )
+            ))
+        else:
+            questions.append(FollowUpQuestion(
+                question_id=str(uuid.uuid4())[:8],
+                question_text=(
+                    "Is there anything that's been particularly challenging for you "
+                    "during this time? What kind of support would be most helpful?"
+                ),
+                purpose=(
+                    "To identify specific challenges and understand what type of "
+                    "support the patient might need."
+                )
+            ))
+        # Question 3: Support and resources
+        questions.append(FollowUpQuestion(
+            question_id=str(uuid.uuid4())[:8],
+            question_text=(
+                "Do you have people in your life you can talk to about these things? "
+                "Would you be interested in speaking with someone from our care team?"
+            ),
+            purpose=(
+                "To assess the patient's support system and gauge interest in "
+                "additional spiritual care resources."
+            )
+        ))
+        # Ensure we return 2-3 questions
+        return questions[:3]

src/core/error_pattern_analyzer.py ADDED Viewed

	@@ -0,0 +1,283 @@

+# error_pattern_analyzer.py
+"""
+Error Pattern Analyzer for Chaplain Feedback System.
+Analyzes tagging records to identify error patterns, calculate subcategory
+breakdowns, and provide insights into classifier performance.
+"""
+from typing import List, Dict, Any
+from collections import Counter
+from .chaplain_models import (
+    TaggingRecord,
+    CLASSIFICATION_SUBCATEGORIES,
+    QUESTION_ISSUE_TYPES,
+    REFERRAL_ISSUE_TYPES,
+)
+class ErrorPatternAnalyzer:
+    """
+    Analyzes error patterns from tagging records.
+    Provides methods to calculate subcategory breakdowns, identify common
+    error patterns, and generate statistics for session analysis.
+    """
+    def __init__(self):
+        """Initialize the error pattern analyzer."""
+        pass
+    def analyze_classification_errors(
+        self,
+        records: List[TaggingRecord]
+    ) -> Dict[str, int]:
+        """
+        Get breakdown of classification error subcategories.
+        Counts how many times each classification error subcategory appears
+        in the provided records.
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            Dictionary mapping subcategory names to counts
+            Example: {
+                "missed_indicators": 5,
+                "false_positive": 2,
+                "missed_distress": 3
+            }
+        """
+        subcategory_counts = {subcategory: 0 for subcategory in CLASSIFICATION_SUBCATEGORIES}
+        for record in records:
+            # Only count records where classification is incorrect
+            if not record.is_classification_correct and record.classification_subcategory:
+                subcategory = record.classification_subcategory
+                if subcategory in subcategory_counts:
+                    subcategory_counts[subcategory] += 1
+        return subcategory_counts
+    def analyze_question_issues(
+        self,
+        records: List[TaggingRecord]
+    ) -> Dict[str, int]:
+        """
+        Get breakdown of follow-up question issues by subcategory.
+        Counts how many times each question issue type appears across
+        all records (supporting multi-select).
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            Dictionary mapping issue type names to counts
+            Example: {
+                "inappropriate": 3,
+                "not_relevant": 2,
+                "too_leading": 1,
+                "unclear": 0,
+                "tone_clinical": 2,
+                "tone_religious": 0,
+                "tone_casual": 1
+            }
+        """
+        issue_counts = {issue_type: 0 for issue_type in QUESTION_ISSUE_TYPES}
+        for record in records:
+            # Count each issue type in the multi-select list
+            for issue in record.question_issues:
+                if issue in issue_counts:
+                    issue_counts[issue] += 1
+        return issue_counts
+    def analyze_referral_issues(
+        self,
+        records: List[TaggingRecord]
+    ) -> Dict[str, int]:
+        """
+        Get breakdown of referral message issues by subcategory.
+        Counts how many times each referral issue type appears across
+        all records (supporting multi-select).
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            Dictionary mapping issue type names to counts
+            Example: {
+                "incomplete_summary": 2,
+                "misrepresentation": 1,
+                "inappropriate_tone": 3
+            }
+        """
+        issue_counts = {issue_type: 0 for issue_type in REFERRAL_ISSUE_TYPES}
+        for record in records:
+            # Count each issue type in the multi-select list
+            for issue in record.referral_issues:
+                if issue in issue_counts:
+                    issue_counts[issue] += 1
+        return issue_counts
+    def analyze_indicator_issues(
+        self,
+        records: List[TaggingRecord]
+    ) -> Dict[str, int]:
+        """
+        Get breakdown of commonly missed/incorrectly identified indicators.
+        Counts how many times each indicator ID appears in the indicator_issues
+        lists across all records.
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            Dictionary mapping indicator IDs to counts
+            Example: {
+                "excessive_guilt": 3,
+                "crying": 2,
+                "anxiety": 1
+            }
+        """
+        indicator_counts: Dict[str, int] = {}
+        for record in records:
+            # Count each indicator in the list
+            for indicator_id in record.indicator_issues:
+                if indicator_id not in indicator_counts:
+                    indicator_counts[indicator_id] = 0
+                indicator_counts[indicator_id] += 1
+        return indicator_counts
+    def get_common_patterns(
+        self,
+        records: List[TaggingRecord]
+    ) -> List[str]:
+        """
+        Get list of common error patterns in plain language.
+        Analyzes all error types and returns human-readable descriptions
+        of the most common patterns found in the records.
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            List of plain-language descriptions of common patterns
+            Example: [
+                "Most common classification error: missed_indicators (5 occurrences)",
+                "Most common question issue: inappropriate (3 occurrences)",
+                "Most common referral issue: inappropriate_tone (3 occurrences)"
+            ]
+        """
+        patterns = []
+        # Analyze classification errors
+        classification_errors = self.analyze_classification_errors(records)
+        if any(classification_errors.values()):
+            max_error = max(classification_errors.items(), key=lambda x: x[1])
+            if max_error[1] > 0:
+                patterns.append(
+                    f"Most common classification error: {max_error[0]} ({max_error[1]} occurrences)"
+                )
+        # Analyze question issues
+        question_issues = self.analyze_question_issues(records)
+        if any(question_issues.values()):
+            max_issue = max(question_issues.items(), key=lambda x: x[1])
+            if max_issue[1] > 0:
+                patterns.append(
+                    f"Most common question issue: {max_issue[0]} ({max_issue[1]} occurrences)"
+                )
+        # Analyze referral issues
+        referral_issues = self.analyze_referral_issues(records)
+        if any(referral_issues.values()):
+            max_issue = max(referral_issues.items(), key=lambda x: x[1])
+            if max_issue[1] > 0:
+                patterns.append(
+                    f"Most common referral issue: {max_issue[0]} ({max_issue[1]} occurrences)"
+                )
+        # Analyze indicator issues
+        indicator_issues = self.analyze_indicator_issues(records)
+        if indicator_issues:
+            max_indicator = max(indicator_issues.items(), key=lambda x: x[1])
+            if max_indicator[1] > 0:
+                patterns.append(
+                    f"Most commonly missed/incorrect indicator: {max_indicator[0]} ({max_indicator[1]} occurrences)"
+                )
+        return patterns
+    def get_statistics_summary(
+        self,
+        records: List[TaggingRecord]
+    ) -> Dict[str, Any]:
+        """
+        Get comprehensive statistics summary for a session.
+        Combines all analysis methods into a single summary dictionary
+        suitable for display or export.
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            Dictionary containing all statistics
+            Example: {
+                "total_records": 10,
+                "classification_errors": {...},
+                "question_issues": {...},
+                "referral_issues": {...},
+                "indicator_issues": {...},
+                "common_patterns": [...]
+            }
+        """
+        return {
+            "total_records": len(records),
+            "classification_errors": self.analyze_classification_errors(records),
+            "question_issues": self.analyze_question_issues(records),
+            "referral_issues": self.analyze_referral_issues(records),
+            "indicator_issues": self.analyze_indicator_issues(records),
+            "common_patterns": self.get_common_patterns(records),
+        }
+    def get_error_patterns_grouped_by_type(
+        self,
+        records: List[TaggingRecord]
+    ) -> Dict[str, Dict[str, int]]:
+        """
+        Get error patterns grouped by error type.
+        Returns all error types grouped together with their frequency counts,
+        suitable for display in error pattern summaries.
+        Args:
+            records: List of TaggingRecord instances to analyze
+        Returns:
+            Dictionary with error types as keys and subcategory breakdowns as values
+            Example: {
+                "classification": {"missed_indicators": 5, "false_positive": 2, ...},
+                "question": {"inappropriate": 3, "not_relevant": 2, ...},
+                "referral": {"incomplete_summary": 2, "misrepresentation": 1, ...},
+                "indicator": {"excessive_guilt": 3, "crying": 2, ...}
+            }
+        """
+        return {
+            "classification": self.analyze_classification_errors(records),
+            "question": self.analyze_question_issues(records),
+            "referral": self.analyze_referral_issues(records),
+            "indicator": self.analyze_indicator_issues(records),
+        }

src/core/interaction_logger.py ADDED Viewed

	@@ -0,0 +1,258 @@

+# interaction_logger.py
+"""
+Interaction logging service for Chaplain Feedback System.
+Logs all interaction steps with input/output and supports approval status updates.
+"""
+import uuid
+from typing import List, Optional, Dict, Any
+from datetime import datetime
+from src.core.chaplain_models import (
+    InteractionStepLog,
+    TaggingRecord,
+)
+class InteractionLogger:
+    """
+    Logs all interaction steps in the chaplain feedback system.
+    Records input/output for each step and supports updating approval status
+    with tagging data.
+    """
+    def __init__(self):
+        """Initialize the interaction logger."""
+        # In-memory storage of logs (can be extended to persist to database/file)
+        self._logs: Dict[str, InteractionStepLog] = {}
+        self._session_logs: Dict[str, List[str]] = {}  # session_id -> list of step_ids
+    def log_step(
+        self,
+        session_id: str,
+        message_id: str,
+        step_type: str,
+        input_text: str,
+        model_output: str,
+    ) -> str:
+        """
+        Log an interaction step.
+        Args:
+            session_id: ID of the verification session
+            message_id: ID of the message being processed
+            step_type: Type of step (classification, explanation, permission_check, etc.)
+            input_text: Input text for this step
+            model_output: Output from the model/system for this step
+        Returns:
+            step_id: Unique identifier for this logged step
+        Raises:
+            ValueError: If step_type is invalid
+        """
+        step_id = str(uuid.uuid4())
+        # Create log entry
+        log_entry = InteractionStepLog(
+            step_id=step_id,
+            session_id=session_id,
+            message_id=message_id,
+            step_type=step_type,
+            input_text=input_text,
+            model_output=model_output,
+            approval_status=None,
+            tagging_data=None,
+            timestamp=datetime.now(),
+        )
+        # Store log entry
+        self._logs[step_id] = log_entry
+        # Track logs by session
+        if session_id not in self._session_logs:
+            self._session_logs[session_id] = []
+        self._session_logs[session_id].append(step_id)
+        return step_id
+    def update_approval(
+        self,
+        step_id: str,
+        approval_status: str,
+        tagging_data: Optional[TaggingRecord] = None,
+    ) -> None:
+        """
+        Update a step with approval status and optional tagging data.
+        Args:
+            step_id: ID of the step to update
+            approval_status: "approved" or "disapproved"
+            tagging_data: Optional TaggingRecord with feedback details
+        Raises:
+            ValueError: If step_id not found or approval_status is invalid
+        """
+        if step_id not in self._logs:
+            raise ValueError(f"Step {step_id} not found")
+        if approval_status not in ("approved", "disapproved"):
+            raise ValueError(f"Invalid approval_status: {approval_status}")
+        log_entry = self._logs[step_id]
+        log_entry.approval_status = approval_status
+        log_entry.tagging_data = tagging_data
+    def get_step(self, step_id: str) -> Optional[InteractionStepLog]:
+        """
+        Get a specific logged step.
+        Args:
+            step_id: ID of the step to retrieve
+        Returns:
+            InteractionStepLog if found, None otherwise
+        """
+        return self._logs.get(step_id)
+    def get_session_logs(self, session_id: str) -> List[InteractionStepLog]:
+        """
+        Get all logs for a session.
+        Args:
+            session_id: ID of the session
+        Returns:
+            List of InteractionStepLog entries for the session, in order
+        """
+        step_ids = self._session_logs.get(session_id, [])
+        return [self._logs[step_id] for step_id in step_ids if step_id in self._logs]
+    def get_session_logs_by_type(
+        self,
+        session_id: str,
+        step_type: str,
+    ) -> List[InteractionStepLog]:
+        """
+        Get all logs of a specific type for a session.
+        Args:
+            session_id: ID of the session
+            step_type: Type of step to filter by
+        Returns:
+            List of InteractionStepLog entries matching the type
+        """
+        all_logs = self.get_session_logs(session_id)
+        return [log for log in all_logs if log.step_type == step_type]
+    def get_message_logs(self, message_id: str) -> List[InteractionStepLog]:
+        """
+        Get all logs for a specific message across all sessions.
+        Args:
+            message_id: ID of the message
+        Returns:
+            List of InteractionStepLog entries for the message
+        """
+        return [log for log in self._logs.values() if log.message_id == message_id]
+    def get_unapproved_steps(self, session_id: str) -> List[InteractionStepLog]:
+        """
+        Get all steps in a session that haven't been approved/disapproved yet.
+        Args:
+            session_id: ID of the session
+        Returns:
+            List of InteractionStepLog entries with no approval status
+        """
+        session_logs = self.get_session_logs(session_id)
+        return [log for log in session_logs if log.approval_status is None]
+    def get_disapproved_steps(self, session_id: str) -> List[InteractionStepLog]:
+        """
+        Get all disapproved steps in a session.
+        Args:
+            session_id: ID of the session
+        Returns:
+            List of disapproved InteractionStepLog entries
+        """
+        session_logs = self.get_session_logs(session_id)
+        return [log for log in session_logs if log.approval_status == "disapproved"]
+    def get_session_statistics(self, session_id: str) -> Dict[str, Any]:
+        """
+        Get statistics for a session's interaction logs.
+        Args:
+            session_id: ID of the session
+        Returns:
+            Dictionary with statistics about the session's interactions
+        """
+        session_logs = self.get_session_logs(session_id)
+        if not session_logs:
+            return {
+                "session_id": session_id,
+                "total_steps": 0,
+                "approved_steps": 0,
+                "disapproved_steps": 0,
+                "unapproved_steps": 0,
+                "steps_by_type": {},
+            }
+        # Count by approval status
+        approved = sum(1 for log in session_logs if log.approval_status == "approved")
+        disapproved = sum(1 for log in session_logs if log.approval_status == "disapproved")
+        unapproved = sum(1 for log in session_logs if log.approval_status is None)
+        # Count by step type
+        steps_by_type = {}
+        for log in session_logs:
+            if log.step_type not in steps_by_type:
+                steps_by_type[log.step_type] = 0
+            steps_by_type[log.step_type] += 1
+        return {
+            "session_id": session_id,
+            "total_steps": len(session_logs),
+            "approved_steps": approved,
+            "disapproved_steps": disapproved,
+            "unapproved_steps": unapproved,
+            "steps_by_type": steps_by_type,
+        }
+    def clear_session(self, session_id: str) -> None:
+        """
+        Clear all logs for a session.
+        Args:
+            session_id: ID of the session to clear
+        """
+        step_ids = self._session_logs.get(session_id, [])
+        for step_id in step_ids:
+            if step_id in self._logs:
+                del self._logs[step_id]
+        if session_id in self._session_logs:
+            del self._session_logs[session_id]
+    def export_session_logs(self, session_id: str) -> List[Dict[str, Any]]:
+        """
+        Export all logs for a session as dictionaries.
+        Args:
+            session_id: ID of the session
+        Returns:
+            List of log entries as dictionaries
+        """
+        session_logs = self.get_session_logs(session_id)
+        return [log.to_dict() for log in session_logs]

src/core/tagging_service.py ADDED Viewed

	@@ -0,0 +1,528 @@

+# tagging_service.py
+"""
+Tagging Service for Chaplain Feedback System.
+Handles creation, validation, and management of tagging records
+for chaplain feedback on classification results.
+"""
+from typing import List, Optional, Dict, Any
+import uuid
+from datetime import datetime
+from .chaplain_models import (
+    TaggingRecord,
+    CLASSIFICATION_SUBCATEGORIES,
+    QUESTION_ISSUE_TYPES,
+    REFERRAL_ISSUE_TYPES,
+)
+class TaggingService:
+    """
+    Service for handling tagging record creation and validation.
+    Supports multi-select for question and referral issues,
+    classification subcategories, and indicator issue tracking.
+    """
+    def __init__(self):
+        """Initialize the tagging service."""
+        self._records: Dict[str, TaggingRecord] = {}
+    def create_tagging_record(
+        self,
+        message_id: str,
+        is_classification_correct: bool = True,
+        classification_subcategory: Optional[str] = None,
+        correct_classification: Optional[str] = None,
+        question_issues: Optional[List[str]] = None,
+        question_comments: Optional[str] = None,
+        referral_issues: Optional[List[str]] = None,
+        referral_comments: Optional[str] = None,
+        indicator_issues: Optional[List[str]] = None,
+        indicator_comments: Optional[str] = None,
+        general_notes: str = "",
+    ) -> TaggingRecord:
+        """
+        Create a new tagging record with validation.
+        Args:
+            message_id: ID of the message being tagged
+            is_classification_correct: Whether classification is correct
+            classification_subcategory: Subcategory if classification is wrong
+            correct_classification: Correct classification if wrong
+            question_issues: List of question issue types (multi-select)
+            question_comments: Free-text comments about questions
+            referral_issues: List of referral issue types (multi-select)
+            referral_comments: Free-text comments about referral
+            indicator_issues: List of incorrectly identified indicator IDs
+            indicator_comments: Free-text comments about indicators
+            general_notes: General notes about the message
+        Returns:
+            Created and validated TaggingRecord
+        Raises:
+            ValueError: If validation fails
+        """
+        record_id = str(uuid.uuid4())
+        # Ensure lists are not None
+        question_issues = question_issues or []
+        referral_issues = referral_issues or []
+        indicator_issues = indicator_issues or []
+        # Validate inputs
+        self._validate_classification_tagging(
+            is_classification_correct,
+            classification_subcategory,
+            correct_classification
+        )
+        self._validate_question_issues(question_issues)
+        self._validate_referral_issues(referral_issues)
+        # Create record (validation happens in __post_init__)
+        record = TaggingRecord(
+            record_id=record_id,
+            message_id=message_id,
+            is_classification_correct=is_classification_correct,
+            classification_subcategory=classification_subcategory,
+            correct_classification=correct_classification,
+            question_issues=question_issues,
+            question_comments=question_comments,
+            referral_issues=referral_issues,
+            referral_comments=referral_comments,
+            indicator_issues=indicator_issues,
+            indicator_comments=indicator_comments,
+            general_notes=general_notes,
+        )
+        # Store record
+        self._records[record_id] = record
+        return record
+    def update_tagging_record(
+        self,
+        record_id: str,
+        **updates
+    ) -> TaggingRecord:
+        """
+        Update an existing tagging record.
+        Args:
+            record_id: ID of the record to update
+            **updates: Fields to update
+        Returns:
+            Updated TaggingRecord
+        Raises:
+            KeyError: If record not found
+            ValueError: If validation fails
+        """
+        if record_id not in self._records:
+            raise KeyError(f"Tagging record not found: {record_id}")
+        record = self._records[record_id]
+        # Create updated data
+        record_data = record.to_dict()
+        record_data.update(updates)
+        # Validate updates
+        if 'classification_subcategory' in updates or 'correct_classification' in updates:
+            self._validate_classification_tagging(
+                record_data.get('is_classification_correct', True),
+                record_data.get('classification_subcategory'),
+                record_data.get('correct_classification')
+            )
+        if 'question_issues' in updates:
+            self._validate_question_issues(record_data.get('question_issues', []))
+        if 'referral_issues' in updates:
+            self._validate_referral_issues(record_data.get('referral_issues', []))
+        # Create new record with updates
+        updated_record = TaggingRecord.from_dict(record_data)
+        self._records[record_id] = updated_record
+        return updated_record
+    def get_tagging_record(self, record_id: str) -> Optional[TaggingRecord]:
+        """
+        Get a tagging record by ID.
+        Args:
+            record_id: ID of the record to retrieve
+        Returns:
+            TaggingRecord if found, None otherwise
+        """
+        return self._records.get(record_id)
+    def get_records_for_message(self, message_id: str) -> List[TaggingRecord]:
+        """
+        Get all tagging records for a specific message.
+        Args:
+            message_id: ID of the message
+        Returns:
+            List of TaggingRecord instances for the message
+        """
+        return [
+            record for record in self._records.values()
+            if record.message_id == message_id
+        ]
+    def get_all_records(self) -> List[TaggingRecord]:
+        """
+        Get all tagging records.
+        Returns:
+            List of all TaggingRecord instances
+        """
+        return list(self._records.values())
+    def delete_tagging_record(self, record_id: str) -> bool:
+        """
+        Delete a tagging record.
+        Args:
+            record_id: ID of the record to delete
+        Returns:
+            True if deleted, False if not found
+        """
+        if record_id in self._records:
+            del self._records[record_id]
+            return True
+        return False
+    def get_available_classification_subcategories(self) -> List[str]:
+        """
+        Get list of available classification subcategories.
+        Returns:
+            List of classification subcategory options
+        """
+        return CLASSIFICATION_SUBCATEGORIES.copy()
+    def get_available_question_issue_types(self) -> List[str]:
+        """
+        Get list of available question issue types.
+        Returns:
+            List of question issue type options
+        """
+        return QUESTION_ISSUE_TYPES.copy()
+    def get_available_referral_issue_types(self) -> List[str]:
+        """
+        Get list of available referral issue types.
+        Returns:
+            List of referral issue type options
+        """
+        return REFERRAL_ISSUE_TYPES.copy()
+    def create_classification_correction(
+        self,
+        message_id: str,
+        subcategory: str,
+        correct_classification: str,
+        general_notes: str = ""
+    ) -> TaggingRecord:
+        """
+        Create a tagging record specifically for wrong classification.
+        This is a convenience method that ensures proper validation
+        for classification correction scenarios.
+        Args:
+            message_id: ID of the message being corrected
+            subcategory: Classification error subcategory
+            correct_classification: The correct classification
+            general_notes: Additional notes about the correction
+        Returns:
+            TaggingRecord for the classification correction
+        Raises:
+            ValueError: If subcategory or correct_classification is invalid
+        """
+        return self.create_tagging_record(
+            message_id=message_id,
+            is_classification_correct=False,
+            classification_subcategory=subcategory,
+            correct_classification=correct_classification,
+            general_notes=general_notes
+        )
+    def get_classification_subcategory_descriptions(self) -> Dict[str, str]:
+        """
+        Get descriptions for classification subcategories.
+        Returns:
+            Dictionary mapping subcategory codes to descriptions
+        """
+        return {
+            "missed_indicators": "Missed key distress indicators",
+            "false_positive": "Overly sensitive (false-positive flag)",
+            "missed_distress": "Not sensitive enough (missed distress)",
+        }
+    def create_question_issue_tagging(
+        self,
+        message_id: str,
+        question_issues: List[str],
+        question_comments: Optional[str] = None,
+        general_notes: str = ""
+    ) -> TaggingRecord:
+        """
+        Create a tagging record specifically for follow-up question issues.
+        This is a convenience method for tagging YELLOW flow question issues
+        with multi-select support and free-text comments.
+        Args:
+            message_id: ID of the message being tagged
+            question_issues: List of question issue types (multi-select)
+            question_comments: Free-text comments about questions
+            general_notes: Additional notes
+        Returns:
+            TaggingRecord for the question issues
+        Raises:
+            ValueError: If question_issues contains invalid types
+        """
+        return self.create_tagging_record(
+            message_id=message_id,
+            question_issues=question_issues,
+            question_comments=question_comments,
+            general_notes=general_notes
+        )
+    def get_question_issue_descriptions(self) -> Dict[str, str]:
+        """
+        Get descriptions for question issue types.
+        Returns:
+            Dictionary mapping issue codes to descriptions
+        """
+        return {
+            "inappropriate": "Question is inappropriate or intrusive",
+            "not_relevant": "Question is not spiritually relevant",
+            "too_leading": "Question is too leading or assumptive",
+            "unclear": "Question is unclear or confusing",
+            "tone_clinical": "Tone too clinical",
+            "tone_religious": "Tone too religious",
+            "tone_casual": "Tone too casual",
+        }
+    def create_referral_issue_tagging(
+        self,
+        message_id: str,
+        referral_issues: List[str],
+        referral_comments: Optional[str] = None,
+        general_notes: str = ""
+    ) -> TaggingRecord:
+        """
+        Create a tagging record specifically for referral message issues.
+        This is a convenience method for tagging RED flow referral message issues
+        with multi-select support and free-text comments.
+        Args:
+            message_id: ID of the message being tagged
+            referral_issues: List of referral issue types (multi-select)
+            referral_comments: Free-text comments about referral message
+            general_notes: Additional notes
+        Returns:
+            TaggingRecord for the referral issues
+        Raises:
+            ValueError: If referral_issues contains invalid types
+        """
+        return self.create_tagging_record(
+            message_id=message_id,
+            referral_issues=referral_issues,
+            referral_comments=referral_comments,
+            general_notes=general_notes
+        )
+    def get_referral_issue_descriptions(self) -> Dict[str, str]:
+        """
+        Get descriptions for referral issue types.
+        Returns:
+            Dictionary mapping issue codes to descriptions
+        """
+        return {
+            "incomplete_summary": "Incorrect or incomplete summary",
+            "misrepresentation": "Misrepresentation of patient message",
+            "inappropriate_tone": "Tone inappropriate for spiritual care team",
+        }
+    def create_indicator_issue_tagging(
+        self,
+        message_id: str,
+        indicator_issues: List[str],
+        indicator_comments: Optional[str] = None,
+        general_notes: str = ""
+    ) -> TaggingRecord:
+        """
+        Create a tagging record specifically for indicator issues.
+        This is a convenience method for tagging incorrectly identified indicators
+        with free-text comments.
+        Args:
+            message_id: ID of the message being tagged
+            indicator_issues: List of incorrectly identified indicator IDs
+            indicator_comments: Free-text comments about indicators
+            general_notes: Additional notes
+        Returns:
+            TaggingRecord for the indicator issues
+        """
+        return self.create_tagging_record(
+            message_id=message_id,
+            indicator_issues=indicator_issues,
+            indicator_comments=indicator_comments,
+            general_notes=general_notes
+        )
+    def create_indicator_issue_tagging(
+        self,
+        message_id: str,
+        indicator_issues: List[str],
+        indicator_comments: Optional[str] = None,
+        general_notes: str = ""
+    ) -> TaggingRecord:
+        """
+        Create a tagging record specifically for indicator issues.
+        This is a convenience method for marking incorrectly identified
+        distress indicators with comments.
+        Args:
+            message_id: ID of the message being tagged
+            indicator_issues: List of incorrectly identified indicator IDs
+            indicator_comments: Free-text comments about indicators
+            general_notes: Additional notes
+        Returns:
+            TaggingRecord for the indicator issues
+        """
+        return self.create_tagging_record(
+            message_id=message_id,
+            indicator_issues=indicator_issues,
+            indicator_comments=indicator_comments,
+            general_notes=general_notes
+        )
+    def validate_indicator_ids(self, indicator_ids: List[str]) -> bool:
+        """
+        Validate that indicator IDs are reasonable.
+        This is a basic validation - in a real system, you might
+        validate against actual indicator IDs from the classification result.
+        Args:
+            indicator_ids: List of indicator IDs to validate
+        Returns:
+            True if all IDs are valid format, False otherwise
+        """
+        for indicator_id in indicator_ids:
+            if not isinstance(indicator_id, str) or len(indicator_id.strip()) == 0:
+                return False
+        return True
+    def _validate_classification_tagging(
+        self,
+        is_classification_correct: bool,
+        classification_subcategory: Optional[str],
+        correct_classification: Optional[str]
+    ) -> None:
+        """
+        Validate classification tagging fields.
+        Args:
+            is_classification_correct: Whether classification is correct
+            classification_subcategory: Subcategory if wrong
+            correct_classification: Correct classification if wrong
+        Raises:
+            ValueError: If validation fails
+        """
+        if not is_classification_correct:
+            # If classification is wrong, require subcategory and correct classification
+            if not classification_subcategory:
+                raise ValueError(
+                    "classification_subcategory is required when is_classification_correct is False"
+                )
+            if not correct_classification:
+                raise ValueError(
+                    "correct_classification is required when is_classification_correct is False"
+                )
+            if classification_subcategory not in CLASSIFICATION_SUBCATEGORIES:
+                raise ValueError(
+                    f"Invalid classification_subcategory: {classification_subcategory}. "
+                    f"Must be one of: {CLASSIFICATION_SUBCATEGORIES}"
+                )
+            if correct_classification not in ("red", "yellow", "green"):
+                raise ValueError(
+                    f"Invalid correct_classification: {correct_classification}. "
+                    f"Must be one of: red, yellow, green"
+                )
+        else:
+            # If classification is correct, these fields should be None
+            if classification_subcategory is not None:
+                raise ValueError(
+                    "classification_subcategory must be None when is_classification_correct is True"
+                )
+            if correct_classification is not None:
+                raise ValueError(
+                    "correct_classification must be None when is_classification_correct is True"
+                )
+    def _validate_question_issues(self, question_issues: List[str]) -> None:
+        """
+        Validate question issue types.
+        Args:
+            question_issues: List of question issue types
+        Raises:
+            ValueError: If any issue type is invalid
+        """
+        for issue in question_issues:
+            if issue not in QUESTION_ISSUE_TYPES:
+                raise ValueError(
+                    f"Invalid question issue type: {issue}. "
+                    f"Must be one of: {QUESTION_ISSUE_TYPES}"
+                )
+    def _validate_referral_issues(self, referral_issues: List[str]) -> None:
+        """
+        Validate referral issue types.
+        Args:
+            referral_issues: List of referral issue types
+        Raises:
+            ValueError: If any issue type is invalid
+        """
+        for issue in referral_issues:
+            if issue not in REFERRAL_ISSUE_TYPES:
+                raise ValueError(
+                    f"Invalid referral issue type: {issue}. "
+                    f"Must be one of: {REFERRAL_ISSUE_TYPES}"
+                )

src/core/verification_csv_exporter.py CHANGED Viewed

@@ -2,14 +2,21 @@
 """
 CSV export functionality for verification sessions.
-Provides methods for generating CSV files with verification results and summaries.
 """
 import csv
 import io
 from datetime import datetime
-from typing import List
 from src.core.verification_models import VerificationRecord, VerificationSession
 class VerificationCSVExporter:
@@ -135,3 +142,207 @@ class VerificationCSVExporter:
             "incorrect": session.incorrect_count,
             "accuracy_percent": accuracy,
         }

 """
 CSV export functionality for verification sessions.
+Provides methods for generating CSV files with verification results and summaries,
+including tagging data, generated content, interaction logs, and error statistics.
 """
 import csv
 import io
 from datetime import datetime
+from typing import List, Optional, Dict, Any
 from src.core.verification_models import VerificationRecord, VerificationSession
+from src.core.chaplain_models import (
+    TaggingRecord,
+    ClassificationFlowResult,
+    InteractionStepLog,
+)
+from src.core.error_pattern_analyzer import ErrorPatternAnalyzer
 class VerificationCSVExporter:
             "incorrect": session.incorrect_count,
             "accuracy_percent": accuracy,
         }
+    @staticmethod
+    def generate_enhanced_csv_content(
+        session: VerificationSession,
+        tagging_records: Optional[List[TaggingRecord]] = None,
+        flow_results: Optional[Dict[str, ClassificationFlowResult]] = None,
+        interaction_logs: Optional[List[InteractionStepLog]] = None,
+    ) -> str:
+        """
+        Generate enhanced CSV content with tagging data, generated content, and statistics.
+        Includes:
+        - Summary section with accuracy metrics
+        - Detailed records with tagging categories and subcategories
+        - Generated content (explanations, questions, referral messages)
+        - Interaction logs
+        - Error pattern statistics
+        Args:
+            session: The verification session to export
+            tagging_records: List of TaggingRecord instances (optional)
+            flow_results: Dict mapping message_id to ClassificationFlowResult (optional)
+            interaction_logs: List of InteractionStepLog instances (optional)
+        Returns:
+            Enhanced CSV content as a string
+        Raises:
+            ValueError: If session has no verified messages
+        """
+        if session.verified_count == 0:
+            raise ValueError("No verified messages to export")
+        output = io.StringIO()
+        # Add summary section
+        accuracy = (
+            session.correct_count / session.verified_count * 100
+            if session.verified_count > 0
+            else 0.0
+        )
+        output.write("VERIFICATION SUMMARY\n")
+        output.write(f"Total Messages,{session.verified_count}\n")
+        output.write(f"Correct,{session.correct_count}\n")
+        output.write(f"Incorrect,{session.incorrect_count}\n")
+        output.write(f"Accuracy %,{accuracy:.1f}\n")
+        output.write("\n")
+        # Add detailed records section
+        output.write("DETAILED RECORDS\n")
+        output.write("Patient Message,Classifier Said,You Said,Notes,Date\n")
+        writer = csv.writer(output)
+        for record in session.verifications:
+            classifier_decision = record.classifier_decision.upper()
+            ground_truth = record.ground_truth_label.upper()
+            timestamp = record.timestamp.strftime("%Y-%m-%d %H:%M:%S")
+            writer.writerow([
+                record.original_message,
+                classifier_decision,
+                ground_truth,
+                record.verifier_notes,
+                timestamp,
+            ])
+        output.write("\n")
+        # Add tagging data section if provided
+        if tagging_records:
+            output.write("TAGGING DATA\n")
+            output.write("Message ID,Classification Correct,Classification Subcategory,Correct Classification,Question Issues,Question Comments,Referral Issues,Referral Comments,Indicator Issues,Indicator Comments,General Notes\n")
+            for record in tagging_records:
+                writer.writerow([
+                    record.message_id,
+                    "Yes" if record.is_classification_correct else "No",
+                    record.classification_subcategory or "",
+                    record.correct_classification or "",
+                    "; ".join(record.question_issues) if record.question_issues else "",
+                    record.question_comments or "",
+                    "; ".join(record.referral_issues) if record.referral_issues else "",
+                    record.referral_comments or "",
+                    "; ".join(record.indicator_issues) if record.indicator_issues else "",
+                    record.indicator_comments or "",
+                    record.general_notes,
+                ])
+            output.write("\n")
+        # Add generated content section if provided
+        if flow_results:
+            output.write("GENERATED CONTENT\n")
+            output.write("Message ID,Classification,Explanation,Permission Check Message,Referral Message,Follow-Up Questions,Patient Responses,Re-evaluation Result\n")
+            for message_id, result in flow_results.items():
+                questions_text = "; ".join([q.question_text for q in result.follow_up_questions]) if result.follow_up_questions else ""
+                responses_text = "; ".join(result.patient_responses) if result.patient_responses else ""
+                writer.writerow([
+                    message_id,
+                    result.classification.upper(),
+                    result.explanation,
+                    result.permission_check_message or "",
+                    result.referral_message or "",
+                    questions_text,
+                    responses_text,
+                    result.re_evaluation_result or "",
+                ])
+            output.write("\n")
+        # Add interaction logs section if provided
+        if interaction_logs:
+            output.write("INTERACTION LOGS\n")
+            output.write("Step ID,Session ID,Message ID,Step Type,Input Text,Model Output,Approval Status,Timestamp\n")
+            for log in interaction_logs:
+                writer.writerow([
+                    log.step_id,
+                    log.session_id,
+                    log.message_id,
+                    log.step_type,
+                    log.input_text,
+                    log.model_output,
+                    log.approval_status or "",
+                    log.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
+                ])
+            output.write("\n")
+        # Add statistics section if tagging records provided
+        if tagging_records:
+            output.write("ERROR PATTERN STATISTICS\n")
+            analyzer = ErrorPatternAnalyzer()
+            stats = analyzer.get_statistics_summary(tagging_records)
+            # Classification errors
+            output.write("Classification Errors\n")
+            for subcategory, count in stats["classification_errors"].items():
+                output.write(f"{subcategory},{count}\n")
+            output.write("\n")
+            # Question issues
+            output.write("Question Issues\n")
+            for issue_type, count in stats["question_issues"].items():
+                output.write(f"{issue_type},{count}\n")
+            output.write("\n")
+            # Referral issues
+            output.write("Referral Issues\n")
+            for issue_type, count in stats["referral_issues"].items():
+                output.write(f"{issue_type},{count}\n")
+            output.write("\n")
+            # Indicator issues
+            output.write("Indicator Issues\n")
+            for indicator_id, count in stats["indicator_issues"].items():
+                output.write(f"{indicator_id},{count}\n")
+            output.write("\n")
+            # Common patterns
+            output.write("Common Patterns\n")
+            for pattern in stats["common_patterns"]:
+                output.write(f"{pattern}\n")
+            output.write("\n")
+        return output.getvalue()
+    @staticmethod
+    def export_enhanced_session_to_csv(
+        session: VerificationSession,
+        tagging_records: Optional[List[TaggingRecord]] = None,
+        flow_results: Optional[Dict[str, ClassificationFlowResult]] = None,
+        interaction_logs: Optional[List[InteractionStepLog]] = None,
+    ) -> tuple:
+        """
+        Export a verification session with enhanced data to CSV format.
+        Returns both the CSV content and the filename.
+        Args:
+            session: The verification session to export
+            tagging_records: List of TaggingRecord instances (optional)
+            flow_results: Dict mapping message_id to ClassificationFlowResult (optional)
+            interaction_logs: List of InteractionStepLog instances (optional)
+        Returns:
+            Tuple of (csv_content, filename)
+        Raises:
+            ValueError: If session has no verified messages
+        """
+        csv_content = VerificationCSVExporter.generate_enhanced_csv_content(
+            session,
+            tagging_records=tagging_records,
+            flow_results=flow_results,
+            interaction_logs=interaction_logs,
+        )
+        filename = VerificationCSVExporter.generate_csv_filename(session.created_at)
+        return csv_content, filename

src/interface/chaplain_feedback_ui.py ADDED Viewed

	@@ -0,0 +1,450 @@

+# chaplain_feedback_ui.py
+"""
+Gradio UI components for Chaplain Feedback & Tagging System.
+Provides interface components for displaying classification flows,
+collecting chaplain feedback, and displaying error patterns.
+Requirements: 1.5, 2.3, 3.3, 4.1, 5.1, 5.3, 6.1, 6.3, 8.1, 8.2, 8.3, 10.1, 10.2, 10.3
+"""
+import gradio as gr
+from typing import List, Dict, Tuple, Optional, Any
+from dataclasses import dataclass
+from src.core.chaplain_models import (
+    ClassificationFlowResult,
+    DistressIndicator,
+    FollowUpQuestion,
+    TaggingRecord,
+    CLASSIFICATION_SUBCATEGORIES,
+    QUESTION_ISSUE_TYPES,
+    REFERRAL_ISSUE_TYPES,
+)
+class ChaplainFeedbackUIComponents:
+    """Manages Gradio UI components for chaplain feedback system."""
+    # Color mappings for classification badges
+    BADGE_COLORS = {
+        "red": "🔴",
+        "yellow": "🟡",
+        "green": "🟢",
+    }
+    BADGE_LABELS = {
+        "red": "RED - Severe Distress",
+        "yellow": "YELLOW - Potential Distress",
+        "green": "GREEN - No Distress",
+    }
+    # Severity color codes for indicators
+    SEVERITY_COLORS = {
+        "red": "#ea9999",      # Red from definitions document
+        "yellow": "#ffe599",   # Yellow from definitions document
+    }
+    @staticmethod
+    def create_classification_flow_display() -> Tuple[gr.Component, gr.Component, gr.Component, gr.Component]:
+        """
+        Create ClassificationFlowDisplay component.
+        Displays RED/YELLOW/GREEN flow results with all generated content.
+        Returns:
+            Tuple of (classification_badge, explanation, content_section, indicators_section) components
+        Requirements: 1.5, 2.3, 3.3
+        """
+        classification_badge = gr.Markdown(
+            value="🔄 Loading classification...",
+            label="Classification Result",
+        )
+        explanation = gr.Markdown(
+            value="",
+            label="Explanation",
+        )
+        content_section = gr.Markdown(
+            value="",
+            label="Generated Content",
+        )
+        indicators_section = gr.Markdown(
+            value="",
+            label="Detected Indicators",
+        )
+        return classification_badge, explanation, content_section, indicators_section
+    @staticmethod
+    def render_classification_flow(
+        flow_result: ClassificationFlowResult,
+    ) -> Tuple[str, str, str, str]:
+        """
+        Render complete classification flow result.
+        Args:
+            flow_result: ClassificationFlowResult with all flow data
+        Returns:
+            Tuple of (badge, explanation, content, indicators) markdown strings
+        """
+        # Classification badge
+        badge_emoji = ChaplainFeedbackUIComponents.BADGE_COLORS.get(flow_result.classification, "❓")
+        badge_label = ChaplainFeedbackUIComponents.BADGE_LABELS.get(flow_result.classification, "UNKNOWN")
+        confidence_pct = int(round(flow_result.confidence * 100))
+        badge = f"## {badge_emoji} {badge_label}\n\n**Confidence:** {confidence_pct}%"
+        # Explanation
+        explanation = f"### Explanation\n\n{flow_result.explanation}"
+        # Generated content based on classification
+        content = ""
+        if flow_result.classification == "red":
+            content = ChaplainFeedbackUIComponents._render_red_flow_content(flow_result)
+        elif flow_result.classification == "yellow":
+            content = ChaplainFeedbackUIComponents._render_yellow_flow_content(flow_result)
+        elif flow_result.classification == "green":
+            content = ChaplainFeedbackUIComponents._render_green_flow_content(flow_result)
+        # Indicators
+        indicators = ChaplainFeedbackUIComponents._render_indicators(flow_result.indicators)
+        return badge, explanation, content, indicators
+    @staticmethod
+    def _render_red_flow_content(flow_result: ClassificationFlowResult) -> str:
+        """Render RED flow content (permission check + referral message)."""
+        content = "### 🔴 RED FLAG - Severe Distress Detected\n\n"
+        if flow_result.permission_check_message:
+            content += "#### Patient Permission Check\n\n"
+            content += f"{flow_result.permission_check_message}\n\n"
+        if flow_result.consent_status:
+            content += f"**Consent Status:** {flow_result.consent_status}\n\n"
+        if flow_result.referral_message and flow_result.consent_status == "granted":
+            content += "#### Referral Message for Spiritual Care Team\n\n"
+            content += f"{flow_result.referral_message}\n\n"
+        elif flow_result.consent_status == "declined":
+            content += "**Status:** No further action - patient declined spiritual support referral\n\n"
+        return content
+    @staticmethod
+    def _render_yellow_flow_content(flow_result: ClassificationFlowResult) -> str:
+        """Render YELLOW flow content (follow-up questions + re-evaluation)."""
+        content = "### 🟡 YELLOW FLAG - Potential Distress\n\n"
+        if flow_result.follow_up_questions:
+            content += "#### Follow-Up Questions\n\n"
+            for i, question in enumerate(flow_result.follow_up_questions, 1):
+                content += f"**Question {i}:** {question.question_text}\n\n"
+                content += f"*Purpose:* {question.purpose}\n\n"
+        if flow_result.patient_responses:
+            content += "#### Patient Responses\n\n"
+            for i, response in enumerate(flow_result.patient_responses, 1):
+                content += f"**Response {i}:** {response}\n\n"
+        if flow_result.re_evaluation_result:
+            content += f"#### Re-Evaluation Result\n\n"
+            if flow_result.re_evaluation_result == "red":
+                content += "🔴 **Escalated to RED** - Severe distress detected in responses\n\n"
+            elif flow_result.re_evaluation_result == "green":
+                content += "🟢 **Downgraded to GREEN** - No distress indicators in responses\n\n"
+        return content
+    @staticmethod
+    def _render_green_flow_content(flow_result: ClassificationFlowResult) -> str:
+        """Render GREEN flow content (no distress)."""
+        content = "### 🟢 GREEN FLAG - No Distress Detected\n\n"
+        content += "**Status:** No further steps required\n\n"
+        content += "No spiritual distress indicators were detected in this message.\n\n"
+        return content
+    @staticmethod
+    def _render_indicators(indicators: List[DistressIndicator]) -> str:
+        """Render detected indicators with categories and severity."""
+        if not indicators:
+            return "### Detected Indicators\n\nNo indicators detected"
+        content = "### Detected Indicators\n\n"
+        # Group by severity
+        red_indicators = [i for i in indicators if i.severity == "red"]
+        yellow_indicators = [i for i in indicators if i.severity == "yellow"]
+        if red_indicators:
+            content += "#### 🔴 RED Indicators (Severe)\n\n"
+            for indicator in red_indicators:
+                confidence_pct = int(round(indicator.confidence * 100))
+                content += f"• **{indicator.subcategory}** ({confidence_pct}% confidence)\n"
+                content += f"  - Category: {indicator.category}\n"
+                content += f"  - Reference: {indicator.definition_reference}\n\n"
+        if yellow_indicators:
+            content += "#### 🟡 YELLOW Indicators (Potential)\n\n"
+            for indicator in yellow_indicators:
+                confidence_pct = int(round(indicator.confidence * 100))
+                content += f"• **{indicator.subcategory}** ({confidence_pct}% confidence)\n"
+                content += f"  - Category: {indicator.category}\n"
+                content += f"  - Reference: {indicator.definition_reference}\n\n"
+        return content
+    @staticmethod
+    def create_tagging_interface() -> Tuple[gr.Component, gr.Component, gr.Component, gr.Component, gr.Component, gr.Component, gr.Component, gr.Component, gr.Component, gr.Component]:
+        """
+        Create TaggingInterface component.
+        Provides classification subcategory selector, multi-select for issues,
+        and free-text comment fields.
+        Returns:
+            Tuple of individual tagging components for use in event handlers
+        Requirements: 4.1, 5.1, 5.3, 6.1, 6.3
+        """
+        # Classification tagging components
+        is_correct = gr.Radio(
+            choices=[("✓ Correct", True), ("✗ Incorrect", False)],
+            label="Is the classification correct?",
+            interactive=True,
+            visible=False,
+        )
+        subcategory = gr.Dropdown(
+            choices=CLASSIFICATION_SUBCATEGORIES,
+            label="What type of error? (if incorrect)",
+            interactive=True,
+            visible=False,
+        )
+        correct_classification = gr.Radio(
+            choices=[
+                ("🟢 GREEN - No Distress", "green"),
+                ("🟡 YELLOW - Potential Distress", "yellow"),
+                ("🔴 RED - Severe Distress", "red"),
+            ],
+            label="What should the correct classification be?",
+            interactive=True,
+            visible=False,
+        )
+        # Follow-up question issues components
+        question_issues = gr.CheckboxGroup(
+            choices=QUESTION_ISSUE_TYPES,
+            label="Issues with follow-up questions (select all that apply)",
+            interactive=True,
+            visible=False,
+        )
+        question_comments = gr.Textbox(
+            label="Comments on questions",
+            placeholder="e.g., 'Too clinical', 'Not spiritually relevant'",
+            lines=2,
+            interactive=True,
+            visible=False,
+        )
+        # Referral message issues components
+        referral_issues = gr.CheckboxGroup(
+            choices=REFERRAL_ISSUE_TYPES,
+            label="Issues with referral message (select all that apply)",
+            interactive=True,
+            visible=False,
+        )
+        referral_comments = gr.Textbox(
+            label="Comments on referral message",
+            placeholder="e.g., 'Incomplete summary', 'Tone inappropriate'",
+            lines=2,
+            interactive=True,
+            visible=False,
+        )
+        # Indicator issues components
+        indicator_issues = gr.Textbox(
+            label="Incorrectly identified indicators",
+            placeholder="List indicator IDs or names that were incorrectly identified",
+            lines=2,
+            interactive=True,
+            visible=False,
+        )
+        indicator_comments = gr.Textbox(
+            label="Comments on indicators",
+            placeholder="e.g., 'Missed anxiety indicators', 'False positive on grief'",
+            lines=2,
+            interactive=True,
+            visible=False,
+        )
+        # General notes component
+        notes_section = gr.Textbox(
+            label="General Notes",
+            placeholder="Any additional feedback or observations",
+            lines=3,
+            interactive=True,
+            visible=False,
+        )
+        return is_correct, subcategory, correct_classification, question_issues, question_comments, referral_issues, referral_comments, indicator_issues, indicator_comments, notes_section
+    @staticmethod
+    def create_indicator_display() -> Tuple[gr.Component, gr.Component]:
+        """
+        Create IndicatorDisplay component.
+        Shows indicators with categories and allows tagging incorrect indicators.
+        Returns:
+            Tuple of (indicators_display, indicator_tagging) components
+        Requirements: 8.1, 8.2, 8.3
+        """
+        indicators_display = gr.Markdown(
+            value="No indicators to display",
+            label="Detected Indicators",
+        )
+        indicator_tagging = gr.Group(visible=False)
+        with indicator_tagging:
+            incorrect_indicators = gr.CheckboxGroup(
+                choices=[],
+                label="Select indicators that are incorrectly identified",
+                interactive=True,
+            )
+            indicator_notes = gr.Textbox(
+                label="Why are these indicators incorrect?",
+                placeholder="Explain why these indicators don't apply",
+                lines=2,
+                interactive=True,
+            )
+        return indicators_display, indicator_tagging
+    @staticmethod
+    def create_error_pattern_summary() -> Tuple[gr.Component, gr.Component, gr.Component]:
+        """
+        Create ErrorPatternSummary component.
+        Displays error patterns grouped by type with frequent subcategories highlighted.
+        Returns:
+            Tuple of (error_patterns, subcategory_breakdown, recommendations) components
+        Requirements: 10.1, 10.2, 10.3
+        """
+        error_patterns = gr.Markdown(
+            value="No error patterns yet",
+            label="Error Patterns",
+        )
+        subcategory_breakdown = gr.Markdown(
+            value="No data",
+            label="Subcategory Breakdown",
+        )
+        recommendations = gr.Markdown(
+            value="No recommendations yet",
+            label="Recommendations for Improvement",
+        )
+        return error_patterns, subcategory_breakdown, recommendations
+    @staticmethod
+    def render_error_patterns(
+        classification_errors: Dict[str, int],
+        question_errors: Dict[str, int],
+        referral_errors: Dict[str, int],
+    ) -> Tuple[str, str, str]:
+        """
+        Render error patterns summary.
+        Args:
+            classification_errors: Dict of classification error subcategories with counts
+            question_errors: Dict of question issue types with counts
+            referral_errors: Dict of referral issue types with counts
+        Returns:
+            Tuple of (patterns, breakdown, recommendations) markdown strings
+        """
+        # Error patterns grouped by type
+        patterns = "### Error Patterns\n\n"
+        total_classification_errors = sum(classification_errors.values())
+        total_question_errors = sum(question_errors.values())
+        total_referral_errors = sum(referral_errors.values())
+        if total_classification_errors > 0:
+            patterns += f"#### Classification Errors: {total_classification_errors} total\n\n"
+            for subcategory, count in sorted(classification_errors.items(), key=lambda x: x[1], reverse=True):
+                patterns += f"• {subcategory}: {count}\n"
+            patterns += "\n"
+        if total_question_errors > 0:
+            patterns += f"#### Follow-Up Question Issues: {total_question_errors} total\n\n"
+            for issue_type, count in sorted(question_errors.items(), key=lambda x: x[1], reverse=True):
+                patterns += f"• {issue_type}: {count}\n"
+            patterns += "\n"
+        if total_referral_errors > 0:
+            patterns += f"#### Referral Message Issues: {total_referral_errors} total\n\n"
+            for issue_type, count in sorted(referral_errors.items(), key=lambda x: x[1], reverse=True):
+                patterns += f"• {issue_type}: {count}\n"
+            patterns += "\n"
+        # Subcategory breakdown
+        breakdown = "### Subcategory Breakdown\n\n"
+        if classification_errors:
+            breakdown += "**Classification Errors:**\n"
+            for subcategory, count in sorted(classification_errors.items(), key=lambda x: x[1], reverse=True):
+                breakdown += f"- {subcategory}: {count}\n"
+            breakdown += "\n"
+        if question_errors:
+            breakdown += "**Question Issues:**\n"
+            for issue_type, count in sorted(question_errors.items(), key=lambda x: x[1], reverse=True):
+                breakdown += f"- {issue_type}: {count}\n"
+            breakdown += "\n"
+        if referral_errors:
+            breakdown += "**Referral Issues:**\n"
+            for issue_type, count in sorted(referral_errors.items(), key=lambda x: x[1], reverse=True):
+                breakdown += f"- {issue_type}: {count}\n"
+            breakdown += "\n"
+        # Recommendations
+        recommendations = "### Recommendations for Improvement\n\n"
+        # Find most common errors
+        all_errors = {}
+        for subcategory, count in classification_errors.items():
+            all_errors[f"Classification: {subcategory}"] = count
+        for issue_type, count in question_errors.items():
+            all_errors[f"Questions: {issue_type}"] = count
+        for issue_type, count in referral_errors.items():
+            all_errors[f"Referral: {issue_type}"] = count
+        if all_errors:
+            sorted_errors = sorted(all_errors.items(), key=lambda x: x[1], reverse=True)
+            top_3 = sorted_errors[:3]
+            recommendations += "**Top areas for improvement:**\n\n"
+            for error_type, count in top_3:
+                recommendations += f"1. **{error_type}** ({count} occurrences)\n"
+                recommendations += f"   - Review prompts and logic for this error type\n"
+                recommendations += f"   - Consider additional training data\n\n"
+        else:
+            recommendations += "No errors detected yet. Great job!\n\n"
+        return patterns, breakdown, recommendations

src/interface/simplified_gradio_app.py CHANGED Viewed

@@ -29,10 +29,13 @@ from typing import Dict, Any, Optional, List
 from src.core.simplified_medical_app import SimplifiedMedicalApp
 from src.core.spiritual_state import SpiritualState
 from src.interface.verification_ui import VerificationUIComponents
 from src.core.test_datasets import TestDatasetManager
 from src.core.verification_models import VerificationSession, VerificationRecord, TestMessage
 from src.core.verification_store import JSONVerificationStore
 from src.core.verification_csv_exporter import VerificationCSVExporter
 try:
     from app_config import GRADIO_CONFIG
@@ -159,9 +162,9 @@ def create_simplified_interface():
                             skip_btn = gr.Button("⏭️ Skip", scale=1)
                             next_btn = gr.Button("Next ➡️", scale=1)
-                        # Save results button
                         with gr.Row():
-                            save_results_btn = gr.Button("💾 Save Results (CSV)", variant="primary", scale=2)
                             clear_session_btn = gr.Button("🗑️ Clear Session", scale=1)
                     with gr.Column(scale=1):
@@ -174,6 +177,28 @@ def create_simplified_interface():
                         # Summary card
                         summary_card = VerificationUIComponents.create_summary_card_component()
                 # Results section
                 with gr.Row(visible=False) as results_section:
                     with gr.Column():
@@ -196,8 +221,8 @@ def create_simplified_interface():
                 # Error message display
                 error_message = gr.Markdown(
                     value="",
-                    visible=False,
-                    label="Error"
                 )
                 # Hidden state for tracking
@@ -1238,32 +1263,30 @@ To revert, use "Reset to Default" button.
                 )
         def handle_download_csv(session: VerificationSession, store: JSONVerificationStore):
-            """Handle CSV download."""
             try:
                 if not session or session.verified_count == 0:
-                    return None, "❌ No verified messages to export"
                 csv_content = VerificationCSVExporter.generate_csv_content(session)
                 filename = VerificationCSVExporter.generate_csv_filename()
-                # Write to temporary file
-                import tempfile
                 import os
-                # Create temp directory if it doesn't exist
-                temp_dir = "/tmp/verification_exports"
-                os.makedirs(temp_dir, exist_ok=True)
-                # Write to file with proper filename
-                temp_path = os.path.join(temp_dir, filename)
-                with open(temp_path, 'w') as f:
                     f.write(csv_content)
-                success_msg = f"✅ Results exported: {filename}"
-                return temp_path, success_msg
             except Exception as e:
-                return None, f"❌ Error exporting CSV: {str(e)}"
         # Bind verification events
         load_dataset_btn.click(
@@ -1536,11 +1559,11 @@ To revert, use "Reset to Default" button.
             ]
         )
-        # Save results button
         save_results_btn.click(
             handle_download_csv,
             inputs=[verification_session, verification_store],
-            outputs=[csv_download, error_message]
         )
         # Clear session button
@@ -1576,6 +1599,93 @@ To revert, use "Reset to Default" button.
             ]
         )
         # Bind events
         demo.load(
             initialize_session,

 from src.core.simplified_medical_app import SimplifiedMedicalApp
 from src.core.spiritual_state import SpiritualState
 from src.interface.verification_ui import VerificationUIComponents
+from src.interface.chaplain_feedback_ui import ChaplainFeedbackUIComponents
 from src.core.test_datasets import TestDatasetManager
 from src.core.verification_models import VerificationSession, VerificationRecord, TestMessage
 from src.core.verification_store import JSONVerificationStore
 from src.core.verification_csv_exporter import VerificationCSVExporter
+from src.core.chaplain_models import ClassificationFlowResult, DistressIndicator, FollowUpQuestion
+from src.core.error_pattern_analyzer import ErrorPatternAnalyzer
 try:
     from app_config import GRADIO_CONFIG
                             skip_btn = gr.Button("⏭️ Skip", scale=1)
                             next_btn = gr.Button("Next ➡️", scale=1)
+                        # Save results button - using DownloadButton for Hugging Face compatibility
                         with gr.Row():
+                            save_results_btn = gr.DownloadButton("💾 Download Results (CSV)", variant="primary", scale=2)
                             clear_session_btn = gr.Button("🗑️ Clear Session", scale=1)
                     with gr.Column(scale=1):
                         # Summary card
                         summary_card = VerificationUIComponents.create_summary_card_component()
+                # Chaplain Feedback Section - for displaying classification flows and collecting feedback
+                chaplain_feedback_section = gr.Row(visible=False)
+                with chaplain_feedback_section:
+                    with gr.Column(scale=2):
+                        # Classification flow display
+                        flow_badge, flow_explanation, flow_content, flow_indicators = ChaplainFeedbackUIComponents.create_classification_flow_display()
+                        # Tagging interface - returns individual components
+                        (is_correct, subcategory, correct_classification,
+                         question_issues, question_comments,
+                         referral_issues, referral_comments,
+                         indicator_issues, indicator_comments, general_notes) = ChaplainFeedbackUIComponents.create_tagging_interface()
+                        # Submit feedback button
+                        with gr.Row():
+                            submit_feedback_btn = gr.Button("✓ Submit Feedback", variant="primary", scale=2)
+                            skip_feedback_btn = gr.Button("⏭️ Skip Feedback", scale=1)
+                    with gr.Column(scale=1):
+                        # Error pattern summary
+                        error_patterns, subcategory_breakdown, recommendations = ChaplainFeedbackUIComponents.create_error_pattern_summary()
                 # Results section
                 with gr.Row(visible=False) as results_section:
                     with gr.Column():
                 # Error message display
                 error_message = gr.Markdown(
                     value="",
+                    visible=True,
+                    label="Status"
                 )
                 # Hidden state for tracking
                 )
         def handle_download_csv(session: VerificationSession, store: JSONVerificationStore):
+            """Handle CSV download - returns file path for DownloadButton."""
             try:
                 if not session or session.verified_count == 0:
+                    return None
                 csv_content = VerificationCSVExporter.generate_csv_content(session)
                 filename = VerificationCSVExporter.generate_csv_filename()
                 import os
+                import tempfile
+                # Use temp directory for Hugging Face compatibility
+                temp_dir = tempfile.gettempdir()
+                file_path = os.path.join(temp_dir, filename)
+                with open(file_path, 'w', encoding='utf-8') as f:
                     f.write(csv_content)
+                return file_path
             except Exception as e:
+                import traceback
+                print(f"CSV Export Error: {traceback.format_exc()}")
+                return None
         # Bind verification events
         load_dataset_btn.click(
             ]
         )
+        # Save results button - DownloadButton triggers download directly
         save_results_btn.click(
             handle_download_csv,
             inputs=[verification_session, verification_store],
+            outputs=[save_results_btn]
         )
         # Clear session button
             ]
         )
+        # Chaplain Feedback Event Handlers
+        def show_chaplain_feedback_section():
+            """Show chaplain feedback section after message review."""
+            return gr.Row(visible=True)
+        def handle_submit_feedback(
+            classification_correct: bool,
+            classification_subcategory: Optional[str],
+            correct_classification: Optional[str],
+            question_issues: List[str],
+            question_comments: str,
+            referral_issues: List[str],
+            referral_comments: str,
+            indicator_issues: str,
+            indicator_comments: str,
+            general_notes: str,
+            session: VerificationSession,
+            current_idx: int,
+            message_queue: List[str],
+        ):
+            """Handle chaplain feedback submission."""
+            try:
+                if not session or current_idx >= len(message_queue):
+                    return "❌ Error: Invalid session state", session, current_idx
+                # Create tagging record
+                from src.core.chaplain_models import TaggingRecord
+                import uuid
+                current_message_id = message_queue[current_idx]
+                tagging_record = TaggingRecord(
+                    record_id=str(uuid.uuid4()),
+                    message_id=current_message_id,
+                    is_classification_correct=classification_correct,
+                    classification_subcategory=classification_subcategory,
+                    correct_classification=correct_classification,
+                    question_issues=question_issues or [],
+                    question_comments=question_comments,
+                    referral_issues=referral_issues or [],
+                    referral_comments=referral_comments,
+                    indicator_issues=[i.strip() for i in indicator_issues.split(",") if i.strip()],
+                    indicator_comments=indicator_comments,
+                    general_notes=general_notes,
+                )
+                # Store tagging record in session (would need to extend VerificationSession)
+                # For now, just confirm submission
+                success_msg = f"✅ Feedback submitted for message {current_idx + 1}"
+                return success_msg, session, current_idx
+            except Exception as e:
+                return f"❌ Error: {str(e)}", session, current_idx
+        def display_classification_flow(flow_result: Optional[ClassificationFlowResult]):
+            """Display classification flow result."""
+            if not flow_result:
+                return "", "", "", ""
+            badge, explanation, content, indicators = ChaplainFeedbackUIComponents.render_classification_flow(flow_result)
+            return badge, explanation, content, indicators
+        # Bind chaplain feedback events
+        submit_feedback_btn.click(
+            handle_submit_feedback,
+            inputs=[
+                is_correct,  # is_correct radio
+                subcategory,  # subcategory dropdown
+                correct_classification,  # correct_classification radio
+                question_issues,  # question_issues checkbox
+                question_comments,  # question_comments textbox
+                referral_issues,  # referral_issues checkbox
+                referral_comments,  # referral_comments textbox
+                indicator_issues,  # indicator_issues textbox
+                indicator_comments,  # indicator_comments textbox
+                general_notes,
+                verification_session,
+                current_message_index,
+                message_queue,
+            ],
+            outputs=[error_message, verification_session, current_message_index]
+        ).then(
+            lambda: gr.Row(visible=False),
+            outputs=[chaplain_feedback_section]
+        )
         # Bind events
         demo.load(
             initialize_session,

tests/chaplain_feedback/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # tests/chaplain_feedback/__init__.py
2	+ """Tests for Chaplain Feedback & Tagging System."""

tests/chaplain_feedback/conftest.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# conftest.py
+"""
+Pytest fixtures for Chaplain Feedback tests.
+"""
+import pytest
+from hypothesis import strategies as st
+from datetime import datetime
+from src.core.chaplain_models import (
+    DistressIndicator,
+    FollowUpQuestion,
+    ClassificationFlowResult,
+    TaggingRecord,
+    InteractionStepLog,
+    INDICATOR_DEFINITIONS,
+    CLASSIFICATION_SUBCATEGORIES,
+    QUESTION_ISSUE_TYPES,
+    REFERRAL_ISSUE_TYPES,
+    INTERACTION_STEP_TYPES,
+)
+# =============================================================================
+# Hypothesis Strategies for generating test data
+# =============================================================================
+def valid_id_strategy():
+    """Generate valid IDs."""
+    return st.text(
+        alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+        min_size=1,
+        max_size=20,
+    )
+def distress_indicator_strategy():
+    """Generate random DistressIndicator instances."""
+    return st.builds(
+        DistressIndicator,
+        indicator_text=st.text(min_size=1, max_size=200),
+        category=st.sampled_from([
+            "Emotional", "Grief", "Existential", "Expressions",
+            "Spiritual", "Medical", "Social", "Cultural",
+            "Engagement", "Guilt", "Anger", "Aging",
+            "Environment", "Independence"
+        ]),
+        subcategory=st.text(min_size=1, max_size=100),
+        severity=st.sampled_from(["red", "yellow"]),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        definition_reference=st.text(max_size=20),
+    )
+def follow_up_question_strategy():
+    """Generate random FollowUpQuestion instances."""
+    return st.builds(
+        FollowUpQuestion,
+        question_id=valid_id_strategy(),
+        question_text=st.text(min_size=1, max_size=500),
+        purpose=st.text(min_size=1, max_size=200),
+    )
+def classification_flow_result_strategy():
+    """Generate random ClassificationFlowResult instances."""
+    return st.builds(
+        ClassificationFlowResult,
+        classification=st.sampled_from(["red", "yellow", "green"]),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), max_size=5),
+        explanation=st.text(max_size=500),
+        permission_check_message=st.one_of(st.none(), st.text(max_size=300)),
+        referral_message=st.one_of(st.none(), st.text(max_size=500)),
+        consent_status=st.one_of(st.none(), st.sampled_from(["granted", "declined"])),
+        follow_up_questions=st.lists(follow_up_question_strategy(), max_size=3),
+        patient_responses=st.lists(st.text(max_size=200), max_size=3),
+        re_evaluation_result=st.one_of(st.none(), st.sampled_from(["red", "green"])),
+    )
+def tagging_record_strategy():
+    """Generate random TaggingRecord instances."""
+    return st.builds(
+        TaggingRecord,
+        record_id=valid_id_strategy(),
+        message_id=valid_id_strategy(),
+        is_classification_correct=st.booleans(),
+        classification_subcategory=st.one_of(
+            st.none(),
+            st.sampled_from(CLASSIFICATION_SUBCATEGORIES)
+        ),
+        correct_classification=st.one_of(
+            st.none(),
+            st.sampled_from(["red", "yellow", "green"])
+        ),
+        question_issues=st.lists(
+            st.sampled_from(QUESTION_ISSUE_TYPES),
+            max_size=3,
+            unique=True
+        ),
+        question_comments=st.one_of(st.none(), st.text(max_size=200)),
+        referral_issues=st.lists(
+            st.sampled_from(REFERRAL_ISSUE_TYPES),
+            max_size=3,
+            unique=True
+        ),
+        referral_comments=st.one_of(st.none(), st.text(max_size=200)),
+        indicator_issues=st.lists(st.text(min_size=1, max_size=50), max_size=5),
+        indicator_comments=st.one_of(st.none(), st.text(max_size=200)),
+        general_notes=st.text(max_size=300),
+        timestamp=st.just(datetime.now()),
+    )
+def interaction_step_log_strategy():
+    """Generate random InteractionStepLog instances (without nested tagging)."""
+    return st.builds(
+        InteractionStepLog,
+        step_id=valid_id_strategy(),
+        session_id=valid_id_strategy(),
+        message_id=valid_id_strategy(),
+        step_type=st.sampled_from(INTERACTION_STEP_TYPES),
+        input_text=st.text(max_size=500),
+        model_output=st.text(max_size=500),
+        approval_status=st.one_of(st.none(), st.sampled_from(["approved", "disapproved"])),
+        tagging_data=st.none(),  # Simplified - no nested tagging for basic tests
+        timestamp=st.just(datetime.now()),
+    )
+def interaction_step_log_with_tagging_strategy():
+    """Generate random InteractionStepLog instances with nested tagging."""
+    return st.builds(
+        InteractionStepLog,
+        step_id=valid_id_strategy(),
+        session_id=valid_id_strategy(),
+        message_id=valid_id_strategy(),
+        step_type=st.sampled_from(INTERACTION_STEP_TYPES),
+        input_text=st.text(max_size=500),
+        model_output=st.text(max_size=500),
+        approval_status=st.one_of(st.none(), st.sampled_from(["approved", "disapproved"])),
+        tagging_data=st.one_of(st.none(), tagging_record_strategy()),
+        timestamp=st.just(datetime.now()),
+    )

tests/chaplain_feedback/test_properties_classification_flow.py ADDED Viewed

	@@ -0,0 +1,297 @@

+# test_properties_classification_flow.py
+"""
+Property-based tests for Classification Flow Manager.
+Tests universal properties that should hold across all inputs for
+RED/YELLOW/GREEN classification flows.
+"""
+import pytest
+from hypothesis import given, strategies as st
+from src.core.classification_flow_manager import ClassificationFlowManager
+from src.core.content_generator import ContentGenerator
+from src.core.chaplain_models import DistressIndicator
+from tests.chaplain_feedback.conftest import distress_indicator_strategy
+class TestClassificationFlowProperties:
+    """Property-based tests for ClassificationFlowManager."""
+    def setup_method(self):
+        """Set up test fixtures."""
+        self.content_generator = ContentGenerator()
+        self.flow_manager = ClassificationFlowManager(self.content_generator)
+    @given(
+        message=st.text(min_size=1, max_size=500),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
+        consent_status=st.sampled_from(["granted", "declined"])
+    )
+    def test_property_4_red_flow_displays_all_content(
+        self, message, confidence, indicators, consent_status
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 4: RED Flow Displays All Content**
+        **Validates: Requirements 1.5**
+        For any RED classification result, the UI should display all three content types:
+        explanation, permission check message, and referral message (if consent granted).
+        """
+        # Execute RED flow
+        result = self.flow_manager.execute_red_flow(
+            message=message,
+            confidence=confidence,
+            indicators=indicators,
+            consent_status=consent_status
+        )
+        # Verify all required content is present
+        assert result.classification == "red"
+        assert result.explanation is not None and result.explanation.strip() != ""
+        assert result.permission_check_message is not None and result.permission_check_message.strip() != ""
+        assert result.consent_status == consent_status
+        # If consent granted, referral message should be present
+        if consent_status == "granted":
+            assert result.referral_message is not None and result.referral_message.strip() != ""
+        else:
+            # If consent declined, referral message should be None
+            assert result.referral_message is None
+        # Verify indicators are preserved
+        assert result.indicators == indicators
+        assert result.confidence == confidence
+    @given(
+        message=st.text(min_size=1, max_size=500),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5)
+    )
+    def test_property_5_yellow_explanation_differentiates(
+        self, message, confidence, indicators
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 5: YELLOW Explanation Differentiates**
+        **Validates: Requirements 2.1**
+        For any YELLOW classification, the explanation should contain reasoning
+        for why it's not RED and why it's not GREEN.
+        """
+        # Execute YELLOW flow
+        result = self.flow_manager.execute_yellow_flow(
+            message=message,
+            confidence=confidence,
+            indicators=indicators
+        )
+        # Verify explanation differentiates from RED and GREEN
+        explanation = result.explanation.lower()
+        # Should explain why not RED
+        assert any(phrase in explanation for phrase in [
+            "why not red", "not red", "not meet the threshold",
+            "do not meet", "further clarification", "not severe"
+        ]), f"Explanation should explain why not RED: {result.explanation}"
+        # Should explain why not GREEN
+        assert any(phrase in explanation for phrase in [
+            "why not green", "not green", "indicators", "concerns",
+            "warrant follow-up", "suggest possible"
+        ]), f"Explanation should explain why not GREEN: {result.explanation}"
+        # Verify other YELLOW flow properties
+        assert result.classification == "yellow"
+        assert result.explanation is not None and result.explanation.strip() != ""
+        assert len(result.follow_up_questions) >= 2
+        assert len(result.follow_up_questions) <= 3
+    @given(
+        message=st.text(min_size=1, max_size=500),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5)
+    )
+    def test_property_6_yellow_generates_2_3_questions(
+        self, message, confidence, indicators
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
+        **Validates: Requirements 2.2**
+        For any YELLOW classification, the system should generate between 2 and 3
+        follow-up questions, each containing 1-2 clarifying questions.
+        """
+        # Execute YELLOW flow
+        result = self.flow_manager.execute_yellow_flow(
+            message=message,
+            confidence=confidence,
+            indicators=indicators
+        )
+        # Verify question count
+        assert 2 <= len(result.follow_up_questions) <= 3, (
+            f"Expected 2-3 questions, got {len(result.follow_up_questions)}"
+        )
+        # Verify each question has required fields
+        for question in result.follow_up_questions:
+            assert question.question_id is not None and question.question_id.strip() != ""
+            assert question.question_text is not None and question.question_text.strip() != ""
+            assert question.purpose is not None and question.purpose.strip() != ""
+            # Each question should contain 1-2 clarifying questions (check for question marks)
+            question_marks = question.question_text.count("?")
+            assert 1 <= question_marks <= 2, (
+                f"Expected 1-2 questions per follow-up, got {question_marks} in: {question.question_text}"
+            )
+    @given(
+        message=st.text(min_size=1, max_size=500),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), max_size=2)  # GREEN should have few/no indicators
+    )
+    def test_property_9_green_explanation_generated(
+        self, message, confidence, indicators
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 9: GREEN Explanation Generated**
+        **Validates: Requirements 3.1, 3.2**
+        For any GREEN classification, an explanation should be generated explaining
+        why no spiritual indicators were found.
+        """
+        # Execute GREEN flow
+        result = self.flow_manager.execute_green_flow(
+            message=message,
+            confidence=confidence,
+            indicators=indicators
+        )
+        # Verify explanation is generated
+        assert result.classification == "green"
+        assert result.explanation is not None and result.explanation.strip() != ""
+        # Explanation should mention no indicators or no distress
+        explanation = result.explanation.lower()
+        assert any(phrase in explanation for phrase in [
+            "no spiritual distress", "no indicators", "not suggest spiritual",
+            "no spiritual concerns", "no further steps"
+        ]), f"GREEN explanation should mention no distress: {result.explanation}"
+        # Should explain why not RED or YELLOW
+        assert any(phrase in explanation for phrase in [
+            "why not red", "why not yellow", "not contain", "does not suggest"
+        ]), f"GREEN explanation should differentiate from RED/YELLOW: {result.explanation}"
+        # GREEN flow should not have RED/YELLOW specific content
+        assert result.permission_check_message is None
+        assert result.referral_message is None
+        assert result.consent_status is None
+        assert len(result.follow_up_questions) == 0
+        assert len(result.patient_responses) == 0
+        assert result.re_evaluation_result is None
+    @given(
+        message=st.text(min_size=1, max_size=500),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
+        # Generate responses that contain escalation keywords
+        escalation_responses=st.lists(
+            st.sampled_from([
+                "I feel hopeless about everything",
+                "I feel worthless and can't go on",
+                "There's no point in anything anymore",
+                "I want to give up completely",
+                "This is unbearable, I can't take it"
+            ]),
+            min_size=1,
+            max_size=3
+        )
+    )
+    def test_property_7_yellow_escalation_to_red(
+        self, message, confidence, indicators, escalation_responses
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 7: YELLOW Escalation to RED**
+        **Validates: Requirements 2.4**
+        For any YELLOW classification where simulated patient responses indicate distress,
+        the system should transition to RED FLAG flow.
+        """
+        # Execute YELLOW flow with escalation responses
+        result = self.flow_manager.execute_yellow_flow(
+            message=message,
+            confidence=confidence,
+            indicators=indicators,
+            patient_responses=escalation_responses
+        )
+        # Verify escalation occurred
+        assert result.re_evaluation_result == "red", (
+            f"Expected escalation to RED, got {result.re_evaluation_result} "
+            f"for responses: {escalation_responses}"
+        )
+        # Test the escalation method
+        escalated_result = self.flow_manager.escalate_yellow_to_red(result, message)
+        # Verify escalated result is RED
+        assert escalated_result.classification == "red"
+        assert escalated_result.explanation is not None
+        assert escalated_result.permission_check_message is not None
+        assert escalated_result.referral_message is not None  # Should have consent granted
+        assert escalated_result.consent_status == "granted"
+    @given(
+        message=st.text(min_size=1, max_size=500),
+        confidence=st.floats(min_value=0.0, max_value=1.0, allow_nan=False),
+        indicators=st.lists(distress_indicator_strategy(), min_size=1, max_size=5),
+        # Generate responses that contain downgrade keywords
+        downgrade_responses=st.lists(
+            st.sampled_from([
+                "I'm feeling better now",
+                "Everything is okay",
+                "I have good support from my family",
+                "I'm not worried about it",
+                "I'm managing well",
+                "I feel hopeful about the future"
+            ]),
+            min_size=1,
+            max_size=3
+        )
+    )
+    def test_property_8_yellow_downgrade_to_green(
+        self, message, confidence, indicators, downgrade_responses
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 8: YELLOW Downgrade to GREEN**
+        **Validates: Requirements 2.5**
+        For any YELLOW classification where simulated patient responses indicate no distress,
+        the system should transition to GREEN status.
+        """
+        # Execute YELLOW flow with downgrade responses
+        result = self.flow_manager.execute_yellow_flow(
+            message=message,
+            confidence=confidence,
+            indicators=indicators,
+            patient_responses=downgrade_responses
+        )
+        # Verify downgrade occurred
+        assert result.re_evaluation_result == "green", (
+            f"Expected downgrade to GREEN, got {result.re_evaluation_result} "
+            f"for responses: {downgrade_responses}"
+        )
+        # Test the downgrade method
+        downgraded_result = self.flow_manager.downgrade_yellow_to_green(result, message)
+        # Verify downgraded result is GREEN
+        assert downgraded_result.classification == "green"
+        assert downgraded_result.explanation is not None
+        assert downgraded_result.permission_check_message is None
+        assert downgraded_result.referral_message is None
+        assert downgraded_result.consent_status is None
+        assert len(downgraded_result.follow_up_questions) == 0

tests/chaplain_feedback/test_properties_content_generator.py ADDED Viewed

	@@ -0,0 +1,399 @@

+# test_properties_content_generator.py
+"""
+Property-based tests for Content Generator Service.
+Tests that content generation follows the specification requirements.
+"""
+import pytest
+from hypothesis import given, settings, assume
+from hypothesis import strategies as st
+from src.core.chaplain_models import (
+    DistressIndicator,
+    FollowUpQuestion,
+)
+from src.core.content_generator import ContentGenerator
+from tests.chaplain_feedback.conftest import (
+    distress_indicator_strategy,
+)
+# =============================================================================
+# Strategies for content generator tests
+# =============================================================================
+def non_empty_indicators_strategy():
+    """Generate non-empty list of distress indicators."""
+    return st.lists(distress_indicator_strategy(), min_size=1, max_size=5)
+def red_indicators_strategy():
+    """Generate list with at least one RED severity indicator."""
+    return st.lists(
+        distress_indicator_strategy(),
+        min_size=1,
+        max_size=5
+    ).filter(lambda indicators: any(i.severity == "red" for i in indicators))
+def patient_message_strategy():
+    """Generate patient message text."""
+    return st.text(min_size=10, max_size=500).filter(lambda s: s.strip())
+# =============================================================================
+# Property Tests for RED Explanation
+# =============================================================================
+class TestRedExplanationContainsIndicators:
+    """
+    **Feature: chaplain-feedback-system, Property 1: RED Explanation Contains Indicators**
+    **Validates: Requirements 1.1**
+    For any RED classification, the generated explanation should reference
+    at least one distress indicator from the definitions document categories.
+    """
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_red_explanation_contains_indicator_references(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 1: RED Explanation Contains Indicators**
+        **Validates: Requirements 1.1**
+        For any RED classification with indicators, the explanation should
+        reference at least one indicator's subcategory or category.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        # The explanation should contain at least one indicator reference
+        indicator_referenced = False
+        for indicator in indicators:
+            if indicator.subcategory in explanation or indicator.category in explanation:
+                indicator_referenced = True
+                break
+        assert indicator_referenced, (
+            f"RED explanation should reference at least one indicator. "
+            f"Indicators: {[i.subcategory for i in indicators]}"
+        )
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_red_explanation_mentions_red_flag(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 1: RED Explanation Contains Indicators**
+        **Validates: Requirements 1.1**
+        For any RED classification, the explanation should mention RED FLAG.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        assert "RED FLAG" in explanation or "red" in explanation.lower(), (
+            "RED explanation should mention RED FLAG classification"
+        )
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_red_explanation_mentions_spiritual_care(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 1: RED Explanation Contains Indicators**
+        **Validates: Requirements 1.1**
+        For any RED classification, the explanation should mention spiritual care team.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        assert "spiritual" in explanation.lower(), (
+            "RED explanation should mention spiritual care"
+        )
+# =============================================================================
+# Property Tests for Permission Check Message
+# =============================================================================
+class TestRedPermissionCheckGenerated:
+    """
+    **Feature: chaplain-feedback-system, Property 2: RED Permission Check Generated**
+    **Validates: Requirements 1.2**
+    For any RED classification, a patient permission check message should be
+    generated and contain consent-related language.
+    """
+    @given(indicators=non_empty_indicators_strategy())
+    @settings(max_examples=100)
+    def test_permission_check_contains_spiritual_support(self, indicators):
+        """
+        **Feature: chaplain-feedback-system, Property 2: RED Permission Check Generated**
+        **Validates: Requirements 1.2**
+        For any RED classification, the permission check message should
+        contain "spiritual" language.
+        """
+        generator = ContentGenerator()
+        message = generator.generate_permission_check(indicators)
+        assert "spiritual" in message.lower(), (
+            "Permission check message should mention spiritual support"
+        )
+    @given(indicators=non_empty_indicators_strategy())
+    @settings(max_examples=100)
+    def test_permission_check_contains_consent_language(self, indicators):
+        """
+        **Feature: chaplain-feedback-system, Property 2: RED Permission Check Generated**
+        **Validates: Requirements 1.2**
+        For any RED classification, the permission check message should
+        contain consent-related language.
+        """
+        generator = ContentGenerator()
+        message = generator.generate_permission_check(indicators)
+        # Check for consent-related terms
+        consent_terms = ["consent", "permission", "voluntary", "would you like"]
+        has_consent_language = any(term in message.lower() for term in consent_terms)
+        assert has_consent_language, (
+            f"Permission check message should contain consent language. "
+            f"Message: {message[:200]}..."
+        )
+    @given(indicators=non_empty_indicators_strategy())
+    @settings(max_examples=100)
+    def test_permission_check_is_non_empty(self, indicators):
+        """
+        **Feature: chaplain-feedback-system, Property 2: RED Permission Check Generated**
+        **Validates: Requirements 1.2**
+        For any RED classification, a non-empty permission check message
+        should be generated.
+        """
+        generator = ContentGenerator()
+        message = generator.generate_permission_check(indicators)
+        assert message and len(message.strip()) > 0, (
+            "Permission check message should not be empty"
+        )
+# =============================================================================
+# Property Tests for Referral Message
+# =============================================================================
+class TestRedReferralMessageContainsRequiredSections:
+    """
+    **Feature: chaplain-feedback-system, Property 3: RED Referral Message Contains Required Sections**
+    **Validates: Requirements 1.3**
+    For any RED classification with granted consent, the referral message should
+    contain: background information, detected indicators, and justification.
+    """
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_referral_message_contains_background(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 3: RED Referral Message Contains Required Sections**
+        **Validates: Requirements 1.3**
+        For any RED classification, the referral message should contain
+        background information section.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        referral = generator.generate_referral_message(message, indicators, explanation)
+        assert "BACKGROUND" in referral.upper(), (
+            "Referral message should contain BACKGROUND section"
+        )
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_referral_message_contains_indicators_section(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 3: RED Referral Message Contains Required Sections**
+        **Validates: Requirements 1.3**
+        For any RED classification, the referral message should contain
+        indicators section.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        referral = generator.generate_referral_message(message, indicators, explanation)
+        assert "INDICATORS" in referral.upper(), (
+            "Referral message should contain INDICATORS section"
+        )
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_referral_message_contains_justification(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 3: RED Referral Message Contains Required Sections**
+        **Validates: Requirements 1.3**
+        For any RED classification, the referral message should contain
+        justification section.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        referral = generator.generate_referral_message(message, indicators, explanation)
+        assert "JUSTIFICATION" in referral.upper(), (
+            "Referral message should contain JUSTIFICATION section"
+        )
+    @given(
+        indicators=non_empty_indicators_strategy(),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_referral_message_references_indicators(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 3: RED Referral Message Contains Required Sections**
+        **Validates: Requirements 1.3**
+        For any RED classification with indicators, the referral message should
+        reference at least one indicator.
+        """
+        generator = ContentGenerator()
+        explanation = generator.generate_explanation("red", indicators, message)
+        referral = generator.generate_referral_message(message, indicators, explanation)
+        # Check that at least one indicator is referenced
+        indicator_referenced = False
+        for indicator in indicators:
+            if indicator.subcategory in referral or indicator.category in referral:
+                indicator_referenced = True
+                break
+        assert indicator_referenced, (
+            f"Referral message should reference at least one indicator. "
+            f"Indicators: {[i.subcategory for i in indicators]}"
+        )
+# =============================================================================
+# Property Tests for Follow-Up Questions
+# =============================================================================
+class TestYellowGenerates2To3Questions:
+    """
+    **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
+    **Validates: Requirements 2.2**
+    For any YELLOW classification, the system should generate between 2 and 3
+    follow-up questions, each containing 1-2 clarifying questions.
+    """
+    @given(
+        indicators=st.lists(distress_indicator_strategy(), min_size=0, max_size=5),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_follow_up_questions_count_in_range(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
+        **Validates: Requirements 2.2**
+        For any YELLOW classification, the number of follow-up questions
+        should be between 2 and 3.
+        """
+        generator = ContentGenerator()
+        questions = generator.generate_follow_up_questions(message, indicators)
+        assert 2 <= len(questions) <= 3, (
+            f"Should generate 2-3 follow-up questions, got {len(questions)}"
+        )
+    @given(
+        indicators=st.lists(distress_indicator_strategy(), min_size=0, max_size=5),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_follow_up_questions_have_required_fields(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
+        **Validates: Requirements 2.2**
+        For any YELLOW classification, each follow-up question should have
+        question_id, question_text, and purpose fields.
+        """
+        generator = ContentGenerator()
+        questions = generator.generate_follow_up_questions(message, indicators)
+        for question in questions:
+            assert question.question_id, "Question should have question_id"
+            assert question.question_text, "Question should have question_text"
+            assert question.purpose, "Question should have purpose"
+    @given(
+        indicators=st.lists(distress_indicator_strategy(), min_size=0, max_size=5),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_follow_up_questions_are_follow_up_question_instances(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
+        **Validates: Requirements 2.2**
+        For any YELLOW classification, all generated questions should be
+        FollowUpQuestion instances.
+        """
+        generator = ContentGenerator()
+        questions = generator.generate_follow_up_questions(message, indicators)
+        for question in questions:
+            assert isinstance(question, FollowUpQuestion), (
+                f"Question should be FollowUpQuestion instance, got {type(question)}"
+            )
+    @given(
+        indicators=st.lists(distress_indicator_strategy(), min_size=0, max_size=5),
+        message=patient_message_strategy()
+    )
+    @settings(max_examples=100)
+    def test_follow_up_questions_have_unique_ids(self, indicators, message):
+        """
+        **Feature: chaplain-feedback-system, Property 6: YELLOW Generates 2-3 Questions**
+        **Validates: Requirements 2.2**
+        For any YELLOW classification, all generated questions should have
+        unique question_ids.
+        """
+        generator = ContentGenerator()
+        questions = generator.generate_follow_up_questions(message, indicators)
+        question_ids = [q.question_id for q in questions]
+        assert len(question_ids) == len(set(question_ids)), (
+            f"Question IDs should be unique, got: {question_ids}"
+        )

tests/chaplain_feedback/test_properties_csv_export.py ADDED Viewed

	@@ -0,0 +1,290 @@

+# test_properties_csv_export.py
+"""
+Property-based tests for Enhanced CSV Export functionality.
+Tests that CSV export includes all tagging data, generated content,
+interaction logs, and statistics.
+"""
+import pytest
+from hypothesis import given, settings
+from datetime import datetime
+from src.core.verification_csv_exporter import VerificationCSVExporter
+from src.core.verification_models import VerificationSession, VerificationRecord
+from src.core.chaplain_models import (
+    TaggingRecord,
+    ClassificationFlowResult,
+    InteractionStepLog,
+    DistressIndicator,
+    FollowUpQuestion,
+)
+from tests.chaplain_feedback.conftest import (
+    tagging_record_strategy,
+    classification_flow_result_strategy,
+    interaction_step_log_strategy,
+)
+class TestExportContainsAllTags:
+    """
+    **Feature: chaplain-feedback-system, Property 17: Export Contains All Tags**
+    Tests that CSV export includes all tagging categories and subcategories.
+    """
+    @given(tagging_record_strategy())
+    @settings(max_examples=100)
+    def test_export_contains_all_tags(self, tagging_record):
+        """
+        **Feature: chaplain-feedback-system, Property 17: Export Contains All Tags**
+        **Validates: Requirements 9.1**
+        For any TaggingRecord, the CSV export should contain all tagging
+        categories and subcategories from that record.
+        """
+        # Create a minimal session
+        session = VerificationSession(
+            session_id="test_session",
+            verifier_name="Test Verifier",
+            dataset_id="test_dataset",
+            dataset_name="Test Dataset",
+            total_messages=1,
+            verified_count=1,
+            correct_count=1,
+            incorrect_count=0,
+        )
+        # Add a verification record
+        verification = VerificationRecord(
+            message_id=tagging_record.message_id,
+            original_message="Test message",
+            classifier_decision="red",
+            classifier_confidence=0.9,
+            classifier_indicators=["indicator1"],
+            ground_truth_label="red",
+            is_correct=True,
+        )
+        session.verifications.append(verification)
+        # Generate CSV with tagging records
+        csv_content = VerificationCSVExporter.generate_enhanced_csv_content(
+            session,
+            tagging_records=[tagging_record],
+        )
+        # Verify tagging data section exists
+        assert "TAGGING DATA" in csv_content
+        # Verify message ID is in export
+        assert tagging_record.message_id in csv_content
+        # Verify classification correctness is in export
+        correctness_str = "Yes" if tagging_record.is_classification_correct else "No"
+        assert correctness_str in csv_content
+        # Verify classification subcategory is in export (if present)
+        if tagging_record.classification_subcategory:
+            assert tagging_record.classification_subcategory in csv_content
+        # Verify correct classification is in export (if present)
+        if tagging_record.correct_classification:
+            assert tagging_record.correct_classification in csv_content
+        # Verify question issues are in export (if present)
+        if tagging_record.question_issues:
+            for issue in tagging_record.question_issues:
+                assert issue in csv_content
+        # Verify referral issues are in export (if present)
+        if tagging_record.referral_issues:
+            for issue in tagging_record.referral_issues:
+                assert issue in csv_content
+        # Verify indicator issues are in export (if present)
+        if tagging_record.indicator_issues:
+            for indicator_id in tagging_record.indicator_issues:
+                assert indicator_id in csv_content
+class TestExportContainsGeneratedContent:
+    """
+    **Feature: chaplain-feedback-system, Property 18: Export Contains Generated Content**
+    Tests that CSV export includes all generated content.
+    """
+    @given(classification_flow_result_strategy())
+    @settings(max_examples=100)
+    def test_export_contains_generated_content(self, flow_result):
+        """
+        **Feature: chaplain-feedback-system, Property 18: Export Contains Generated Content**
+        **Validates: Requirements 9.2**
+        For any ClassificationFlowResult, the CSV export should contain
+        all generated content (explanations, questions, referral messages).
+        """
+        # Create a minimal session
+        session = VerificationSession(
+            session_id="test_session",
+            verifier_name="Test Verifier",
+            dataset_id="test_dataset",
+            dataset_name="Test Dataset",
+            total_messages=1,
+            verified_count=1,
+            correct_count=1,
+            incorrect_count=0,
+        )
+        # Add a verification record
+        message_id = "msg_001"
+        verification = VerificationRecord(
+            message_id=message_id,
+            original_message="Test message",
+            classifier_decision=flow_result.classification,
+            classifier_confidence=flow_result.confidence,
+            classifier_indicators=[ind.indicator_text for ind in flow_result.indicators],
+            ground_truth_label=flow_result.classification,
+            is_correct=True,
+        )
+        session.verifications.append(verification)
+        # Generate CSV with flow results
+        flow_results = {message_id: flow_result}
+        csv_content = VerificationCSVExporter.generate_enhanced_csv_content(
+            session,
+            flow_results=flow_results,
+        )
+        # Verify generated content section exists
+        assert "GENERATED CONTENT" in csv_content
+        # Verify message ID is in export
+        assert message_id in csv_content
+        # Verify classification is in export
+        assert flow_result.classification.upper() in csv_content
+class TestExportContainsInteractionLogs:
+    """
+    Tests that CSV export includes interaction logs.
+    """
+    @given(interaction_step_log_strategy())
+    @settings(max_examples=100)
+    def test_export_contains_interaction_logs(self, log):
+        """
+        For any InteractionStepLog, the CSV export should contain
+        all logged interaction steps.
+        """
+        # Create a minimal session
+        session = VerificationSession(
+            session_id=log.session_id,
+            verifier_name="Test Verifier",
+            dataset_id="test_dataset",
+            dataset_name="Test Dataset",
+            total_messages=1,
+            verified_count=1,
+            correct_count=1,
+            incorrect_count=0,
+        )
+        # Add a verification record
+        verification = VerificationRecord(
+            message_id=log.message_id,
+            original_message="Test message",
+            classifier_decision="red",
+            classifier_confidence=0.9,
+            classifier_indicators=["indicator1"],
+            ground_truth_label="red",
+            is_correct=True,
+        )
+        session.verifications.append(verification)
+        # Generate CSV with interaction logs
+        csv_content = VerificationCSVExporter.generate_enhanced_csv_content(
+            session,
+            interaction_logs=[log],
+        )
+        # Verify interaction logs section exists
+        assert "INTERACTION LOGS" in csv_content
+        # Verify step ID is in export
+        assert log.step_id in csv_content
+        # Verify session ID is in export
+        assert log.session_id in csv_content
+        # Verify message ID is in export
+        assert log.message_id in csv_content
+        # Verify step type is in export
+        assert log.step_type in csv_content
+        # Verify approval status is in export (if present)
+        if log.approval_status:
+            assert log.approval_status in csv_content
+class TestExportContainsStatistics:
+    """
+    Tests that CSV export includes error pattern statistics.
+    """
+    @given(tagging_record_strategy())
+    @settings(max_examples=100)
+    def test_export_contains_statistics(self, tagging_record):
+        """
+        For any set of TaggingRecords, the CSV export should contain
+        error pattern statistics with subcategory breakdowns.
+        """
+        # Create a minimal session
+        session = VerificationSession(
+            session_id="test_session",
+            verifier_name="Test Verifier",
+            dataset_id="test_dataset",
+            dataset_name="Test Dataset",
+            total_messages=1,
+            verified_count=1,
+            correct_count=1,
+            incorrect_count=0,
+        )
+        # Add a verification record
+        verification = VerificationRecord(
+            message_id=tagging_record.message_id,
+            original_message="Test message",
+            classifier_decision="red",
+            classifier_confidence=0.9,
+            classifier_indicators=["indicator1"],
+            ground_truth_label="red",
+            is_correct=True,
+        )
+        session.verifications.append(verification)
+        # Generate CSV with tagging records (which triggers statistics)
+        csv_content = VerificationCSVExporter.generate_enhanced_csv_content(
+            session,
+            tagging_records=[tagging_record],
+        )
+        # Verify statistics section exists
+        assert "ERROR PATTERN STATISTICS" in csv_content
+        # Verify classification errors section exists
+        assert "Classification Errors" in csv_content
+        # Verify question issues section exists
+        assert "Question Issues" in csv_content
+        # Verify referral issues section exists
+        assert "Referral Issues" in csv_content
+        # Verify indicator issues section exists
+        assert "Indicator Issues" in csv_content
+        # Verify common patterns section exists
+        assert "Common Patterns" in csv_content

tests/chaplain_feedback/test_properties_data_models.py ADDED Viewed

	@@ -0,0 +1,250 @@

+# test_properties_data_models.py
+"""
+Property-based tests for Chaplain Feedback data model serialization.
+Tests that all data models serialize and deserialize correctly (round-trip).
+"""
+import pytest
+from hypothesis import given, settings
+from datetime import datetime
+from src.core.chaplain_models import (
+    DistressIndicator,
+    FollowUpQuestion,
+    ClassificationFlowResult,
+    TaggingRecord,
+    InteractionStepLog,
+    INDICATOR_DEFINITIONS,
+)
+from tests.chaplain_feedback.conftest import (
+    distress_indicator_strategy,
+    follow_up_question_strategy,
+    classification_flow_result_strategy,
+    tagging_record_strategy,
+    interaction_step_log_strategy,
+    interaction_step_log_with_tagging_strategy,
+)
+class TestDistressIndicatorRoundTrip:
+    """
+    **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+    Tests that DistressIndicator serializes and deserializes correctly.
+    """
+    @given(distress_indicator_strategy())
+    @settings(max_examples=100)
+    def test_distress_indicator_round_trip(self, indicator):
+        """
+        **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+        **Validates: Requirements 8.5**
+        For any DistressIndicator, converting to dict and back should
+        preserve all fields exactly.
+        """
+        # Convert to dict and back
+        indicator_dict = indicator.to_dict()
+        restored = DistressIndicator.from_dict(indicator_dict)
+        # Verify all fields match
+        assert restored.indicator_text == indicator.indicator_text
+        assert restored.category == indicator.category
+        assert restored.subcategory == indicator.subcategory
+        assert restored.severity == indicator.severity
+        assert restored.confidence == indicator.confidence
+        assert restored.definition_reference == indicator.definition_reference
+    def test_distress_indicator_from_definition(self):
+        """
+        Test creating DistressIndicator from INDICATOR_DEFINITIONS.
+        """
+        # Test with a known indicator
+        indicator = DistressIndicator.from_definition(
+            indicator_key="excessive_guilt",
+            indicator_text="I feel so guilty about everything",
+            confidence=0.85
+        )
+        assert indicator.category == "Guilt"
+        assert indicator.subcategory == "Excessive guilt"
+        assert indicator.severity == "red"
+        assert indicator.definition_reference == "II.D"
+        assert indicator.confidence == 0.85
+class TestFollowUpQuestionRoundTrip:
+    """
+    **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+    Tests that FollowUpQuestion serializes and deserializes correctly.
+    """
+    @given(follow_up_question_strategy())
+    @settings(max_examples=100)
+    def test_follow_up_question_round_trip(self, question):
+        """
+        **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+        **Validates: Requirements 8.5**
+        For any FollowUpQuestion, converting to dict and back should
+        preserve all fields exactly.
+        """
+        # Convert to dict and back
+        question_dict = question.to_dict()
+        restored = FollowUpQuestion.from_dict(question_dict)
+        # Verify all fields match
+        assert restored.question_id == question.question_id
+        assert restored.question_text == question.question_text
+        assert restored.purpose == question.purpose
+class TestClassificationFlowResultRoundTrip:
+    """
+    **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+    Tests that ClassificationFlowResult serializes and deserializes correctly.
+    """
+    @given(classification_flow_result_strategy())
+    @settings(max_examples=100)
+    def test_classification_flow_result_round_trip(self, result):
+        """
+        **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+        **Validates: Requirements 8.5**
+        For any ClassificationFlowResult, converting to dict and back should
+        preserve all fields exactly.
+        """
+        # Convert to dict and back
+        result_dict = result.to_dict()
+        restored = ClassificationFlowResult.from_dict(result_dict)
+        # Verify basic fields match
+        assert restored.classification == result.classification
+        assert restored.confidence == result.confidence
+        assert restored.explanation == result.explanation
+        assert restored.permission_check_message == result.permission_check_message
+        assert restored.referral_message == result.referral_message
+        assert restored.consent_status == result.consent_status
+        assert restored.patient_responses == result.patient_responses
+        assert restored.re_evaluation_result == result.re_evaluation_result
+        # Verify nested indicators
+        assert len(restored.indicators) == len(result.indicators)
+        for orig, rest in zip(result.indicators, restored.indicators):
+            assert rest.indicator_text == orig.indicator_text
+            assert rest.category == orig.category
+            assert rest.severity == orig.severity
+        # Verify nested follow-up questions
+        assert len(restored.follow_up_questions) == len(result.follow_up_questions)
+        for orig, rest in zip(result.follow_up_questions, restored.follow_up_questions):
+            assert rest.question_id == orig.question_id
+            assert rest.question_text == orig.question_text
+            assert rest.purpose == orig.purpose
+class TestTaggingRecordRoundTrip:
+    """
+    **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+    Tests that TaggingRecord serializes and deserializes correctly.
+    """
+    @given(tagging_record_strategy())
+    @settings(max_examples=100)
+    def test_tagging_record_round_trip(self, record):
+        """
+        **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+        **Validates: Requirements 8.5**
+        For any TaggingRecord, converting to dict and back should
+        preserve all fields exactly.
+        """
+        # Convert to dict and back
+        record_dict = record.to_dict()
+        restored = TaggingRecord.from_dict(record_dict)
+        # Verify all fields match
+        assert restored.record_id == record.record_id
+        assert restored.message_id == record.message_id
+        assert restored.is_classification_correct == record.is_classification_correct
+        assert restored.classification_subcategory == record.classification_subcategory
+        assert restored.correct_classification == record.correct_classification
+        assert restored.question_issues == record.question_issues
+        assert restored.question_comments == record.question_comments
+        assert restored.referral_issues == record.referral_issues
+        assert restored.referral_comments == record.referral_comments
+        assert restored.indicator_issues == record.indicator_issues
+        assert restored.indicator_comments == record.indicator_comments
+        assert restored.general_notes == record.general_notes
+class TestInteractionStepLogRoundTrip:
+    """
+    **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+    Tests that InteractionStepLog serializes and deserializes correctly.
+    """
+    @given(interaction_step_log_strategy())
+    @settings(max_examples=100)
+    def test_interaction_step_log_round_trip(self, log):
+        """
+        **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+        **Validates: Requirements 8.5**
+        For any InteractionStepLog, converting to dict and back should
+        preserve all fields exactly.
+        """
+        # Convert to dict and back
+        log_dict = log.to_dict()
+        restored = InteractionStepLog.from_dict(log_dict)
+        # Verify all fields match
+        assert restored.step_id == log.step_id
+        assert restored.session_id == log.session_id
+        assert restored.message_id == log.message_id
+        assert restored.step_type == log.step_type
+        assert restored.input_text == log.input_text
+        assert restored.model_output == log.model_output
+        assert restored.approval_status == log.approval_status
+        assert restored.tagging_data == log.tagging_data
+    @given(interaction_step_log_with_tagging_strategy())
+    @settings(max_examples=100)
+    def test_interaction_step_log_with_tagging_round_trip(self, log):
+        """
+        **Feature: chaplain-feedback-system, Property: Data Model Round Trip**
+        **Validates: Requirements 8.5**
+        For any InteractionStepLog with nested TaggingRecord, converting to dict
+        and back should preserve all fields exactly.
+        """
+        # Convert to dict and back
+        log_dict = log.to_dict()
+        restored = InteractionStepLog.from_dict(log_dict)
+        # Verify basic fields match
+        assert restored.step_id == log.step_id
+        assert restored.session_id == log.session_id
+        assert restored.message_id == log.message_id
+        assert restored.step_type == log.step_type
+        assert restored.input_text == log.input_text
+        assert restored.model_output == log.model_output
+        assert restored.approval_status == log.approval_status
+        # Verify nested tagging data
+        if log.tagging_data is None:
+            assert restored.tagging_data is None
+        else:
+            assert restored.tagging_data is not None
+            assert restored.tagging_data.record_id == log.tagging_data.record_id
+            assert restored.tagging_data.message_id == log.tagging_data.message_id
+            assert restored.tagging_data.is_classification_correct == log.tagging_data.is_classification_correct
+            assert restored.tagging_data.question_issues == log.tagging_data.question_issues
+            assert restored.tagging_data.referral_issues == log.tagging_data.referral_issues

tests/chaplain_feedback/test_properties_error_pattern_analyzer.py ADDED Viewed

	@@ -0,0 +1,194 @@

+"""
+Property-based tests for ErrorPatternAnalyzer.
+Tests universal properties that should hold across all inputs
+for the error pattern analysis functionality.
+"""
+from hypothesis import given, strategies as st
+from src.core.error_pattern_analyzer import ErrorPatternAnalyzer
+from src.core.chaplain_models import (
+    CLASSIFICATION_SUBCATEGORIES,
+    QUESTION_ISSUE_TYPES,
+    REFERRAL_ISSUE_TYPES,
+)
+from .conftest import tagging_record_strategy
+class TestErrorPatternAnalyzerProperties:
+    """Property-based tests for ErrorPatternAnalyzer."""
+    @given(st.lists(tagging_record_strategy(), min_size=1, max_size=20))
+    def test_property_19_statistics_include_subcategory_breakdown(self, records):
+        """
+        **Feature: chaplain-feedback-system, Property 19: Statistics Include Subcategory Breakdown**
+        **Validates: Requirements 4.4, 5.4, 6.4**
+        """
+        analyzer = ErrorPatternAnalyzer()
+        stats = analyzer.get_statistics_summary(records)
+        assert "total_records" in stats
+        assert "classification_errors" in stats
+        assert "question_issues" in stats
+        assert "referral_issues" in stats
+        assert "indicator_issues" in stats
+        assert "common_patterns" in stats
+        assert stats["total_records"] == len(records)
+        classification_errors = stats["classification_errors"]
+        for subcategory in CLASSIFICATION_SUBCATEGORIES:
+            assert subcategory in classification_errors
+            assert isinstance(classification_errors[subcategory], int)
+            assert classification_errors[subcategory] >= 0
+        question_issues = stats["question_issues"]
+        for issue_type in QUESTION_ISSUE_TYPES:
+            assert issue_type in question_issues
+            assert isinstance(question_issues[issue_type], int)
+            assert question_issues[issue_type] >= 0
+        referral_issues = stats["referral_issues"]
+        for issue_type in REFERRAL_ISSUE_TYPES:
+            assert issue_type in referral_issues
+            assert isinstance(referral_issues[issue_type], int)
+            assert referral_issues[issue_type] >= 0
+        indicator_issues = stats["indicator_issues"]
+        assert isinstance(indicator_issues, dict)
+        for indicator_id, count in indicator_issues.items():
+            assert isinstance(indicator_id, str)
+            assert isinstance(count, int)
+            assert count >= 0
+        common_patterns = stats["common_patterns"]
+        assert isinstance(common_patterns, list)
+    @given(st.lists(tagging_record_strategy(), min_size=1, max_size=20))
+    def test_property_20_error_patterns_grouped_by_type(self, records):
+        """
+        **Feature: chaplain-feedback-system, Property 20: Error Patterns Grouped by Type**
+        **Validates: Requirements 10.2, 10.3**
+        """
+        analyzer = ErrorPatternAnalyzer()
+        grouped_patterns = analyzer.get_error_patterns_grouped_by_type(records)
+        assert "classification" in grouped_patterns
+        assert "question" in grouped_patterns
+        assert "referral" in grouped_patterns
+        assert "indicator" in grouped_patterns
+        classification_group = grouped_patterns["classification"]
+        assert isinstance(classification_group, dict)
+        for subcategory in CLASSIFICATION_SUBCATEGORIES:
+            assert subcategory in classification_group
+            assert isinstance(classification_group[subcategory], int)
+            assert classification_group[subcategory] >= 0
+        question_group = grouped_patterns["question"]
+        assert isinstance(question_group, dict)
+        for issue_type in QUESTION_ISSUE_TYPES:
+            assert issue_type in question_group
+            assert isinstance(question_group[issue_type], int)
+            assert question_group[issue_type] >= 0
+        referral_group = grouped_patterns["referral"]
+        assert isinstance(referral_group, dict)
+        for issue_type in REFERRAL_ISSUE_TYPES:
+            assert issue_type in referral_group
+            assert isinstance(referral_group[issue_type], int)
+            assert referral_group[issue_type] >= 0
+        indicator_group = grouped_patterns["indicator"]
+        assert isinstance(indicator_group, dict)
+        for indicator_id, count in indicator_group.items():
+            assert isinstance(indicator_id, str)
+            assert isinstance(count, int)
+            assert count >= 0
+    @given(st.lists(tagging_record_strategy(), min_size=0, max_size=20))
+    def test_classification_error_analysis_counts_correctly(self, records):
+        """Test that classification error analysis counts errors correctly."""
+        analyzer = ErrorPatternAnalyzer()
+        error_counts = analyzer.analyze_classification_errors(records)
+        for subcategory in CLASSIFICATION_SUBCATEGORIES:
+            assert subcategory in error_counts
+            assert isinstance(error_counts[subcategory], int)
+            assert error_counts[subcategory] >= 0
+        expected_counts = {subcategory: 0 for subcategory in CLASSIFICATION_SUBCATEGORIES}
+        for record in records:
+            if not record.is_classification_correct and record.classification_subcategory:
+                if record.classification_subcategory in expected_counts:
+                    expected_counts[record.classification_subcategory] += 1
+        assert error_counts == expected_counts
+    @given(st.lists(tagging_record_strategy(), min_size=0, max_size=20))
+    def test_question_issue_analysis_counts_correctly(self, records):
+        """Test that question issue analysis counts issues correctly."""
+        analyzer = ErrorPatternAnalyzer()
+        issue_counts = analyzer.analyze_question_issues(records)
+        for issue_type in QUESTION_ISSUE_TYPES:
+            assert issue_type in issue_counts
+            assert isinstance(issue_counts[issue_type], int)
+            assert issue_counts[issue_type] >= 0
+        expected_counts = {issue_type: 0 for issue_type in QUESTION_ISSUE_TYPES}
+        for record in records:
+            for issue in record.question_issues:
+                if issue in expected_counts:
+                    expected_counts[issue] += 1
+        assert issue_counts == expected_counts
+    @given(st.lists(tagging_record_strategy(), min_size=0, max_size=20))
+    def test_referral_issue_analysis_counts_correctly(self, records):
+        """Test that referral issue analysis counts issues correctly."""
+        analyzer = ErrorPatternAnalyzer()
+        issue_counts = analyzer.analyze_referral_issues(records)
+        for issue_type in REFERRAL_ISSUE_TYPES:
+            assert issue_type in issue_counts
+            assert isinstance(issue_counts[issue_type], int)
+            assert issue_counts[issue_type] >= 0
+        expected_counts = {issue_type: 0 for issue_type in REFERRAL_ISSUE_TYPES}
+        for record in records:
+            for issue in record.referral_issues:
+                if issue in expected_counts:
+                    expected_counts[issue] += 1
+        assert issue_counts == expected_counts
+    @given(st.lists(tagging_record_strategy(), min_size=0, max_size=20))
+    def test_indicator_issue_analysis_counts_correctly(self, records):
+        """Test that indicator issue analysis counts indicators correctly."""
+        analyzer = ErrorPatternAnalyzer()
+        indicator_counts = analyzer.analyze_indicator_issues(records)
+        assert isinstance(indicator_counts, dict)
+        expected_counts = {}
+        for record in records:
+            for indicator_id in record.indicator_issues:
+                if indicator_id not in expected_counts:
+                    expected_counts[indicator_id] = 0
+                expected_counts[indicator_id] += 1
+        assert indicator_counts == expected_counts
+    @given(st.lists(tagging_record_strategy(), min_size=0, max_size=20))
+    def test_common_patterns_returns_list(self, records):
+        """Test that common patterns analysis returns a list of strings."""
+        analyzer = ErrorPatternAnalyzer()
+        patterns = analyzer.get_common_patterns(records)
+        assert isinstance(patterns, list)
+        for pattern in patterns:
+            assert isinstance(pattern, str)
+            assert len(pattern) > 0

tests/chaplain_feedback/test_properties_interaction_logging.py ADDED Viewed

	@@ -0,0 +1,705 @@

+# test_properties_interaction_logging.py
+"""
+Property-based tests for Chaplain Feedback interaction logging.
+Tests that interaction logging correctly records all steps with input/output
+and supports approval status updates.
+"""
+import pytest
+from hypothesis import given, settings
+from datetime import datetime
+from src.core.interaction_logger import InteractionLogger
+from src.core.chaplain_models import (
+    InteractionStepLog,
+    TaggingRecord,
+    INTERACTION_STEP_TYPES,
+)
+from tests.chaplain_feedback.conftest import (
+    valid_id_strategy,
+    tagging_record_strategy,
+)
+class TestInteractionLoggingCompleteness:
+    """
+    **Feature: chaplain-feedback-system, Property 14: Interaction Step Logging Complete**
+    Tests that interaction logging records all required fields for each step.
+    """
+    def test_interaction_step_logging_complete_all_types(self):
+        """
+        **Feature: chaplain-feedback-system, Property 14: Interaction Step Logging Complete**
+        **Validates: Requirements 7.1, 7.2**
+        For any interaction step, the log should contain: input text, model output, and timestamp.
+        """
+        logger = InteractionLogger()
+        # Test all step types
+        for step_type in INTERACTION_STEP_TYPES:
+            session_id = f"session_{step_type}"
+            message_id = f"msg_{step_type}"
+            input_text = f"input for {step_type}"
+            model_output = f"output for {step_type}"
+            # Log a step
+            step_id = logger.log_step(
+                session_id=session_id,
+                message_id=message_id,
+                step_type=step_type,
+                input_text=input_text,
+                model_output=model_output,
+            )
+            # Retrieve the logged step
+            logged_step = logger.get_step(step_id)
+            # Verify all required fields are present and correct
+            assert logged_step is not None
+            assert logged_step.step_id == step_id
+            assert logged_step.session_id == session_id
+            assert logged_step.message_id == message_id
+            assert logged_step.step_type == step_type
+            assert logged_step.input_text == input_text
+            assert logged_step.model_output == model_output
+            assert logged_step.timestamp is not None
+            assert isinstance(logged_step.timestamp, datetime)
+            assert logged_step.approval_status is None  # Initially no approval
+            assert logged_step.tagging_data is None  # Initially no tagging
+    def test_interaction_step_logging_multiple_steps(self):
+        """
+        Test that multiple steps are logged correctly for a session.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_1"
+        message_id = "test_message_1"
+        # Log multiple steps
+        step_ids = []
+        for i in range(3):
+            step_id = logger.log_step(
+                session_id=session_id,
+                message_id=message_id,
+                step_type="classification",
+                input_text=f"input {i}",
+                model_output=f"output {i}",
+            )
+            step_ids.append(step_id)
+        # Retrieve all session logs
+        session_logs = logger.get_session_logs(session_id)
+        # Verify all steps are logged
+        assert len(session_logs) == 3
+        for i, log in enumerate(session_logs):
+            assert log.input_text == f"input {i}"
+            assert log.model_output == f"output {i}"
+    def test_interaction_step_logging_preserves_order(self):
+        """
+        Test that logged steps are retrieved in the order they were logged.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_order"
+        # Log steps in order
+        step_ids = []
+        for i in range(5):
+            step_id = logger.log_step(
+                session_id=session_id,
+                message_id=f"msg_{i}",
+                step_type="classification",
+                input_text=f"input_{i}",
+                model_output=f"output_{i}",
+            )
+            step_ids.append(step_id)
+        # Retrieve logs
+        session_logs = logger.get_session_logs(session_id)
+        # Verify order is preserved
+        assert len(session_logs) == 5
+        for i, log in enumerate(session_logs):
+            assert log.message_id == f"msg_{i}"
+            assert log.input_text == f"input_{i}"
+    def test_interaction_step_logging_by_type(self):
+        """
+        Test filtering logs by step type.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_types"
+        # Log different types of steps
+        logger.log_step(session_id, "msg1", "classification", "input1", "output1")
+        logger.log_step(session_id, "msg2", "explanation", "input2", "output2")
+        logger.log_step(session_id, "msg3", "classification", "input3", "output3")
+        logger.log_step(session_id, "msg4", "referral", "input4", "output4")
+        # Filter by type
+        classification_logs = logger.get_session_logs_by_type(session_id, "classification")
+        explanation_logs = logger.get_session_logs_by_type(session_id, "explanation")
+        referral_logs = logger.get_session_logs_by_type(session_id, "referral")
+        # Verify filtering
+        assert len(classification_logs) == 2
+        assert len(explanation_logs) == 1
+        assert len(referral_logs) == 1
+    def test_interaction_step_logging_message_logs(self):
+        """
+        Test retrieving logs for a specific message across sessions.
+        """
+        logger = InteractionLogger()
+        message_id = "shared_message"
+        # Log same message in different sessions
+        logger.log_step("session1", message_id, "classification", "input1", "output1")
+        logger.log_step("session2", message_id, "explanation", "input2", "output2")
+        logger.log_step("session1", "other_msg", "referral", "input3", "output3")
+        # Get logs for the message
+        message_logs = logger.get_message_logs(message_id)
+        # Verify we get logs from both sessions
+        assert len(message_logs) == 2
+        assert all(log.message_id == message_id for log in message_logs)
+    def test_interaction_step_logging_empty_strings(self):
+        """
+        Test that empty input/output strings are logged correctly.
+        """
+        logger = InteractionLogger()
+        step_id = logger.log_step(
+            session_id="test_session",
+            message_id="test_msg",
+            step_type="classification",
+            input_text="",
+            model_output="",
+        )
+        logged_step = logger.get_step(step_id)
+        assert logged_step.input_text == ""
+        assert logged_step.model_output == ""
+    def test_interaction_step_logging_long_text(self):
+        """
+        Test that long input/output text is logged correctly.
+        """
+        logger = InteractionLogger()
+        long_text = "x" * 10000
+        step_id = logger.log_step(
+            session_id="test_session",
+            message_id="test_msg",
+            step_type="classification",
+            input_text=long_text,
+            model_output=long_text,
+        )
+        logged_step = logger.get_step(step_id)
+        assert logged_step.input_text == long_text
+        assert logged_step.model_output == long_text
+        assert len(logged_step.input_text) == 10000
+    def test_interaction_step_logging_special_characters(self):
+        """
+        Test that special characters in input/output are preserved.
+        """
+        logger = InteractionLogger()
+        special_text = "Test with special chars: !@#$%^&*()_+-=[]{}|;:',.<>?/~`"
+        step_id = logger.log_step(
+            session_id="test_session",
+            message_id="test_msg",
+            step_type="classification",
+            input_text=special_text,
+            model_output=special_text,
+        )
+        logged_step = logger.get_step(step_id)
+        assert logged_step.input_text == special_text
+        assert logged_step.model_output == special_text
+    def test_interaction_step_logging_unicode(self):
+        """
+        Test that Unicode characters in input/output are preserved.
+        """
+        logger = InteractionLogger()
+        unicode_text = "Test with Unicode: 你好世界 🌍 Привет мир"
+        step_id = logger.log_step(
+            session_id="test_session",
+            message_id="test_msg",
+            step_type="classification",
+            input_text=unicode_text,
+            model_output=unicode_text,
+        )
+        logged_step = logger.get_step(step_id)
+        assert logged_step.input_text == unicode_text
+        assert logged_step.model_output == unicode_text
+    def test_interaction_step_logging_statistics(self):
+        """
+        Test that session statistics are calculated correctly.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_stats"
+        # Log some steps
+        logger.log_step(session_id, "msg1", "classification", "input1", "output1")
+        logger.log_step(session_id, "msg2", "explanation", "input2", "output2")
+        logger.log_step(session_id, "msg3", "referral", "input3", "output3")
+        # Get statistics
+        stats = logger.get_session_statistics(session_id)
+        # Verify statistics
+        assert stats["session_id"] == session_id
+        assert stats["total_steps"] == 3
+        assert stats["approved_steps"] == 0
+        assert stats["disapproved_steps"] == 0
+        assert stats["unapproved_steps"] == 3
+        assert stats["steps_by_type"]["classification"] == 1
+        assert stats["steps_by_type"]["explanation"] == 1
+        assert stats["steps_by_type"]["referral"] == 1
+    def test_interaction_step_logging_invalid_step_type(self):
+        """
+        Test that invalid step types raise an error.
+        """
+        logger = InteractionLogger()
+        with pytest.raises(ValueError):
+            logger.log_step(
+                session_id="test_session",
+                message_id="test_msg",
+                step_type="invalid_type",
+                input_text="input",
+                model_output="output",
+            )
+    def test_interaction_step_logging_nonexistent_step(self):
+        """
+        Test that retrieving a nonexistent step returns None.
+        """
+        logger = InteractionLogger()
+        result = logger.get_step("nonexistent_step_id")
+        assert result is None
+    def test_interaction_step_logging_empty_session(self):
+        """
+        Test that retrieving logs for an empty session returns empty list.
+        """
+        logger = InteractionLogger()
+        session_logs = logger.get_session_logs("nonexistent_session")
+        assert session_logs == []
+    def test_interaction_step_logging_export(self):
+        """
+        Test that session logs can be exported as dictionaries.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_export"
+        # Log some steps
+        logger.log_step(session_id, "msg1", "classification", "input1", "output1")
+        logger.log_step(session_id, "msg2", "explanation", "input2", "output2")
+        # Export logs
+        exported = logger.export_session_logs(session_id)
+        # Verify export
+        assert len(exported) == 2
+        assert all(isinstance(log, dict) for log in exported)
+        assert all("step_id" in log for log in exported)
+        assert all("input_text" in log for log in exported)
+        assert all("model_output" in log for log in exported)
+        assert all("timestamp" in log for log in exported)
+class TestFeedbackLogging:
+    """
+    **Feature: chaplain-feedback-system, Property 15: Feedback Logging Complete**
+    Tests that feedback logging correctly records approval/disapproval status
+    with tagging categories and comments.
+    """
+    def test_feedback_logging_approved_status(self):
+        """
+        **Feature: chaplain-feedback-system, Property 15: Feedback Logging Complete**
+        **Validates: Requirements 7.3, 7.4**
+        For any feedback, the log should record approval status.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_feedback"
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with approved status
+        logger.update_approval(step_id, "approved")
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.approval_status == "approved"
+        assert logged_step.tagging_data is None
+    def test_feedback_logging_disapproved_status(self):
+        """
+        Test that disapproved status is recorded correctly.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_feedback"
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with disapproved status
+        logger.update_approval(step_id, "disapproved")
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.approval_status == "disapproved"
+    @given(tagging_record_strategy())
+    @settings(max_examples=100)
+    def test_feedback_logging_with_tagging_data(self, tagging_record):
+        """
+        **Feature: chaplain-feedback-system, Property 15: Feedback Logging Complete**
+        **Validates: Requirements 7.3, 7.4**
+        For any chaplain feedback, the log should contain: approval/disapproval status,
+        and if disapproved, the tagging categories and comments.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_tagging"
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id=tagging_record.message_id,
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with disapproved status and tagging data
+        logger.update_approval(step_id, "disapproved", tagging_record)
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.approval_status == "disapproved"
+        assert logged_step.tagging_data is not None
+        assert logged_step.tagging_data.record_id == tagging_record.record_id
+        assert logged_step.tagging_data.message_id == tagging_record.message_id
+        assert logged_step.tagging_data.is_classification_correct == tagging_record.is_classification_correct
+        assert logged_step.tagging_data.question_issues == tagging_record.question_issues
+        assert logged_step.tagging_data.referral_issues == tagging_record.referral_issues
+    def test_feedback_logging_classification_subcategory(self):
+        """
+        Test that classification subcategory is recorded in tagging data.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_classification"
+        # Create tagging record with classification subcategory
+        tagging = TaggingRecord(
+            record_id="tag1",
+            message_id="msg1",
+            is_classification_correct=False,
+            classification_subcategory="missed_indicators",
+            correct_classification="red",
+        )
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with tagging
+        logger.update_approval(step_id, "disapproved", tagging)
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.tagging_data.classification_subcategory == "missed_indicators"
+        assert logged_step.tagging_data.correct_classification == "red"
+    def test_feedback_logging_question_issues(self):
+        """
+        Test that question issues are recorded in tagging data.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_questions"
+        # Create tagging record with question issues
+        tagging = TaggingRecord(
+            record_id="tag1",
+            message_id="msg1",
+            is_classification_correct=True,
+            question_issues=["inappropriate", "too_leading"],
+            question_comments="Questions were too intrusive",
+        )
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="follow_up",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with tagging
+        logger.update_approval(step_id, "disapproved", tagging)
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.tagging_data.question_issues == ["inappropriate", "too_leading"]
+        assert logged_step.tagging_data.question_comments == "Questions were too intrusive"
+    def test_feedback_logging_referral_issues(self):
+        """
+        Test that referral issues are recorded in tagging data.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_referral"
+        # Create tagging record with referral issues
+        tagging = TaggingRecord(
+            record_id="tag1",
+            message_id="msg1",
+            is_classification_correct=True,
+            referral_issues=["incomplete_summary", "inappropriate_tone"],
+            referral_comments="Message was incomplete",
+        )
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="referral",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with tagging
+        logger.update_approval(step_id, "disapproved", tagging)
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.tagging_data.referral_issues == ["incomplete_summary", "inappropriate_tone"]
+        assert logged_step.tagging_data.referral_comments == "Message was incomplete"
+    def test_feedback_logging_indicator_issues(self):
+        """
+        Test that indicator issues are recorded in tagging data.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_indicators"
+        # Create tagging record with indicator issues
+        tagging = TaggingRecord(
+            record_id="tag1",
+            message_id="msg1",
+            is_classification_correct=True,
+            indicator_issues=["indicator_1", "indicator_2"],
+            indicator_comments="These indicators were incorrectly identified",
+        )
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with tagging
+        logger.update_approval(step_id, "disapproved", tagging)
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.tagging_data.indicator_issues == ["indicator_1", "indicator_2"]
+        assert logged_step.tagging_data.indicator_comments == "These indicators were incorrectly identified"
+    def test_feedback_logging_general_notes(self):
+        """
+        Test that general notes are recorded in tagging data.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_notes"
+        # Create tagging record with general notes
+        tagging = TaggingRecord(
+            record_id="tag1",
+            message_id="msg1",
+            is_classification_correct=True,
+            general_notes="Overall good classification but needs improvement in tone",
+        )
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with tagging
+        logger.update_approval(step_id, "approved", tagging)
+        # Retrieve and verify
+        logged_step = logger.get_step(step_id)
+        assert logged_step.tagging_data.general_notes == "Overall good classification but needs improvement in tone"
+    def test_feedback_logging_disapproved_steps_retrieval(self):
+        """
+        Test that disapproved steps can be retrieved from a session.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_disapproved"
+        # Log multiple steps
+        step_id_1 = logger.log_step(session_id, "msg1", "classification", "input1", "output1")
+        step_id_2 = logger.log_step(session_id, "msg2", "explanation", "input2", "output2")
+        step_id_3 = logger.log_step(session_id, "msg3", "referral", "input3", "output3")
+        # Approve first, disapprove second and third
+        logger.update_approval(step_id_1, "approved")
+        logger.update_approval(step_id_2, "disapproved")
+        logger.update_approval(step_id_3, "disapproved")
+        # Get disapproved steps
+        disapproved = logger.get_disapproved_steps(session_id)
+        # Verify
+        assert len(disapproved) == 2
+        assert all(log.approval_status == "disapproved" for log in disapproved)
+    def test_feedback_logging_unapproved_steps_retrieval(self):
+        """
+        Test that unapproved steps can be retrieved from a session.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_unapproved"
+        # Log multiple steps
+        step_id_1 = logger.log_step(session_id, "msg1", "classification", "input1", "output1")
+        step_id_2 = logger.log_step(session_id, "msg2", "explanation", "input2", "output2")
+        step_id_3 = logger.log_step(session_id, "msg3", "referral", "input3", "output3")
+        # Approve first, leave others unapproved
+        logger.update_approval(step_id_1, "approved")
+        # Get unapproved steps
+        unapproved = logger.get_unapproved_steps(session_id)
+        # Verify
+        assert len(unapproved) == 2
+        assert all(log.approval_status is None for log in unapproved)
+    def test_feedback_logging_invalid_approval_status(self):
+        """
+        Test that invalid approval status raises an error.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_invalid"
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Try to update with invalid status
+        with pytest.raises(ValueError):
+            logger.update_approval(step_id, "invalid_status")
+    def test_feedback_logging_nonexistent_step(self):
+        """
+        Test that updating a nonexistent step raises an error.
+        """
+        logger = InteractionLogger()
+        with pytest.raises(ValueError):
+            logger.update_approval("nonexistent_step", "approved")
+    def test_feedback_logging_export_with_tagging(self):
+        """
+        Test that exported logs include tagging data.
+        """
+        logger = InteractionLogger()
+        session_id = "test_session_export_tagging"
+        # Create tagging record
+        tagging = TaggingRecord(
+            record_id="tag1",
+            message_id="msg1",
+            is_classification_correct=False,
+            classification_subcategory="missed_indicators",
+            correct_classification="red",
+            general_notes="Missed key indicators",
+        )
+        # Log a step
+        step_id = logger.log_step(
+            session_id=session_id,
+            message_id="msg1",
+            step_type="classification",
+            input_text="input",
+            model_output="output",
+        )
+        # Update with tagging
+        logger.update_approval(step_id, "disapproved", tagging)
+        # Export logs
+        exported = logger.export_session_logs(session_id)
+        # Verify export includes tagging data
+        assert len(exported) == 1
+        assert exported[0]["approval_status"] == "disapproved"
+        assert exported[0]["tagging_data"] is not None
+        assert exported[0]["tagging_data"]["classification_subcategory"] == "missed_indicators"
+        assert exported[0]["tagging_data"]["correct_classification"] == "red"
+        assert exported[0]["tagging_data"]["general_notes"] == "Missed key indicators"

tests/chaplain_feedback/test_properties_tagging_service.py ADDED Viewed

	@@ -0,0 +1,223 @@

+# test_properties_tagging_service.py
+"""
+Property-based tests for TaggingService.
+Tests universal properties that should hold across all inputs
+for the tagging system functionality.
+"""
+import pytest
+from hypothesis import given, strategies as st
+from src.core.tagging_service import TaggingService
+from src.core.chaplain_models import (
+    CLASSIFICATION_SUBCATEGORIES,
+    QUESTION_ISSUE_TYPES,
+    REFERRAL_ISSUE_TYPES,
+)
+from .conftest import valid_id_strategy
+class TestTaggingServiceProperties:
+    """Property-based tests for TaggingService."""
+    @given(
+        message_id=valid_id_strategy(),
+        general_notes=st.text(max_size=200)
+    )
+    def test_property_10_wrong_classification_subcategories_available(
+        self, message_id: str, general_notes: str
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 10: Wrong Classification Subcategories Available**
+        **Validates: Requirements 4.1**
+        For any incorrect classification feedback, the system should provide
+        all three subcategory options: "missed_indicators", "false_positive", "missed_distress".
+        """
+        service = TaggingService()
+        # Get available subcategories
+        available_subcategories = service.get_available_classification_subcategories()
+        # Should contain all three required subcategories
+        expected_subcategories = {"missed_indicators", "false_positive", "missed_distress"}
+        assert set(available_subcategories) == expected_subcategories
+        # Should be able to create records with each subcategory
+        for subcategory in available_subcategories:
+            record = service.create_classification_correction(
+                message_id=f"{message_id}_{subcategory}",
+                subcategory=subcategory,
+                correct_classification="red",
+                general_notes=general_notes
+            )
+            assert record.classification_subcategory == subcategory
+            assert record.is_classification_correct is False
+            assert record.correct_classification == "red"
+    @given(
+        message_id=valid_id_strategy(),
+        subcategory=st.sampled_from(CLASSIFICATION_SUBCATEGORIES),
+        correct_classification=st.sampled_from(["red", "yellow", "green"]),
+        general_notes=st.text(max_size=200)
+    )
+    def test_property_11_wrong_classification_saves_subcategory(
+        self,
+        message_id: str,
+        subcategory: str,
+        correct_classification: str,
+        general_notes: str
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 11: Wrong Classification Saves Subcategory**
+        **Validates: Requirements 4.3**
+        For any wrong classification tag submission, the saved record should contain
+        both the subcategory and the correct classification.
+        """
+        service = TaggingService()
+        # Create classification correction
+        record = service.create_classification_correction(
+            message_id=message_id,
+            subcategory=subcategory,
+            correct_classification=correct_classification,
+            general_notes=general_notes
+        )
+        # Record should be saved and retrievable
+        retrieved_record = service.get_tagging_record(record.record_id)
+        assert retrieved_record is not None
+        # Should contain both subcategory and correct classification
+        assert retrieved_record.classification_subcategory == subcategory
+        assert retrieved_record.correct_classification == correct_classification
+        assert retrieved_record.is_classification_correct is False
+        # Should also be retrievable by message ID
+        message_records = service.get_records_for_message(message_id)
+        assert len(message_records) == 1
+        assert message_records[0].classification_subcategory == subcategory
+        assert message_records[0].correct_classification == correct_classification
+    @given(
+        message_id=valid_id_strategy(),
+        question_issues=st.lists(
+            st.sampled_from(QUESTION_ISSUE_TYPES),
+            min_size=1,
+            max_size=len(QUESTION_ISSUE_TYPES),
+            unique=True
+        ),
+        question_comments=st.one_of(st.none(), st.text(max_size=200))
+    )
+    def test_property_12_question_issues_multi_select(
+        self,
+        message_id: str,
+        question_issues: list,
+        question_comments: str
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 12: Question Issues Multi-Select**
+        **Validates: Requirements 5.2**
+        For any follow-up question issue tagging, the system should allow
+        selecting multiple subcategories and save all selected values.
+        """
+        service = TaggingService()
+        # Create record with multiple question issues
+        record = service.create_tagging_record(
+            message_id=message_id,
+            question_issues=question_issues,
+            question_comments=question_comments
+        )
+        # Should save all selected question issues
+        assert set(record.question_issues) == set(question_issues)
+        assert record.question_comments == question_comments
+        # Should be retrievable with all issues intact
+        retrieved_record = service.get_tagging_record(record.record_id)
+        assert retrieved_record is not None
+        assert set(retrieved_record.question_issues) == set(question_issues)
+        assert retrieved_record.question_comments == question_comments
+    @given(
+        message_id=valid_id_strategy(),
+        referral_issues=st.lists(
+            st.sampled_from(REFERRAL_ISSUE_TYPES),
+            min_size=1,
+            max_size=len(REFERRAL_ISSUE_TYPES),
+            unique=True
+        ),
+        referral_comments=st.one_of(st.none(), st.text(max_size=200))
+    )
+    def test_property_13_referral_issues_multi_select(
+        self,
+        message_id: str,
+        referral_issues: list,
+        referral_comments: str
+    ):
+        """
+        **Feature: chaplain-feedback-system, Property 13: Referral Issues Multi-Select**
+        **Validates: Requirements 6.2**
+        For any referral message issue tagging, the system should allow
+        selecting multiple subcategories and save all selected values.
+        """
+        service = TaggingService()
+        # Create record with multiple referral issues
+        record = service.create_tagging_record(
+            message_id=message_id,
+            referral_issues=referral_issues,
+            referral_comments=referral_comments
+        )
+        # Should save all selected referral issues
+        assert set(record.referral_issues) == set(referral_issues)
+        assert record.referral_comments == referral_comments
+        # Should be retrievable with all issues intact
+        retrieved_record = service.get_tagging_record(record.record_id)
+        assert retrieved_record is not None
+        assert set(retrieved_record.referral_issues) == set(referral_issues)
+        assert retrieved_record.referral_comments == referral_comments
+    @given(
+        message_id=valid_id_strategy(),
+        indicator_issues=st.lists(st.text(min_size=1, max_size=50), min_size=1, max_size=5),
+        indicator_comments=st.one_of(st.none(), st.text(max_size=200))
+    )
+    def test_indicator_issue_tagging_functionality(
+        self,
+        message_id: str,
+        indicator_issues: list,
+        indicator_comments: str
+    ):
+        """
+        Test that indicator issue tagging works correctly.
+        This tests the indicator issue tagging functionality to ensure
+        incorrectly identified indicators can be marked with comments.
+        """
+        service = TaggingService()
+        # Create record with indicator issues
+        record = service.create_indicator_issue_tagging(
+            message_id=message_id,
+            indicator_issues=indicator_issues,
+            indicator_comments=indicator_comments
+        )
+        # Should save all indicator issues
+        assert record.indicator_issues == indicator_issues
+        assert record.indicator_comments == indicator_comments
+        # Should be retrievable with all issues intact
+        retrieved_record = service.get_tagging_record(record.record_id)
+        assert retrieved_record is not None
+        assert retrieved_record.indicator_issues == indicator_issues
+        assert retrieved_record.indicator_comments == indicator_comments