# test_datasets.py
"""
Test dataset management for Verification Mode.

Provides predefined test datasets with messages pre-classified as GREEN,
YELLOW, or RED for verifying the spiritual distress classifier's accuracy.
"""

from typing import Any, Dict, List

from src.core.verification_models import TestDataset, TestMessage


class TestDatasetManager:
    """Manages predefined test datasets for verification.

    Each dataset is a class-level ``TestDataset`` constant built once when the
    class body executes. Every ``TestMessage`` carries a lowercase
    ``pre_classified_label`` of ``"red"``, ``"yellow"`` or ``"green"``.
    All accessors are classmethods, so no instance is required.
    """

    # Dataset 1: Suicidal Ideation Messages (every message labelled "red")
    SUICIDAL_IDEATION_DATASET = TestDataset(
        dataset_id="dataset_suicidal_ideation",
        name="🔴 Suicidal Ideation Messages",
        description="Messages from patients expressing suicidal thoughts or severe hopelessness",
        messages=[
            TestMessage(
                message_id="suicidal_001",
                text="I don't see any point in living anymore. Everything feels hopeless and I can't go on.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_002",
                text="I've been thinking about ending it all. I have a plan and I'm ready.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_003",
                text="My family would be better off without me. I'm just a burden to everyone.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_004",
                text="I've already said goodbye to my loved ones. I'm going to do it tonight.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_005",
                text="Nothing matters anymore. I'm tired of fighting. I just want the pain to stop.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_006",
                text="I've been researching ways to hurt myself. I think I'm ready to try.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_007",
                text="Everyone would be happier if I was gone. I'm a failure and a disappointment.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_008",
                text="I can't take this anymore. The pain is unbearable and I see no way out.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_009",
                text="I've written a note. I'm going to do it. Please don't try to stop me.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_010",
                text="I'm standing on the edge right now. I'm about to jump.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_011",
                text="I have the pills in my hand. This is it. Goodbye.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="suicidal_012",
                text="My life is meaningless. I'm just taking up space. It's time to end this.",
                pre_classified_label="red",
            ),
        ],
    )

    # Dataset 2: Anxiety and Worry Messages (every message labelled "yellow")
    ANXIETY_WORRY_DATASET = TestDataset(
        dataset_id="dataset_anxiety_worry",
        name="🟡 Anxiety and Worry Messages",
        description="Messages from patients expressing anxiety, worry, or moderate distress",
        messages=[
            TestMessage(
                message_id="anxiety_001",
                text="I've been feeling really anxious lately. My heart races and I can't sleep.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_002",
                text="I'm worried about my health. I keep thinking something is wrong with me.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_003",
                text="I'm having panic attacks at work. I don't know what's triggering them.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_004",
                text="I feel overwhelmed by everything. There's too much going on in my life.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_005",
                text="I'm stressed about my upcoming surgery. I keep imagining things going wrong.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_006",
                text="I'm having trouble concentrating because I'm so worried about finances.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_007",
                text="I feel like something bad is going to happen, but I don't know what.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_008",
                text="My anxiety is affecting my relationships. I'm pushing people away.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_009",
                text="I'm afraid of having another panic attack. It's controlling my life.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_010",
                text="I'm worried about my child's health. I check on them constantly.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_011",
                text="I'm nervous about starting my new job. What if I'm not good enough?",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="anxiety_012",
                text="I've been having chest pain and I'm scared it's my heart.",
                pre_classified_label="yellow",
            ),
        ],
    )

    # Dataset 3: Mild Concerns and Sadness Messages (every message labelled "yellow")
    MILD_CONCERNS_DATASET = TestDataset(
        dataset_id="dataset_mild_concerns",
        name="🟡 Mild Concerns and Sadness Messages",
        description="Messages from patients expressing mild concerns, sadness, or minor distress",
        messages=[
            TestMessage(
                message_id="mild_001",
                text="I've been feeling a bit down lately. I think I need to talk to someone.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_002",
                text="I'm sad about my breakup. It's been hard adjusting to being alone.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_003",
                text="I'm struggling with my faith. I don't feel connected to God anymore.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_004",
                text="I feel lonely even when I'm around people. I don't know why.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_005",
                text="I'm disappointed with how my life turned out. I had different dreams.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_006",
                text="I'm grieving the loss of my parent. Some days are harder than others.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_007",
                text="I feel guilty about something I did. I can't stop thinking about it.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_008",
                text="I'm struggling with my identity. I don't know who I am anymore.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_009",
                text="I feel disconnected from my family. We don't understand each other.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_010",
                text="I'm worried about my future. I don't know what path to take.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_011",
                text="I feel ashamed about my past mistakes. I'm trying to move forward.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mild_012",
                text="I'm struggling with my purpose. I feel like I'm just going through the motions.",
                pre_classified_label="yellow",
            ),
        ],
    )

    # Dataset 4: Healthy and Positive Messages (every message labelled "green")
    HEALTHY_POSITIVE_DATASET = TestDataset(
        dataset_id="dataset_healthy_positive",
        name="🟢 Healthy and Positive Messages",
        description="Messages from patients expressing wellness, gratitude, or positive outlook",
        messages=[
            TestMessage(
                message_id="healthy_001",
                text="I'm feeling great today! The weather is beautiful and I'm enjoying life.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_002",
                text="I'm grateful for my family and friends. They mean so much to me.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_003",
                text="I just finished a great workout. I feel energized and healthy.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_004",
                text="I got promoted at work! I'm so excited about this new opportunity.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_005",
                text="I'm looking forward to my vacation next month. I need some rest and relaxation.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_006",
                text="My faith is strong. I feel connected to God and at peace.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_007",
                text="I'm proud of myself for overcoming my challenges. I'm stronger now.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_008",
                text="I love spending time with my children. They bring so much joy to my life.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_009",
                text="I'm doing well with my recovery. I'm taking it one day at a time.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_010",
                text="I'm excited about my new hobby. It's helping me relax and have fun.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_011",
                text="I'm feeling optimistic about the future. I have hope and dreams.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="healthy_012",
                text="I'm grateful for my health. I'm taking good care of myself.",
                pre_classified_label="green",
            ),
        ],
    )

    # Dataset 5: Mixed Scenarios (green, yellow and red messages interleaved)
    MIXED_SCENARIOS_DATASET = TestDataset(
        dataset_id="dataset_mixed_scenarios",
        name="🎯 Mixed Scenarios",
        description="A diverse mix of messages across all classification levels",
        messages=[
            TestMessage(
                message_id="mixed_001",
                text="I'm having a good day today. Work went well.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="mixed_002",
                text="I'm feeling a bit stressed about the upcoming deadline.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mixed_003",
                text="I'm worried about my health. I've been having chest pains.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mixed_004",
                text="I'm grateful for my supportive family.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="mixed_005",
                text="I can't stop thinking about harming myself. I need help.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="mixed_006",
                text="I'm enjoying my new hobby. It's really helping me relax.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="mixed_007",
                text="I'm feeling overwhelmed by everything. I don't know how to cope.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mixed_008",
                text="I'm at peace with my life. I feel fulfilled.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="mixed_009",
                text="I'm having thoughts of ending my life. I'm scared.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="mixed_010",
                text="I'm struggling with my faith, but I'm trying to stay positive.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mixed_011",
                text="I'm doing well. My medication is helping.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="mixed_012",
                text="I'm terrified. I don't think I can go on anymore.",
                pre_classified_label="red",
            ),
            TestMessage(
                message_id="mixed_013",
                text="I'm worried about my job security.",
                pre_classified_label="yellow",
            ),
            TestMessage(
                message_id="mixed_014",
                text="I'm grateful for another day of life.",
                pre_classified_label="green",
            ),
            TestMessage(
                message_id="mixed_015",
                text="I'm planning to end this. I've made my decision.",
                pre_classified_label="red",
            ),
        ],
    )

    @classmethod
    def get_all_datasets(cls) -> Dict[str, TestDataset]:
        """Return every predefined dataset, keyed by its ``dataset_id``.

        A new dict is built on each call, in declaration order. The values
        are the shared class-level ``TestDataset`` objects, not copies.
        """
        # Listing each constant once keeps key and value from drifting apart.
        predefined = (
            cls.SUICIDAL_IDEATION_DATASET,
            cls.ANXIETY_WORRY_DATASET,
            cls.MILD_CONCERNS_DATASET,
            cls.HEALTHY_POSITIVE_DATASET,
            cls.MIXED_SCENARIOS_DATASET,
        )
        return {dataset.dataset_id: dataset for dataset in predefined}

    @classmethod
    def get_dataset(cls, dataset_id: str) -> TestDataset:
        """Return the dataset registered under ``dataset_id``.

        Args:
            dataset_id: Identifier such as ``"dataset_mixed_scenarios"``.

        Raises:
            ValueError: If no predefined dataset has that identifier.
        """
        try:
            return cls.get_all_datasets()[dataset_id]
        except KeyError:
            # Callers are given ValueError, not KeyError, for an unknown id;
            # "from None" hides the internal lookup from the traceback.
            raise ValueError(f"Dataset {dataset_id} not found") from None

    @classmethod
    def get_dataset_list(cls) -> List[Dict[str, Any]]:
        """Return one metadata dict per dataset, in declaration order.

        Each dict has the keys ``dataset_id``, ``name``, ``description`` and
        ``message_count``. The last is read from the dataset's
        ``message_count`` attribute, which is defined on ``TestDataset``
        (not visible in this module) and is presumably an integer, so the
        values are typed ``Any`` rather than ``str``.
        """
        return [
            {
                "dataset_id": dataset.dataset_id,
                "name": dataset.name,
                "description": dataset.description,
                "message_count": dataset.message_count,
            }
            for dataset in cls.get_all_datasets().values()
        ]

    @classmethod
    def load_dataset(cls, dataset_id: str) -> TestDataset:
        """Load a dataset and return it with all messages.

        Delegates to :meth:`get_dataset`, so an unknown ``dataset_id``
        raises ``ValueError``.
        """
        return cls.get_dataset(dataset_id)

    @classmethod
    def get_messages_from_dataset(cls, dataset_id: str) -> List[TestMessage]:
        """Return the messages of the dataset identified by ``dataset_id``.

        The dataset's own ``messages`` object is returned, not a copy.

        Raises:
            ValueError: If no predefined dataset has that identifier.
        """
        return cls.get_dataset(dataset_id).messages