# test_test_datasets.py
"""
Tests for test dataset management functionality.
"""

import pytest

# NOTE(review): the imported names below start with "Test"; depending on the
# project's pytest ``python_classes`` setting, pytest may try to collect them
# and emit collection warnings -- confirm against the project configuration.
from src.core.test_datasets import TestDatasetManager
from src.core.verification_models import TestDataset, TestMessage

# Dataset identifiers the manager is expected to expose.
_EXPECTED_DATASET_IDS = (
    "dataset_suicidal_ideation",
    "dataset_anxiety_worry",
    "dataset_mild_concerns",
    "dataset_healthy_positive",
    "dataset_mixed_scenarios",
)

# Every message must carry exactly one of these pre-classified labels.
_VALID_LABELS = ("green", "yellow", "red")

# Minimum number of messages each checked dataset must contain.
_MIN_MESSAGES = 10


def _assert_uniform_label(dataset_id, expected_label):
    """Assert that *dataset_id* is non-empty and uses only *expected_label*.

    The explicit non-empty check matters: without it, a dataset with no
    messages would satisfy an "all messages have label X" check vacuously.
    """
    dataset = TestDatasetManager.get_dataset(dataset_id)
    assert dataset.messages, f"{dataset_id} contains no messages"

    mismatched = [
        m.message_id
        for m in dataset.messages
        if m.pre_classified_label != expected_label
    ]
    assert not mismatched, (
        f"{dataset_id}: messages not labelled {expected_label!r}: {mismatched}"
    )


class TestDatasetManagerBasics:
    """Test basic dataset management functionality."""

    def test_get_all_datasets_returns_five_datasets(self):
        """Test that all five datasets are available."""
        datasets = TestDatasetManager.get_all_datasets()

        assert len(datasets) == len(_EXPECTED_DATASET_IDS)
        missing = [
            dataset_id
            for dataset_id in _EXPECTED_DATASET_IDS
            if dataset_id not in datasets
        ]
        assert not missing, f"missing datasets: {missing}"

    def test_get_dataset_list_returns_metadata(self):
        """Test that dataset list includes required metadata."""
        dataset_list = TestDatasetManager.get_dataset_list()
        assert len(dataset_list) == len(_EXPECTED_DATASET_IDS)

        required_keys = ("dataset_id", "name", "description", "message_count")
        for dataset_info in dataset_list:
            for key in required_keys:
                assert key in dataset_info, f"{key!r} missing from {dataset_info!r}"
            assert dataset_info["message_count"] >= _MIN_MESSAGES, (
                f"too few messages listed in {dataset_info!r}"
            )

    def test_get_specific_dataset(self):
        """Test retrieving a specific dataset."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")

        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_suicidal_ideation"
        assert len(dataset.messages) >= _MIN_MESSAGES

    def test_get_nonexistent_dataset_raises_error(self):
        """Test that requesting a nonexistent dataset raises ValueError."""
        with pytest.raises(ValueError):
            TestDatasetManager.get_dataset("nonexistent_dataset")

    def test_load_dataset_returns_dataset(self):
        """Test that load_dataset returns a valid dataset."""
        dataset = TestDatasetManager.load_dataset("dataset_anxiety_worry")

        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_anxiety_worry"

    def test_get_messages_from_dataset(self):
        """Test retrieving messages from a dataset."""
        messages = TestDatasetManager.get_messages_from_dataset(
            "dataset_healthy_positive"
        )

        assert len(messages) >= _MIN_MESSAGES
        assert all(isinstance(msg, TestMessage) for msg in messages)

    def test_suicidal_ideation_dataset_has_red_messages(self):
        """Test that suicidal ideation dataset contains RED classified messages."""
        _assert_uniform_label("dataset_suicidal_ideation", "red")

    def test_anxiety_worry_dataset_has_yellow_messages(self):
        """Test that anxiety dataset contains YELLOW classified messages."""
        _assert_uniform_label("dataset_anxiety_worry", "yellow")

    def test_healthy_positive_dataset_has_green_messages(self):
        """Test that healthy dataset contains GREEN classified messages."""
        _assert_uniform_label("dataset_healthy_positive", "green")

    def test_mixed_scenarios_dataset_has_all_classifications(self):
        """Test that mixed scenarios dataset contains all three classifications."""
        dataset = TestDatasetManager.get_dataset("dataset_mixed_scenarios")
        classifications = {m.pre_classified_label for m in dataset.messages}

        missing = [label for label in _VALID_LABELS if label not in classifications]
        assert not missing, f"dataset_mixed_scenarios lacks labels: {missing}"

    def test_all_messages_have_required_fields(self):
        """Test that all messages have required fields."""
        datasets = TestDatasetManager.get_all_datasets()

        for dataset_id, dataset in datasets.items():
            for message in dataset.messages:
                assert message.message_id, f"{dataset_id}: message without an ID"
                assert message.text, (
                    f"{dataset_id}: message {message.message_id!r} has no text"
                )
                assert message.pre_classified_label in _VALID_LABELS, (
                    f"{dataset_id}: message {message.message_id!r} has invalid "
                    f"label {message.pre_classified_label!r}"
                )

    def test_all_datasets_have_unique_message_ids(self):
        """Test that message IDs are unique within each dataset."""
        datasets = TestDatasetManager.get_all_datasets()

        for dataset_id, dataset in datasets.items():
            message_ids = [m.message_id for m in dataset.messages]
            assert len(message_ids) == len(set(message_ids)), (
                f"{dataset_id} contains duplicate message IDs"
            )

    def test_dataset_message_count_property(self):
        """Test that dataset message_count property is accurate."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")

        assert dataset.message_count == len(dataset.messages)
        assert dataset.message_count >= _MIN_MESSAGES