Spaces:
Sleeping
Sleeping
| # test_test_datasets.py | |
| """ | |
| Tests for test dataset management functionality. | |
| """ | |
| import pytest | |
| from src.core.test_datasets import TestDatasetManager | |
| from src.core.verification_models import TestDataset, TestMessage | |
class TestDatasetManagerBasics:
    """Test basic dataset management functionality.

    Covers dataset discovery, retrieval by id, the pre-classified labels
    carried by each dataset's messages, and basic message integrity
    (required fields, unique ids, message counts).
    """

    # Dataset identifiers the manager is expected to expose.
    EXPECTED_DATASET_IDS = (
        "dataset_suicidal_ideation",
        "dataset_anxiety_worry",
        "dataset_mild_concerns",
        "dataset_healthy_positive",
        "dataset_mixed_scenarios",
    )

    # Labels a message may be pre-classified with.
    VALID_LABELS = ("green", "yellow", "red")

    # Smallest number of messages these tests require from a dataset.
    MIN_MESSAGES = 10

    @staticmethod
    def _assert_all_messages_labelled(dataset_id, expected_label):
        """Assert that a dataset is non-empty and uniformly labelled.

        Args:
            dataset_id: Identifier passed to ``TestDatasetManager.get_dataset``.
            expected_label: Label every message in the dataset must carry.
        """
        dataset = TestDatasetManager.get_dataset(dataset_id)
        # Guard against a vacuous pass: with no messages at all, the
        # "every message has the label" check below is trivially true.
        assert len(dataset.messages) > 0, f"{dataset_id} has no messages"
        mismatched = [
            message.message_id
            for message in dataset.messages
            if message.pre_classified_label != expected_label
        ]
        assert not mismatched, (
            f"{dataset_id}: messages not labelled {expected_label!r}: {mismatched}"
        )

    def test_get_all_datasets_returns_five_datasets(self):
        """Test that all five datasets are available."""
        datasets = TestDatasetManager.get_all_datasets()
        assert len(datasets) == len(self.EXPECTED_DATASET_IDS)
        missing = [
            dataset_id
            for dataset_id in self.EXPECTED_DATASET_IDS
            if dataset_id not in datasets
        ]
        assert not missing, f"datasets missing from get_all_datasets(): {missing}"

    def test_get_dataset_list_returns_metadata(self):
        """Test that dataset list includes required metadata."""
        dataset_list = TestDatasetManager.get_dataset_list()
        assert len(dataset_list) == len(self.EXPECTED_DATASET_IDS)
        required_keys = ("dataset_id", "name", "description", "message_count")
        for dataset_info in dataset_list:
            absent = [key for key in required_keys if key not in dataset_info]
            assert not absent, f"metadata entry lacks keys {absent}: {dataset_info}"
            assert dataset_info["message_count"] >= self.MIN_MESSAGES, (
                f"{dataset_info['dataset_id']} lists fewer than "
                f"{self.MIN_MESSAGES} messages"
            )

    def test_get_specific_dataset(self):
        """Test retrieving a specific dataset."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")
        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_suicidal_ideation"
        assert len(dataset.messages) >= self.MIN_MESSAGES

    def test_get_nonexistent_dataset_raises_error(self):
        """Test that requesting a nonexistent dataset raises ValueError."""
        with pytest.raises(ValueError):
            TestDatasetManager.get_dataset("nonexistent_dataset")

    def test_load_dataset_returns_dataset(self):
        """Test that load_dataset returns a valid dataset."""
        dataset = TestDatasetManager.load_dataset("dataset_anxiety_worry")
        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_anxiety_worry"

    def test_get_messages_from_dataset(self):
        """Test retrieving messages from a dataset."""
        messages = TestDatasetManager.get_messages_from_dataset(
            "dataset_healthy_positive"
        )
        assert len(messages) >= self.MIN_MESSAGES
        assert all(isinstance(message, TestMessage) for message in messages)

    def test_suicidal_ideation_dataset_has_red_messages(self):
        """Test that suicidal ideation dataset contains only RED classified messages."""
        self._assert_all_messages_labelled("dataset_suicidal_ideation", "red")

    def test_anxiety_worry_dataset_has_yellow_messages(self):
        """Test that anxiety dataset contains only YELLOW classified messages."""
        self._assert_all_messages_labelled("dataset_anxiety_worry", "yellow")

    def test_healthy_positive_dataset_has_green_messages(self):
        """Test that healthy dataset contains only GREEN classified messages."""
        self._assert_all_messages_labelled("dataset_healthy_positive", "green")

    def test_mixed_scenarios_dataset_has_all_classifications(self):
        """Test that mixed scenarios dataset contains all three classifications."""
        dataset = TestDatasetManager.get_dataset("dataset_mixed_scenarios")
        classifications = {
            message.pre_classified_label for message in dataset.messages
        }
        absent = [
            label for label in self.VALID_LABELS if label not in classifications
        ]
        assert not absent, f"dataset_mixed_scenarios has no messages labelled {absent}"

    def test_all_messages_have_required_fields(self):
        """Test that all messages have required fields."""
        datasets = TestDatasetManager.get_all_datasets()
        for dataset in datasets.values():
            for message in dataset.messages:
                assert message.message_id, (
                    f"{dataset.dataset_id}: message without a message_id"
                )
                assert message.text, (
                    f"{dataset.dataset_id}/{message.message_id}: empty text"
                )
                assert message.pre_classified_label in self.VALID_LABELS, (
                    f"{dataset.dataset_id}/{message.message_id}: unexpected label "
                    f"{message.pre_classified_label!r}"
                )

    def test_all_datasets_have_unique_message_ids(self):
        """Test that message IDs are unique within each dataset."""
        datasets = TestDatasetManager.get_all_datasets()
        for dataset in datasets.values():
            message_ids = [message.message_id for message in dataset.messages]
            assert len(message_ids) == len(set(message_ids)), (
                f"{dataset.dataset_id}: duplicate message ids among {message_ids}"
            )

    def test_dataset_message_count_property(self):
        """Test that dataset message_count property is accurate."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")
        assert dataset.message_count == len(dataset.messages)
        assert dataset.message_count >= self.MIN_MESSAGES