# DocUA's picture
# Add property-based tests for verification mode functionality
# a3934b1
# test_test_datasets.py
"""
Tests for test dataset management functionality.
"""
import pytest
from src.core.test_datasets import TestDatasetManager
from src.core.verification_models import TestDataset, TestMessage
class TestDatasetManagerBasics:
    """Test basic dataset management functionality.

    All tests go through the class-level accessors of ``TestDatasetManager``
    (``get_all_datasets``, ``get_dataset_list``, ``get_dataset``,
    ``load_dataset``, ``get_messages_from_dataset``) and check the datasets
    and messages they return.
    """

    # Dataset identifiers these tests expect the manager to expose.
    EXPECTED_DATASET_IDS = (
        "dataset_suicidal_ideation",
        "dataset_anxiety_worry",
        "dataset_mild_concerns",
        "dataset_healthy_positive",
        "dataset_mixed_scenarios",
    )
    # Metadata keys every entry of get_dataset_list() must carry.
    REQUIRED_METADATA_KEYS = ("dataset_id", "name", "description", "message_count")
    # The only labels a message may be pre-classified with.
    VALID_LABELS = ("green", "yellow", "red")
    # Lower bound on dataset size asserted throughout these tests.
    MIN_MESSAGES = 10

    @staticmethod
    def _assert_uniform_label(dataset_id, expected_label):
        """Assert that ``dataset_id`` is non-empty and uniformly labelled.

        Args:
            dataset_id: Identifier passed to ``TestDatasetManager.get_dataset``.
            expected_label: Label every message in the dataset must carry.

        Raises:
            AssertionError: If the dataset has no messages, or if any message
                has a ``pre_classified_label`` other than ``expected_label``.
        """
        dataset = TestDatasetManager.get_dataset(dataset_id)
        # Guard explicitly: an "every message matches" check is trivially true
        # for an empty dataset, which would let a broken fixture pass silently.
        assert len(dataset.messages) > 0, f"{dataset_id} contains no messages"
        mismatched = [
            message.message_id
            for message in dataset.messages
            if message.pre_classified_label != expected_label
        ]
        assert not mismatched, (
            f"{dataset_id}: expected every message to be labelled "
            f"{expected_label!r}, but these are not: {mismatched}"
        )

    def test_get_all_datasets_returns_five_datasets(self):
        """Test that all five datasets are available."""
        datasets = TestDatasetManager.get_all_datasets()
        assert len(datasets) == len(self.EXPECTED_DATASET_IDS)
        for dataset_id in self.EXPECTED_DATASET_IDS:
            assert dataset_id in datasets, f"missing dataset: {dataset_id}"

    def test_get_dataset_list_returns_metadata(self):
        """Test that dataset list includes required metadata."""
        dataset_list = TestDatasetManager.get_dataset_list()
        assert len(dataset_list) == len(self.EXPECTED_DATASET_IDS)
        for dataset_info in dataset_list:
            for key in self.REQUIRED_METADATA_KEYS:
                assert key in dataset_info, f"metadata entry lacks {key!r}: {dataset_info}"
            assert dataset_info["message_count"] >= self.MIN_MESSAGES

    def test_get_specific_dataset(self):
        """Test retrieving a specific dataset."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")
        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_suicidal_ideation"
        assert len(dataset.messages) >= self.MIN_MESSAGES

    def test_get_nonexistent_dataset_raises_error(self):
        """Test that requesting a nonexistent dataset raises ValueError."""
        with pytest.raises(ValueError):
            TestDatasetManager.get_dataset("nonexistent_dataset")

    def test_load_dataset_returns_dataset(self):
        """Test that load_dataset returns a valid dataset."""
        dataset = TestDatasetManager.load_dataset("dataset_anxiety_worry")
        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_anxiety_worry"

    def test_get_messages_from_dataset(self):
        """Test retrieving messages from a dataset."""
        messages = TestDatasetManager.get_messages_from_dataset("dataset_healthy_positive")
        assert len(messages) >= self.MIN_MESSAGES
        assert all(isinstance(msg, TestMessage) for msg in messages)

    def test_suicidal_ideation_dataset_has_red_messages(self):
        """Test that suicidal ideation dataset contains only RED classified messages."""
        self._assert_uniform_label("dataset_suicidal_ideation", "red")

    def test_anxiety_worry_dataset_has_yellow_messages(self):
        """Test that anxiety dataset contains only YELLOW classified messages."""
        self._assert_uniform_label("dataset_anxiety_worry", "yellow")

    def test_healthy_positive_dataset_has_green_messages(self):
        """Test that healthy dataset contains only GREEN classified messages."""
        self._assert_uniform_label("dataset_healthy_positive", "green")

    def test_mixed_scenarios_dataset_has_all_classifications(self):
        """Test that mixed scenarios dataset contains all three classifications."""
        dataset = TestDatasetManager.get_dataset("dataset_mixed_scenarios")
        classifications = {m.pre_classified_label for m in dataset.messages}
        for label in self.VALID_LABELS:
            assert label in classifications, f"mixed dataset has no {label!r} message"

    def test_all_messages_have_required_fields(self):
        """Test that all messages have an id, text, and a valid label."""
        datasets = TestDatasetManager.get_all_datasets()
        for dataset in datasets.values():
            for message in dataset.messages:
                assert message.message_id, f"{dataset.dataset_id}: message without an id"
                assert message.text, (
                    f"{dataset.dataset_id}: message {message.message_id} has no text"
                )
                assert message.pre_classified_label in self.VALID_LABELS, (
                    f"{dataset.dataset_id}: message {message.message_id} has "
                    f"unexpected label {message.pre_classified_label!r}"
                )

    def test_all_datasets_have_unique_message_ids(self):
        """Test that message IDs are unique within each dataset."""
        datasets = TestDatasetManager.get_all_datasets()
        for dataset in datasets.values():
            message_ids = [m.message_id for m in dataset.messages]
            # A set drops repeats, so a size mismatch means an id occurs twice.
            assert len(message_ids) == len(set(message_ids)), (
                f"{dataset.dataset_id}: duplicate message ids found"
            )

    def test_dataset_message_count_property(self):
        """Test that dataset message_count property is accurate."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")
        assert dataset.message_count == len(dataset.messages)
        assert dataset.message_count >= self.MIN_MESSAGES