File size: 5,307 Bytes
a3934b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# test_test_datasets.py
"""
Tests for test dataset management functionality.
"""

import pytest
from src.core.test_datasets import TestDatasetManager
from src.core.verification_models import TestDataset, TestMessage


class TestDatasetManagerBasics:
    """Test basic dataset management functionality.

    Covers the ``TestDatasetManager`` lookup API (listing, fetching by id,
    fetching messages) and the labelling invariants of the datasets it
    returns.
    """

    # Dataset ids the manager is expected to expose.
    EXPECTED_DATASET_IDS = (
        "dataset_suicidal_ideation",
        "dataset_anxiety_worry",
        "dataset_mild_concerns",
        "dataset_healthy_positive",
        "dataset_mixed_scenarios",
    )

    # Every value a message's ``pre_classified_label`` may take.
    VALID_LABELS = ("green", "yellow", "red")

    # Smallest number of messages the tests require of a dataset.
    MIN_MESSAGES = 10

    @staticmethod
    def _assert_uniform_label(dataset_id, expected_label):
        """Assert that a dataset is non-empty and uniformly labelled.

        Not collected by pytest (name does not start with ``test``).

        Args:
            dataset_id: Id passed to ``TestDatasetManager.get_dataset``.
            expected_label: Label every message in the dataset must carry.
        """
        dataset = TestDatasetManager.get_dataset(dataset_id)
        # Without this guard the label check below would pass vacuously
        # for a dataset that contains no messages at all.
        assert len(dataset.messages) > 0, f"{dataset_id} contains no messages"
        mislabelled = [
            message.message_id
            for message in dataset.messages
            if message.pre_classified_label != expected_label
        ]
        assert not mislabelled, (
            f"{dataset_id}: expected every message to be labelled "
            f"{expected_label!r}; offending message ids: {mislabelled}"
        )

    def test_get_all_datasets_returns_five_datasets(self):
        """Test that all five datasets are available."""
        datasets = TestDatasetManager.get_all_datasets()
        assert len(datasets) == len(self.EXPECTED_DATASET_IDS)
        missing = [
            dataset_id
            for dataset_id in self.EXPECTED_DATASET_IDS
            if dataset_id not in datasets
        ]
        assert not missing, f"datasets not returned: {missing}"

    def test_get_dataset_list_returns_metadata(self):
        """Test that dataset list includes required metadata."""
        dataset_list = TestDatasetManager.get_dataset_list()
        assert len(dataset_list) == len(self.EXPECTED_DATASET_IDS)

        required_keys = ("dataset_id", "name", "description", "message_count")
        for dataset_info in dataset_list:
            for key in required_keys:
                assert key in dataset_info, f"metadata entry lacks {key!r}"
            assert dataset_info["message_count"] >= self.MIN_MESSAGES

    def test_get_specific_dataset(self):
        """Test retrieving a specific dataset."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")
        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_suicidal_ideation"
        assert len(dataset.messages) >= self.MIN_MESSAGES

    def test_get_nonexistent_dataset_raises_error(self):
        """Test that requesting a nonexistent dataset raises ValueError."""
        with pytest.raises(ValueError):
            TestDatasetManager.get_dataset("nonexistent_dataset")

    def test_load_dataset_returns_dataset(self):
        """Test that load_dataset returns a valid dataset."""
        dataset = TestDatasetManager.load_dataset("dataset_anxiety_worry")
        assert isinstance(dataset, TestDataset)
        assert dataset.dataset_id == "dataset_anxiety_worry"

    def test_get_messages_from_dataset(self):
        """Test retrieving messages from a dataset."""
        messages = TestDatasetManager.get_messages_from_dataset(
            "dataset_healthy_positive"
        )
        assert len(messages) >= self.MIN_MESSAGES
        assert all(isinstance(msg, TestMessage) for msg in messages)

    def test_suicidal_ideation_dataset_has_red_messages(self):
        """Test that suicidal ideation dataset contains only RED messages."""
        self._assert_uniform_label("dataset_suicidal_ideation", "red")

    def test_anxiety_worry_dataset_has_yellow_messages(self):
        """Test that anxiety dataset contains only YELLOW messages."""
        self._assert_uniform_label("dataset_anxiety_worry", "yellow")

    def test_healthy_positive_dataset_has_green_messages(self):
        """Test that healthy dataset contains only GREEN messages."""
        self._assert_uniform_label("dataset_healthy_positive", "green")

    def test_mixed_scenarios_dataset_has_all_classifications(self):
        """Test that mixed scenarios dataset contains all three classifications."""
        dataset = TestDatasetManager.get_dataset("dataset_mixed_scenarios")
        classifications = {m.pre_classified_label for m in dataset.messages}
        absent = [
            label for label in self.VALID_LABELS if label not in classifications
        ]
        assert not absent, f"labels never used in mixed dataset: {absent}"

    def test_all_messages_have_required_fields(self):
        """Test that all messages have required fields."""
        datasets = TestDatasetManager.get_all_datasets()
        for dataset_id, dataset in datasets.items():
            for message in dataset.messages:
                assert message.message_id, f"{dataset_id}: message without id"
                assert message.text, (
                    f"{dataset_id}: message {message.message_id!r} has no text"
                )
                assert message.pre_classified_label in self.VALID_LABELS, (
                    f"{dataset_id}: message {message.message_id!r} has "
                    f"unexpected label {message.pre_classified_label!r}"
                )

    def test_all_datasets_have_unique_message_ids(self):
        """Test that message IDs are unique within each dataset."""
        datasets = TestDatasetManager.get_all_datasets()
        for dataset_id, dataset in datasets.items():
            seen = set()
            duplicates = set()
            # Track repeats explicitly so a failure names the colliding ids.
            for message in dataset.messages:
                if message.message_id in seen:
                    duplicates.add(message.message_id)
                seen.add(message.message_id)
            assert not duplicates, (
                f"{dataset_id}: duplicate message ids {duplicates}"
            )

    def test_dataset_message_count_property(self):
        """Test that dataset message_count property is accurate."""
        dataset = TestDatasetManager.get_dataset("dataset_suicidal_ideation")
        assert dataset.message_count == len(dataset.messages)
        assert dataset.message_count >= self.MIN_MESSAGES