File size: 10,685 Bytes
6242ddb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
"""Tests for core services with mocked ML inference, plus skippable diagnostics against the real model."""

from __future__ import annotations

import json
from unittest.mock import patch

import numpy as np
import pytest

from app.models.schemas import FeedbackEntry, SentimentLabel, SentimentResult


class TestLanguageDetection:
    """Checks for single-text and batch language detection."""

    def test_detect_english(self):
        """An English sentence is reported as "en" (or "unknown")."""
        from app.services.language_detection import detect_language

        detection = detect_language("This is a test sentence in English")

        # The test deliberately accepts "unknown" as well as "en".
        assert detection.language in ("en", "unknown")
        assert detection.confidence >= 0.0

    def test_detect_empty_text(self):
        """Empty input yields "unknown" with zero confidence."""
        from app.services.language_detection import detect_language

        detection = detect_language("")

        assert detection.language == "unknown"
        assert detection.confidence == 0.0

    def test_detect_short_text(self):
        """A two-character input yields "unknown"."""
        from app.services.language_detection import detect_language

        detection = detect_language("hi")

        assert detection.language == "unknown"

    def test_batch_detection(self):
        """The batch API returns one result per input text."""
        from app.services.language_detection import detect_languages_batch

        texts = ["Hello world", "Bonjour le monde", ""]
        detections = detect_languages_batch(texts)

        assert len(detections) == 3


class TestSentiment:
    """Checks for the fallback sentiment classifier."""

    @staticmethod
    def _fallback_label(text):
        """Return the label ``get_fallback_sentiment`` assigns to *text*."""
        from app.services.sentiment import get_fallback_sentiment

        outcome = get_fallback_sentiment(text)
        return outcome.label

    def test_fallback_sentiment_positive(self):
        """Clearly positive wording is labelled POSITIVE."""
        label = self._fallback_label("This is great and amazing!")
        assert label == SentimentLabel.POSITIVE

    def test_fallback_sentiment_negative(self):
        """Clearly negative wording is labelled NEGATIVE."""
        label = self._fallback_label("This is terrible and awful")
        assert label == SentimentLabel.NEGATIVE

    def test_fallback_sentiment_neutral(self):
        """A sentence without sentiment wording is labelled NEUTRAL."""
        label = self._fallback_label("The weather is cloudy today")
        assert label == SentimentLabel.NEUTRAL


class TestFileProcessing:
    """Checks for the CSV/JSON upload parsers and format dispatch."""

    @staticmethod
    def _parse_json_payload(payload):
        """Serialize *payload* to JSON bytes and run it through ``parse_json``."""
        from app.services.file_processing import parse_json

        encoded = json.dumps(payload).encode()
        return parse_json(encoded)

    def test_parse_csv(self):
        """A CSV with a ``text`` column yields one entry per data row."""
        from app.services.file_processing import parse_csv

        raw = b"text,source\nHello world,test\nGoodbye world,test\n"
        rows = parse_csv(raw)

        assert len(rows) == 2
        first_row = rows[0]
        assert first_row.text == "Hello world"

    def test_parse_json_array(self):
        """A JSON array of objects yields one entry per object."""
        parsed = self._parse_json_payload([{"text": "entry 1"}, {"text": "entry 2"}])
        assert len(parsed) == 2

    def test_parse_json_string_array(self):
        """A JSON array of bare strings yields one entry per string."""
        parsed = self._parse_json_payload(["feedback one", "feedback two"])
        assert len(parsed) == 2

    def test_parse_json_with_wrapper(self):
        """A list nested under a top-level ``data`` key is unwrapped."""
        parsed = self._parse_json_payload({"data": [{"text": "entry 1"}]})
        assert len(parsed) == 1

    def test_parse_csv_missing_text_column(self):
        """A CSV without a ``text`` column either parses or raises ValueError."""
        from app.services.file_processing import parse_csv

        raw = b"name,age\nJohn,30\n"
        # Both outcomes are tolerated: falling back to another column, or
        # rejecting the file with ValueError. The length check is trivially
        # true, so this only guards against other exception types.
        try:
            rows = parse_csv(raw)
            assert len(rows) >= 0
        except ValueError:
            pass

    def test_unsupported_format(self):
        """An unrecognised file extension is rejected with ValueError."""
        from app.services.file_processing import parse_file

        with pytest.raises(ValueError, match="Unsupported"):
            parse_file(b"content", "file.txt")


class TestAnomalyDetection:
    """Checks for sentiment anomaly detection over a sequence of results."""

    @staticmethod
    def _repeated(label, score, count):
        """Build *count* separate results with the given label/score (confidence 0.9)."""
        return [
            SentimentResult(label=label, score=score, confidence=0.9)
            for _ in range(count)
        ]

    def test_no_anomalies_stable(self):
        """A perfectly flat series produces no alerts."""
        from app.services.anomaly_detection import detect_sentiment_anomalies

        flat_series = self._repeated(SentimentLabel.NEUTRAL, 0.5, 100)
        alerts = detect_sentiment_anomalies(flat_series)

        assert len(alerts) == 0

    def test_detects_sentiment_drop(self):
        """One sharply negative result after a positive run raises a drop alert."""
        from app.services.anomaly_detection import detect_sentiment_anomalies

        # 60 steady positive results followed by a single low-score outlier.
        series = self._repeated(SentimentLabel.POSITIVE, 0.8, 60)
        series += self._repeated(SentimentLabel.NEGATIVE, 0.1, 1)

        alerts = detect_sentiment_anomalies(series, window=50, threshold=1.5)

        assert len(alerts) > 0
        first_alert = alerts[0]
        assert first_alert.type.value == "sentiment_drop"

    def test_too_few_entries(self):
        """Fewer results than the window size produce no alerts."""
        from app.services.anomaly_detection import detect_sentiment_anomalies

        short_series = self._repeated(SentimentLabel.NEUTRAL, 0.5, 5)
        alerts = detect_sentiment_anomalies(short_series, window=50)

        assert len(alerts) == 0


class TestDataQuality:
    """Checks for the data-quality report built from analyzed entries."""

    def test_empty_entries(self):
        """An empty input produces a report with zero total entries."""
        from app.services.data_quality import analyze_data_quality

        summary = analyze_data_quality([])

        assert summary.total_entries == 0

    def test_quality_report(self):
        """Two entries give one low-confidence and one mixed-language count."""
        from app.models.schemas import AnalyzedEntry, LanguageResult
        from app.services.data_quality import analyze_data_quality

        # English entry with a high-confidence positive sentiment.
        confident_positive = SentimentResult(
            label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95
        )
        english = LanguageResult(language="en", confidence=0.99, method="langdetect")
        english_entry = AnalyzedEntry(
            id="1",
            text="Great product",
            source="test",
            sentiment=confident_positive,
            language=english,
            topic_id=0,
            topic_label="Topic 0",
        )

        # French entry with sentiment confidence 0.4 — presumably the one the
        # report counts as low-confidence and as mixed-language.
        unsure_negative = SentimentResult(
            label=SentimentLabel.NEGATIVE, score=0.2, confidence=0.4
        )
        french = LanguageResult(language="fr", confidence=0.85, method="langdetect")
        french_entry = AnalyzedEntry(
            id="2",
            text="Mauvais service",
            source="test",
            sentiment=unsure_negative,
            language=french,
            topic_id=1,
            topic_label="Topic 1",
        )

        summary = analyze_data_quality([english_entry, french_entry])

        assert summary.total_entries == 2
        assert summary.low_confidence_count == 1
        assert summary.mixed_language_count == 1


class TestExport:
    """Checks for CSV and JSON export of analyzed entries."""

    @staticmethod
    def _sample_entries():
        """Return a one-element list holding a positive English entry with text "Test"."""
        from app.models.schemas import AnalyzedEntry, LanguageResult

        sentiment = SentimentResult(
            label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95
        )
        language = LanguageResult(language="en", confidence=0.99, method="langdetect")
        entry = AnalyzedEntry(
            id="1",
            text="Test",
            source="test",
            sentiment=sentiment,
            language=language,
            topic_id=0,
            topic_label="Topic 0",
        )
        return [entry]

    def test_export_csv(self):
        """CSV output contains the ``id`` header and the entry text."""
        from app.services.export import export_csv

        payload = export_csv(self._sample_entries())

        assert b"id" in payload
        assert b"Test" in payload

    def test_export_json(self):
        """JSON output decodes to a one-element list carrying the entry text."""
        from app.services.export import export_json

        payload = export_json(self._sample_entries())
        decoded = json.loads(payload)

        assert len(decoded) == 1
        assert decoded[0]["text"] == "Test"


def _ml_available() -> bool:
    try:
        import torch  # noqa: F401
        import transformers  # noqa: F401
        return True
    except ImportError:
        return False


@pytest.mark.skipif(
    not _ml_available(),
    reason="ML models not installed — skipping real model tests",
)
class TestRealSentimentModel:
    """Diagnostics that exercise the real sentiment model rather than a mock.

    The whole class is skipped when ``_ml_available()`` returns False.
    """

    @staticmethod
    def _analyze(*texts):
        """Run ``analyze_sentiment_sync`` on *texts* and return its results."""
        from app.services.sentiment import analyze_sentiment_sync

        return analyze_sentiment_sync(list(texts))

    def test_model_loads(self):
        """Loading the model leaves the module-level ``_model`` populated."""
        from app.services import sentiment as sentiment_service

        sentiment_service._load_model()

        assert sentiment_service._model is not None

    def test_positive_english(self):
        """A strongly positive English sentence scores high with good confidence."""
        results = self._analyze("I love this product, it is amazing!")

        assert len(results) == 1
        outcome = results[0]
        assert outcome.label == SentimentLabel.POSITIVE
        assert outcome.score > 0.7
        assert outcome.confidence > 0.5

    def test_negative_english(self):
        """A strongly negative English sentence scores low with good confidence."""
        results = self._analyze("This is terrible, worst experience ever.")

        assert len(results) == 1
        outcome = results[0]
        assert outcome.label == SentimentLabel.NEGATIVE
        assert outcome.score < 0.3
        assert outcome.confidence > 0.5

    def test_neutral_english(self):
        """A factual English sentence lands strictly between 0.3 and 0.7."""
        results = self._analyze("The order was delivered on Tuesday.")

        assert len(results) == 1
        assert 0.3 < results[0].score < 0.7

    def test_multilingual_german(self):
        """A positive German sentence is labelled POSITIVE with a high score."""
        outcome = self._analyze("Ich bin sehr zufrieden mit dem Service!")[0]

        assert outcome.label == SentimentLabel.POSITIVE
        assert outcome.score > 0.7

    def test_multilingual_spanish_negative(self):
        """A negative Spanish sentence is labelled NEGATIVE with a low score."""
        outcome = self._analyze("Este producto es horrible, no funciona.")[0]

        assert outcome.label == SentimentLabel.NEGATIVE
        assert outcome.score < 0.3

    def test_batch_produces_varied_scores(self):
        """A mixed batch must not collapse to 0.5 and must span more than 0.3."""
        results = self._analyze(
            "I love this!",
            "This is terrible.",
            "The weather is normal today.",
            "Best purchase I ever made!",
            "Worst customer service.",
        )
        scores = [item.score for item in results]

        # Guards against every input receiving the same 0.5 score.
        assert any(s != 0.5 for s in scores), f"All scores are 0.5: {scores}"
        spread = max(scores) - min(scores)
        assert spread > 0.3, f"Score spread too narrow: {scores}"

    def test_scores_not_all_neutral(self):
        """A batch with obvious polarity must not be labelled NEUTRAL throughout."""
        results = self._analyze(
            "Amazing fantastic wonderful product",
            "Horrible terrible awful experience",
            "Normal everyday standard thing",
        )
        labels = [item.label for item in results]

        # Fails only when there is at least one label and every one is NEUTRAL.
        every_label_neutral = bool(labels) and all(
            label == SentimentLabel.NEUTRAL for label in labels
        )
        assert not every_label_neutral, \
            f"All labels are neutral: {labels}"