Spaces:
Sleeping
Sleeping
File size: 10,685 Bytes
"""Tests for core services with mocked ML inference."""
from __future__ import annotations
import json
from unittest.mock import patch
import numpy as np
import pytest
from app.models.schemas import FeedbackEntry, SentimentLabel, SentimentResult
class TestLanguageDetection:
    """Exercise ``detect_language`` and ``detect_languages_batch``."""

    def test_detect_english(self):
        """An English sentence is reported as ``en`` or ``unknown``, confidence >= 0."""
        from app.services.language_detection import detect_language

        accepted_codes = ("en", "unknown")
        detection = detect_language("This is a test sentence in English")
        assert detection.language in accepted_codes
        assert detection.confidence >= 0.0

    def test_detect_empty_text(self):
        """The empty string is reported as ``unknown`` with zero confidence."""
        from app.services.language_detection import detect_language

        detection = detect_language("")
        assert detection.language == "unknown"
        assert detection.confidence == 0.0

    def test_detect_short_text(self):
        """The two-character input ``hi`` is reported as ``unknown``."""
        from app.services.language_detection import detect_language

        detection = detect_language("hi")
        assert detection.language == "unknown"

    def test_batch_detection(self):
        """A batch of three texts (one of them empty) yields three results."""
        from app.services.language_detection import detect_languages_batch

        samples = ["Hello world", "Bonjour le monde", ""]
        detections = detect_languages_batch(samples)
        assert len(detections) == 3
class TestSentiment:
    """Check the label produced by ``get_fallback_sentiment`` for sample texts."""

    @staticmethod
    def _fallback_label(text):
        """Return the ``label`` of ``get_fallback_sentiment(text)``."""
        # Imported inside the helper so the service module is only loaded
        # when one of the tests actually runs.
        from app.services.sentiment import get_fallback_sentiment

        return get_fallback_sentiment(text).label

    def test_fallback_sentiment_positive(self):
        """Text containing "great" and "amazing" is labelled POSITIVE."""
        label = self._fallback_label("This is great and amazing!")
        assert label == SentimentLabel.POSITIVE

    def test_fallback_sentiment_negative(self):
        """Text containing "terrible" and "awful" is labelled NEGATIVE."""
        label = self._fallback_label("This is terrible and awful")
        assert label == SentimentLabel.NEGATIVE

    def test_fallback_sentiment_neutral(self):
        """A plain statement about the weather is labelled NEUTRAL."""
        label = self._fallback_label("The weather is cloudy today")
        assert label == SentimentLabel.NEUTRAL
class TestFileProcessing:
    """Exercise ``parse_csv``, ``parse_json`` and ``parse_file``."""

    @staticmethod
    def _parse_json_payload(payload):
        """Serialise ``payload`` to UTF-8 JSON bytes and run it through ``parse_json``."""
        from app.services.file_processing import parse_json

        return parse_json(json.dumps(payload).encode())

    def test_parse_csv(self):
        """A two-row CSV with a ``text`` column yields two entries, first text intact."""
        from app.services.file_processing import parse_csv

        raw = (
            b"text,source\n"
            b"Hello world,test\n"
            b"Goodbye world,test\n"
        )
        parsed = parse_csv(raw)
        assert len(parsed) == 2
        assert parsed[0].text == "Hello world"

    def test_parse_json_array(self):
        """A JSON array of two ``{"text": ...}`` objects yields two entries."""
        parsed = self._parse_json_payload([{"text": "entry 1"}, {"text": "entry 2"}])
        assert len(parsed) == 2

    def test_parse_json_string_array(self):
        """A JSON array of two bare strings yields two entries."""
        parsed = self._parse_json_payload(["feedback one", "feedback two"])
        assert len(parsed) == 2

    def test_parse_json_with_wrapper(self):
        """A one-element array nested under a ``data`` key yields one entry."""
        parsed = self._parse_json_payload({"data": [{"text": "entry 1"}]})
        assert len(parsed) == 1

    def test_parse_csv_missing_text_column(self):
        """A CSV without a ``text`` column either parses or raises ``ValueError``."""
        from app.services.file_processing import parse_csv

        raw = b"name,age\nJohn,30\n"
        # Both outcomes are accepted: falling back to the first column, or
        # rejecting the file. Only an exception other than ValueError fails.
        try:
            parsed = parse_csv(raw)
            assert len(parsed) >= 0
        except ValueError:
            pass

    def test_unsupported_format(self):
        """A ``.txt`` filename makes ``parse_file`` raise ``ValueError`` matching "Unsupported"."""
        from app.services.file_processing import parse_file

        with pytest.raises(ValueError, match="Unsupported"):
            parse_file(b"content", "file.txt")
class TestAnomalyDetection:
    """Exercise ``detect_sentiment_anomalies`` on synthetic sentiment histories."""

    @staticmethod
    def _constant_history(label, score, count):
        """Build ``count`` separate results sharing ``label``/``score`` (confidence 0.9)."""
        return [
            SentimentResult(label=label, score=score, confidence=0.9)
            for _ in range(count)
        ]

    def test_no_anomalies_stable(self):
        """One hundred identical neutral results produce no alerts."""
        from app.services.anomaly_detection import detect_sentiment_anomalies

        history = self._constant_history(SentimentLabel.NEUTRAL, 0.5, 100)
        assert len(detect_sentiment_anomalies(history)) == 0

    def test_detects_sentiment_drop(self):
        """Sixty positive results followed by one negative raise a ``sentiment_drop`` alert."""
        from app.services.anomaly_detection import detect_sentiment_anomalies

        history = self._constant_history(SentimentLabel.POSITIVE, 0.8, 60)
        history += self._constant_history(SentimentLabel.NEGATIVE, 0.1, 1)

        raised = detect_sentiment_anomalies(history, window=50, threshold=1.5)
        assert len(raised) > 0
        assert raised[0].type.value == "sentiment_drop"

    def test_too_few_entries(self):
        """Five results with ``window=50`` produce no alerts."""
        from app.services.anomaly_detection import detect_sentiment_anomalies

        history = self._constant_history(SentimentLabel.NEUTRAL, 0.5, 5)
        raised = detect_sentiment_anomalies(history, window=50)
        assert len(raised) == 0
class TestDataQuality:
    """Exercise ``analyze_data_quality`` on empty and two-entry inputs."""

    def test_empty_entries(self):
        """An empty entry list gives a report with ``total_entries == 0``."""
        from app.services.data_quality import analyze_data_quality

        assert analyze_data_quality([]).total_entries == 0

    def test_quality_report(self):
        """Two entries (English/0.95 confidence, French/0.4 confidence) are summarised.

        The report is expected to count two entries in total, one
        low-confidence entry and one mixed-language entry.
        """
        from app.models.schemas import AnalyzedEntry, LanguageResult
        from app.services.data_quality import analyze_data_quality

        confident_english = AnalyzedEntry(
            id="1",
            text="Great product",
            source="test",
            sentiment=SentimentResult(
                label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95
            ),
            language=LanguageResult(
                language="en", confidence=0.99, method="langdetect"
            ),
            topic_id=0,
            topic_label="Topic 0",
        )
        uncertain_french = AnalyzedEntry(
            id="2",
            text="Mauvais service",
            source="test",
            sentiment=SentimentResult(
                label=SentimentLabel.NEGATIVE, score=0.2, confidence=0.4
            ),
            language=LanguageResult(
                language="fr", confidence=0.85, method="langdetect"
            ),
            topic_id=1,
            topic_label="Topic 1",
        )

        summary = analyze_data_quality([confident_english, uncertain_french])
        assert summary.total_entries == 2
        assert summary.low_confidence_count == 1
        assert summary.mixed_language_count == 1
class TestExport:
    """Exercise ``export_csv`` and ``export_json`` on a single analyzed entry."""

    @staticmethod
    def _single_entry_list():
        """Return a fresh one-element list holding the entry both export tests use."""
        from app.models.schemas import AnalyzedEntry, LanguageResult

        entry = AnalyzedEntry(
            id="1",
            text="Test",
            source="test",
            sentiment=SentimentResult(
                label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95
            ),
            language=LanguageResult(
                language="en", confidence=0.99, method="langdetect"
            ),
            topic_id=0,
            topic_label="Topic 0",
        )
        return [entry]

    def test_export_csv(self):
        """The CSV export is bytes containing ``id`` and the entry text."""
        from app.services.export import export_csv

        exported = export_csv(self._single_entry_list())
        assert b"id" in exported
        assert b"Test" in exported

    def test_export_json(self):
        """The JSON export decodes to a one-element sequence carrying the entry text."""
        from app.services.export import export_json

        exported = export_json(self._single_entry_list())
        decoded = json.loads(exported)
        assert len(decoded) == 1
        assert decoded[0]["text"] == "Test"
def _ml_available() -> bool:
    """Return True when both ``torch`` and ``transformers`` can be imported."""
    try:
        # Same order as a pair of plain import statements; the first module
        # that fails to import ends the check.
        for module_name in ("torch", "transformers"):
            __import__(module_name)
    except ImportError:
        return False
    return True
@pytest.mark.skipif(
    not _ml_available(),
    reason="ML models not installed — skipping real model tests",
)
class TestRealSentimentModel:
    """Diagnostic tests that run the real sentiment model (skipped without ML deps)."""

    @staticmethod
    def _analyze(texts):
        """Run ``analyze_sentiment_sync`` on ``texts`` and return its results."""
        from app.services.sentiment import analyze_sentiment_sync

        return analyze_sentiment_sync(texts)

    def test_model_loads(self):
        """After ``_load_model()`` the module-level ``_model`` is populated."""
        from app.services import sentiment

        sentiment._load_model()
        assert sentiment._model is not None

    def test_positive_english(self):
        """Clearly positive English: POSITIVE, score > 0.7, confidence > 0.5."""
        outcomes = self._analyze(["I love this product, it is amazing!"])
        assert len(outcomes) == 1
        first = outcomes[0]
        assert first.label == SentimentLabel.POSITIVE
        assert first.score > 0.7
        assert first.confidence > 0.5

    def test_negative_english(self):
        """Clearly negative English: NEGATIVE, score < 0.3, confidence > 0.5."""
        outcomes = self._analyze(["This is terrible, worst experience ever."])
        assert len(outcomes) == 1
        first = outcomes[0]
        assert first.label == SentimentLabel.NEGATIVE
        assert first.score < 0.3
        assert first.confidence > 0.5

    def test_neutral_english(self):
        """A factual delivery statement scores strictly between 0.3 and 0.7."""
        outcomes = self._analyze(["The order was delivered on Tuesday."])
        assert len(outcomes) == 1
        assert 0.3 < outcomes[0].score < 0.7

    def test_multilingual_german(self):
        """A satisfied German sentence: POSITIVE with score > 0.7."""
        first = self._analyze(["Ich bin sehr zufrieden mit dem Service!"])[0]
        assert first.label == SentimentLabel.POSITIVE
        assert first.score > 0.7

    def test_multilingual_spanish_negative(self):
        """A complaining Spanish sentence: NEGATIVE with score < 0.3."""
        first = self._analyze(["Este producto es horrible, no funciona."])[0]
        assert first.label == SentimentLabel.NEGATIVE
        assert first.score < 0.3

    def test_batch_produces_varied_scores(self):
        """A mixed batch must not collapse to 0.5 and must span more than 0.3."""
        batch = [
            "I love this!",
            "This is terrible.",
            "The weather is normal today.",
            "Best purchase I ever made!",
            "Worst customer service.",
        ]
        scores = [outcome.score for outcome in self._analyze(batch)]
        assert any(s != 0.5 for s in scores), f"All scores are 0.5: {scores}"
        spread = max(scores) - min(scores)
        assert spread > 0.3, f"Score spread too narrow: {scores}"

    def test_scores_not_all_neutral(self):
        """Strongly worded inputs must not all come back labelled NEUTRAL."""
        batch = [
            "Amazing fantastic wonderful product",
            "Horrible terrible awful experience",
            "Normal everyday standard thing",
        ]
        labels = [outcome.label for outcome in self._analyze(batch)]
        # Passes when NEUTRAL is absent, or when at least two distinct labels
        # appear; fails only if every label is NEUTRAL.
        assert (
            SentimentLabel.NEUTRAL not in labels or len(set(labels)) > 1
        ), f"All labels are neutral: {labels}"
|