"""Tests for core services with mocked ML inference.""" from __future__ import annotations import json from unittest.mock import patch import numpy as np import pytest from app.models.schemas import FeedbackEntry, SentimentLabel, SentimentResult class TestLanguageDetection: def test_detect_english(self): from app.services.language_detection import detect_language result = detect_language("This is a test sentence in English") assert result.language in ("en", "unknown") assert result.confidence >= 0.0 def test_detect_empty_text(self): from app.services.language_detection import detect_language result = detect_language("") assert result.language == "unknown" assert result.confidence == 0.0 def test_detect_short_text(self): from app.services.language_detection import detect_language result = detect_language("hi") assert result.language == "unknown" def test_batch_detection(self): from app.services.language_detection import detect_languages_batch results = detect_languages_batch(["Hello world", "Bonjour le monde", ""]) assert len(results) == 3 class TestSentiment: def test_fallback_sentiment_positive(self): from app.services.sentiment import get_fallback_sentiment result = get_fallback_sentiment("This is great and amazing!") assert result.label == SentimentLabel.POSITIVE def test_fallback_sentiment_negative(self): from app.services.sentiment import get_fallback_sentiment result = get_fallback_sentiment("This is terrible and awful") assert result.label == SentimentLabel.NEGATIVE def test_fallback_sentiment_neutral(self): from app.services.sentiment import get_fallback_sentiment result = get_fallback_sentiment("The weather is cloudy today") assert result.label == SentimentLabel.NEUTRAL class TestFileProcessing: def test_parse_csv(self): from app.services.file_processing import parse_csv content = b"text,source\nHello world,test\nGoodbye world,test\n" entries = parse_csv(content) assert len(entries) == 2 assert entries[0].text == "Hello world" def test_parse_json_array(self): from app.services.file_processing import parse_json data = [{"text": "entry 1"}, {"text": "entry 2"}] entries = parse_json(json.dumps(data).encode()) assert len(entries) == 2 def test_parse_json_string_array(self): from app.services.file_processing import parse_json data = ["feedback one", "feedback two"] entries = parse_json(json.dumps(data).encode()) assert len(entries) == 2 def test_parse_json_with_wrapper(self): from app.services.file_processing import parse_json data = {"data": [{"text": "entry 1"}]} entries = parse_json(json.dumps(data).encode()) assert len(entries) == 1 def test_parse_csv_missing_text_column(self): from app.services.file_processing import parse_csv content = b"name,age\nJohn,30\n" # Should fall back to first column or raise try: entries = parse_csv(content) assert len(entries) >= 0 except ValueError: pass def test_unsupported_format(self): from app.services.file_processing import parse_file with pytest.raises(ValueError, match="Unsupported"): parse_file(b"content", "file.txt") class TestAnomalyDetection: def test_no_anomalies_stable(self): from app.services.anomaly_detection import detect_sentiment_anomalies sentiments = [ SentimentResult(label=SentimentLabel.NEUTRAL, score=0.5, confidence=0.9) for _ in range(100) ] alerts = detect_sentiment_anomalies(sentiments) assert len(alerts) == 0 def test_detects_sentiment_drop(self): from app.services.anomaly_detection import detect_sentiment_anomalies sentiments = [ SentimentResult(label=SentimentLabel.POSITIVE, score=0.8, confidence=0.9) for _ in range(60) ] sentiments.append( SentimentResult(label=SentimentLabel.NEGATIVE, score=0.1, confidence=0.9) ) alerts = detect_sentiment_anomalies(sentiments, window=50, threshold=1.5) assert len(alerts) > 0 assert alerts[0].type.value == "sentiment_drop" def test_too_few_entries(self): from app.services.anomaly_detection import detect_sentiment_anomalies sentiments = [ SentimentResult(label=SentimentLabel.NEUTRAL, score=0.5, confidence=0.9) for _ in range(5) ] alerts = detect_sentiment_anomalies(sentiments, window=50) assert len(alerts) == 0 class TestDataQuality: def test_empty_entries(self): from app.services.data_quality import analyze_data_quality report = analyze_data_quality([]) assert report.total_entries == 0 def test_quality_report(self): from app.models.schemas import AnalyzedEntry, LanguageResult from app.services.data_quality import analyze_data_quality entries = [ AnalyzedEntry( id="1", text="Great product", source="test", sentiment=SentimentResult(label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95), language=LanguageResult(language="en", confidence=0.99, method="langdetect"), topic_id=0, topic_label="Topic 0", ), AnalyzedEntry( id="2", text="Mauvais service", source="test", sentiment=SentimentResult(label=SentimentLabel.NEGATIVE, score=0.2, confidence=0.4), language=LanguageResult(language="fr", confidence=0.85, method="langdetect"), topic_id=1, topic_label="Topic 1", ), ] report = analyze_data_quality(entries) assert report.total_entries == 2 assert report.low_confidence_count == 1 assert report.mixed_language_count == 1 class TestExport: def test_export_csv(self): from app.models.schemas import AnalyzedEntry, LanguageResult from app.services.export import export_csv entries = [ AnalyzedEntry( id="1", text="Test", source="test", sentiment=SentimentResult(label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95), language=LanguageResult(language="en", confidence=0.99, method="langdetect"), topic_id=0, topic_label="Topic 0", ), ] result = export_csv(entries) assert b"id" in result assert b"Test" in result def test_export_json(self): from app.models.schemas import AnalyzedEntry, LanguageResult from app.services.export import export_json entries = [ AnalyzedEntry( id="1", text="Test", source="test", sentiment=SentimentResult(label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95), language=LanguageResult(language="en", confidence=0.99, method="langdetect"), topic_id=0, topic_label="Topic 0", ), ] result = export_json(entries) data = json.loads(result) assert len(data) == 1 assert data[0]["text"] == "Test" def _ml_available() -> bool: try: import torch # noqa: F401 import transformers # noqa: F401 return True except ImportError: return False @pytest.mark.skipif( not _ml_available(), reason="ML models not installed — skipping real model tests", ) class TestRealSentimentModel: """Diagnostic tests using the real ML model (not mocked).""" def test_model_loads(self): from app.services import sentiment sentiment._load_model() assert sentiment._model is not None def test_positive_english(self): from app.services.sentiment import analyze_sentiment_sync results = analyze_sentiment_sync(["I love this product, it is amazing!"]) assert len(results) == 1 assert results[0].label == SentimentLabel.POSITIVE assert results[0].score > 0.7 assert results[0].confidence > 0.5 def test_negative_english(self): from app.services.sentiment import analyze_sentiment_sync results = analyze_sentiment_sync(["This is terrible, worst experience ever."]) assert len(results) == 1 assert results[0].label == SentimentLabel.NEGATIVE assert results[0].score < 0.3 assert results[0].confidence > 0.5 def test_neutral_english(self): from app.services.sentiment import analyze_sentiment_sync results = analyze_sentiment_sync(["The order was delivered on Tuesday."]) assert len(results) == 1 assert results[0].score > 0.3 assert results[0].score < 0.7 def test_multilingual_german(self): from app.services.sentiment import analyze_sentiment_sync results = analyze_sentiment_sync(["Ich bin sehr zufrieden mit dem Service!"]) assert results[0].label == SentimentLabel.POSITIVE assert results[0].score > 0.7 def test_multilingual_spanish_negative(self): from app.services.sentiment import analyze_sentiment_sync results = analyze_sentiment_sync(["Este producto es horrible, no funciona."]) assert results[0].label == SentimentLabel.NEGATIVE assert results[0].score < 0.3 def test_batch_produces_varied_scores(self): from app.services.sentiment import analyze_sentiment_sync texts = [ "I love this!", "This is terrible.", "The weather is normal today.", "Best purchase I ever made!", "Worst customer service.", ] results = analyze_sentiment_sync(texts) scores = [r.score for r in results] assert not all(s == 0.5 for s in scores), f"All scores are 0.5: {scores}" assert max(scores) - min(scores) > 0.3, f"Score spread too narrow: {scores}" def test_scores_not_all_neutral(self): from app.services.sentiment import analyze_sentiment_sync texts = [ "Amazing fantastic wonderful product", "Horrible terrible awful experience", "Normal everyday standard thing", ] results = analyze_sentiment_sync(texts) labels = [r.label for r in results] assert SentimentLabel.NEUTRAL not in labels or len(set(labels)) > 1, \ f"All labels are neutral: {labels}"