Spaces:

alexchilton
/

topic-analysis

Sleeping

topic-analysis / backend /tests /test_services.py

alexchilton

Initial deployment: Sentiment & Topic Analysis Dashboard

6242ddb 2 months ago

10.7 kB

	"""Tests for core services with mocked ML inference."""

	from __future__ import annotations

	import json
	from unittest.mock import patch

	import numpy as np
	import pytest

	from app.models.schemas import FeedbackEntry, SentimentLabel, SentimentResult


	class TestLanguageDetection:
	    """Checks for the helpers in ``app.services.language_detection``."""

	    def test_detect_english(self):
	        """An English sentence is reported as ``en``; ``unknown`` is tolerated too."""
	        from app.services.language_detection import detect_language

	        accepted_codes = ("en", "unknown")
	        detected = detect_language("This is a test sentence in English")
	        assert detected.language in accepted_codes
	        assert detected.confidence >= 0.0

	    def test_detect_empty_text(self):
	        """An empty string is reported as ``unknown`` with zero confidence."""
	        from app.services.language_detection import detect_language

	        detected = detect_language("")
	        assert detected.language == "unknown"
	        assert detected.confidence == 0.0

	    def test_detect_short_text(self):
	        """The two-character input ``hi`` is reported as ``unknown``."""
	        from app.services.language_detection import detect_language

	        detected = detect_language("hi")
	        assert detected.language == "unknown"

	    def test_batch_detection(self):
	        """The batch helper returns one result per input, including the empty one."""
	        from app.services.language_detection import detect_languages_batch

	        samples = ["Hello world", "Bonjour le monde", ""]
	        batch = detect_languages_batch(samples)
	        assert len(batch) == 3


	class TestSentiment:
	    """Checks for ``get_fallback_sentiment`` in ``app.services.sentiment``."""

	    @staticmethod
	    def _fallback_label(text):
	        """Run the fallback analyser on ``text`` and return the label it assigns."""
	        from app.services.sentiment import get_fallback_sentiment

	        return get_fallback_sentiment(text).label

	    def test_fallback_sentiment_positive(self):
	        """Text containing 'great' and 'amazing' is labelled POSITIVE."""
	        label = self._fallback_label("This is great and amazing!")
	        assert label == SentimentLabel.POSITIVE

	    def test_fallback_sentiment_negative(self):
	        """Text containing 'terrible' and 'awful' is labelled NEGATIVE."""
	        label = self._fallback_label("This is terrible and awful")
	        assert label == SentimentLabel.NEGATIVE

	    def test_fallback_sentiment_neutral(self):
	        """A plain statement about the weather is labelled NEUTRAL."""
	        label = self._fallback_label("The weather is cloudy today")
	        assert label == SentimentLabel.NEUTRAL


	class TestFileProcessing:
	    """Checks for the parsers in ``app.services.file_processing``."""

	    def test_parse_csv(self):
	        """A CSV with a ``text`` column yields one entry per data row."""
	        from app.services.file_processing import parse_csv

	        content = b"text,source\nHello world,test\nGoodbye world,test\n"
	        entries = parse_csv(content)
	        assert len(entries) == 2
	        assert entries[0].text == "Hello world"

	    def test_parse_json_array(self):
	        """A JSON array of objects with a ``text`` key yields one entry each."""
	        from app.services.file_processing import parse_json

	        data = [{"text": "entry 1"}, {"text": "entry 2"}]
	        entries = parse_json(json.dumps(data).encode())
	        assert len(entries) == 2

	    def test_parse_json_string_array(self):
	        """A JSON array of bare strings yields one entry per string."""
	        from app.services.file_processing import parse_json

	        data = ["feedback one", "feedback two"]
	        entries = parse_json(json.dumps(data).encode())
	        assert len(entries) == 2

	    def test_parse_json_with_wrapper(self):
	        """A JSON object holding the list under a ``data`` key is unwrapped."""
	        from app.services.file_processing import parse_json

	        data = {"data": [{"text": "entry 1"}]}
	        entries = parse_json(json.dumps(data).encode())
	        assert len(entries) == 1

	    def test_parse_csv_missing_text_column(self):
	        """Without a ``text`` column the parser may either fall back or raise.

	        Both outcomes are accepted: a ``ValueError`` rejecting the file, or a
	        list of entries built from some other column.
	        """
	        from app.services.file_processing import parse_csv

	        content = b"name,age\nJohn,30\n"
	        # Keep the try body to the single call that is allowed to raise.
	        try:
	            entries = parse_csv(content)
	        except ValueError:
	            # Rejecting the file outright is an accepted outcome.
	            pass
	        else:
	            # A length check of ">= 0" can never fail, so instead verify that
	            # whatever the fallback produced carries usable string text.
	            assert all(isinstance(entry.text, str) for entry in entries)

	    def test_unsupported_format(self):
	        """A ``.txt`` filename is rejected with a ValueError mentioning 'Unsupported'."""
	        from app.services.file_processing import parse_file

	        with pytest.raises(ValueError, match="Unsupported"):
	            parse_file(b"content", "file.txt")


	class TestAnomalyDetection:
	    """Checks for ``detect_sentiment_anomalies`` in ``app.services.anomaly_detection``."""

	    @staticmethod
	    def _constant_run(label, score, count):
	        """Build ``count`` separate results sharing ``label``/``score`` at confidence 0.9."""
	        return [
	            SentimentResult(label=label, score=score, confidence=0.9)
	            for _ in range(count)
	        ]

	    def test_no_anomalies_stable(self):
	        """One hundred identical neutral results produce no alerts."""
	        from app.services.anomaly_detection import detect_sentiment_anomalies

	        history = self._constant_run(SentimentLabel.NEUTRAL, 0.5, 100)
	        alerts = detect_sentiment_anomalies(history)
	        assert len(alerts) == 0

	    def test_detects_sentiment_drop(self):
	        """A single low score after sixty high scores raises a ``sentiment_drop`` alert."""
	        from app.services.anomaly_detection import detect_sentiment_anomalies

	        history = self._constant_run(SentimentLabel.POSITIVE, 0.8, 60)
	        outlier = SentimentResult(
	            label=SentimentLabel.NEGATIVE, score=0.1, confidence=0.9
	        )
	        history.append(outlier)
	        alerts = detect_sentiment_anomalies(history, window=50, threshold=1.5)
	        assert len(alerts) > 0
	        assert alerts[0].type.value == "sentiment_drop"

	    def test_too_few_entries(self):
	        """Five results with ``window=50`` produce no alerts."""
	        from app.services.anomaly_detection import detect_sentiment_anomalies

	        history = self._constant_run(SentimentLabel.NEUTRAL, 0.5, 5)
	        alerts = detect_sentiment_anomalies(history, window=50)
	        assert len(alerts) == 0


	class TestDataQuality:
	    """Checks for ``analyze_data_quality`` in ``app.services.data_quality``."""

	    def test_empty_entries(self):
	        """An empty input list yields a report with zero total entries."""
	        from app.services.data_quality import analyze_data_quality

	        summary = analyze_data_quality([])
	        assert summary.total_entries == 0

	    def test_quality_report(self):
	        """Two entries (English/high confidence, French/low confidence) are counted."""
	        from app.models.schemas import AnalyzedEntry, LanguageResult
	        from app.services.data_quality import analyze_data_quality

	        english_entry = AnalyzedEntry(
	            id="1",
	            text="Great product",
	            source="test",
	            sentiment=SentimentResult(
	                label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95
	            ),
	            language=LanguageResult(
	                language="en", confidence=0.99, method="langdetect"
	            ),
	            topic_id=0,
	            topic_label="Topic 0",
	        )
	        french_entry = AnalyzedEntry(
	            id="2",
	            text="Mauvais service",
	            source="test",
	            sentiment=SentimentResult(
	                label=SentimentLabel.NEGATIVE, score=0.2, confidence=0.4
	            ),
	            language=LanguageResult(
	                language="fr", confidence=0.85, method="langdetect"
	            ),
	            topic_id=1,
	            topic_label="Topic 1",
	        )

	        summary = analyze_data_quality([english_entry, french_entry])
	        assert summary.total_entries == 2
	        assert summary.low_confidence_count == 1
	        assert summary.mixed_language_count == 1


	class TestExport:
	    """Checks for the exporters in ``app.services.export``."""

	    @staticmethod
	    def _single_entry_list():
	        """Return a one-element list holding the analysed entry both tests export."""
	        from app.models.schemas import AnalyzedEntry, LanguageResult

	        sample = AnalyzedEntry(
	            id="1",
	            text="Test",
	            source="test",
	            sentiment=SentimentResult(
	                label=SentimentLabel.POSITIVE, score=0.9, confidence=0.95
	            ),
	            language=LanguageResult(
	                language="en", confidence=0.99, method="langdetect"
	            ),
	            topic_id=0,
	            topic_label="Topic 0",
	        )
	        return [sample]

	    def test_export_csv(self):
	        """The CSV export is bytes containing ``id`` and the entry text."""
	        from app.services.export import export_csv

	        payload = export_csv(self._single_entry_list())
	        assert b"id" in payload
	        assert b"Test" in payload

	    def test_export_json(self):
	        """The JSON export decodes to a one-element list carrying the entry text."""
	        from app.services.export import export_json

	        payload = export_json(self._single_entry_list())
	        decoded = json.loads(payload)
	        assert len(decoded) == 1
	        assert decoded[0]["text"] == "Test"


	def _ml_available() -> bool:
	    """Return True when both ``torch`` and ``transformers`` are installed.

	    This runs while the module is being collected (it feeds a ``skipif``
	    marker), so the packages are only located, not imported: importing them
	    just to probe availability would slow down every test in the file.
	    """
	    from importlib.util import find_spec

	    try:
	        return all(
	            find_spec(package) is not None
	            for package in ("torch", "transformers")
	        )
	    except (ImportError, ValueError):
	        # find_spec raises ValueError when an already-loaded module has no
	        # usable __spec__; treat any lookup failure as "not available".
	        return False


	@pytest.mark.skipif(
	    not _ml_available(),
	    reason="ML models not installed — skipping real model tests",
	)
	class TestRealSentimentModel:
	    """Diagnostics that run the real (unmocked) sentiment model.

	    The whole class is skipped when ``_ml_available()`` reports False.
	    """

	    def test_model_loads(self):
	        """After ``_load_model()`` the module-level ``_model`` is populated."""
	        from app.services import sentiment

	        sentiment._load_model()
	        assert sentiment._model is not None

	    def test_positive_english(self):
	        """A clearly positive English sentence scores above 0.7 as POSITIVE."""
	        from app.services.sentiment import analyze_sentiment_sync

	        outcome = analyze_sentiment_sync(["I love this product, it is amazing!"])
	        assert len(outcome) == 1
	        first = outcome[0]
	        assert first.label == SentimentLabel.POSITIVE
	        assert first.score > 0.7
	        assert first.confidence > 0.5

	    def test_negative_english(self):
	        """A clearly negative English sentence scores below 0.3 as NEGATIVE."""
	        from app.services.sentiment import analyze_sentiment_sync

	        outcome = analyze_sentiment_sync(["This is terrible, worst experience ever."])
	        assert len(outcome) == 1
	        first = outcome[0]
	        assert first.label == SentimentLabel.NEGATIVE
	        assert first.score < 0.3
	        assert first.confidence > 0.5

	    def test_neutral_english(self):
	        """A factual delivery statement scores strictly between 0.3 and 0.7."""
	        from app.services.sentiment import analyze_sentiment_sync

	        outcome = analyze_sentiment_sync(["The order was delivered on Tuesday."])
	        assert len(outcome) == 1
	        assert 0.3 < outcome[0].score < 0.7

	    def test_multilingual_german(self):
	        """A positive German sentence scores above 0.7 as POSITIVE."""
	        from app.services.sentiment import analyze_sentiment_sync

	        first = analyze_sentiment_sync(["Ich bin sehr zufrieden mit dem Service!"])[0]
	        assert first.label == SentimentLabel.POSITIVE
	        assert first.score > 0.7

	    def test_multilingual_spanish_negative(self):
	        """A negative Spanish sentence scores below 0.3 as NEGATIVE."""
	        from app.services.sentiment import analyze_sentiment_sync

	        first = analyze_sentiment_sync(["Este producto es horrible, no funciona."])[0]
	        assert first.label == SentimentLabel.NEGATIVE
	        assert first.score < 0.3

	    def test_batch_produces_varied_scores(self):
	        """A mixed batch must not collapse to 0.5 and must spread by more than 0.3."""
	        from app.services.sentiment import analyze_sentiment_sync

	        batch = [
	            "I love this!",
	            "This is terrible.",
	            "The weather is normal today.",
	            "Best purchase I ever made!",
	            "Worst customer service.",
	        ]
	        scores = [item.score for item in analyze_sentiment_sync(batch)]
	        assert any(s != 0.5 for s in scores), f"All scores are 0.5: {scores}"
	        spread = max(scores) - min(scores)
	        assert spread > 0.3, f"Score spread too narrow: {scores}"

	    def test_scores_not_all_neutral(self):
	        """Strongly worded inputs must not all come back labelled NEUTRAL."""
	        from app.services.sentiment import analyze_sentiment_sync

	        batch = [
	            "Amazing fantastic wonderful product",
	            "Horrible terrible awful experience",
	            "Normal everyday standard thing",
	        ]
	        labels = [item.label for item in analyze_sentiment_sync(batch)]
	        # Fails only when NEUTRAL is present and it is the sole distinct label.
	        only_neutral = SentimentLabel.NEUTRAL in labels and len(set(labels)) <= 1
	        assert not only_neutral, f"All labels are neutral: {labels}"