"""Tests for advanced analytics.""" import pytest import pandas as pd import numpy as np from analysis.predictive_intervals import ( calculate_predictive_interval, rank_by_predictive_interval, get_top_positive_by_interval, get_top_negative_by_interval, calculate_intervals_for_dataframe ) from analysis.category_analytics import CategoryAnalytics from analysis.thread_analysis import ThreadAnalyzer class TestPredictiveIntervals: """Test suite for predictive intervals.""" def test_calculate_predictive_interval_high_positive(self): """Test predictive interval with high positive ratio.""" interval = calculate_predictive_interval( positive_count=80, negative_count=20, neutral_count=0 ) assert 0.0 <= interval <= 1.0 assert interval > 0.5 # Should be high for mostly positive def test_calculate_predictive_interval_high_negative(self): """Test predictive interval with high negative ratio.""" interval = calculate_predictive_interval( positive_count=20, negative_count=80, neutral_count=0 ) assert 0.0 <= interval <= 1.0 assert interval < 0.5 # Should be low for mostly negative def test_calculate_predictive_interval_small_sample(self): """Test predictive interval with small sample.""" # Single positive comment interval_small = calculate_predictive_interval( positive_count=1, negative_count=0, neutral_count=0 ) # Many positive comments interval_large = calculate_predictive_interval( positive_count=100, negative_count=0, neutral_count=0 ) # Small sample should have lower interval (more uncertainty) assert interval_small < interval_large def test_calculate_predictive_interval_confidence_levels(self): """Test different confidence levels.""" interval_90 = calculate_predictive_interval( positive_count=80, negative_count=20, confidence_level=0.90 ) interval_95 = calculate_predictive_interval( positive_count=80, negative_count=20, confidence_level=0.95 ) interval_99 = calculate_predictive_interval( positive_count=80, negative_count=20, confidence_level=0.99 ) # Higher confidence = lower bound (more conservative) assert interval_90 >= interval_95 >= interval_99 def test_rank_by_predictive_interval(self): """Test ranking by predictive interval.""" data = [ {"id": "item1", "positive_count": 80, "negative_count": 20}, {"id": "item2", "positive_count": 1, "negative_count": 0}, {"id": "item3", "positive_count": 50, "negative_count": 50}, ] ranked = rank_by_predictive_interval(data) assert len(ranked) == 3 assert "predictive_interval" in ranked[0] assert ranked[0]["predictive_interval"] >= ranked[-1]["predictive_interval"] def test_get_top_positive_by_interval(self): """Test getting top positive items.""" data = [ {"id": "item1", "positive_count": 80, "negative_count": 20}, {"id": "item2", "positive_count": 1, "negative_count": 0}, {"id": "item3", "positive_count": 50, "negative_count": 50}, ] top = get_top_positive_by_interval(data, top_k=2) assert len(top) == 2 assert top[0]["predictive_interval"] >= top[1]["predictive_interval"] def test_get_top_negative_by_interval(self): """Test getting top negative items.""" data = [ {"id": "item1", "positive_count": 20, "negative_count": 80}, {"id": "item2", "positive_count": 0, "negative_count": 1}, {"id": "item3", "positive_count": 50, "negative_count": 50}, ] top = get_top_negative_by_interval(data, top_k=2) assert len(top) == 2 # Most negative should have lowest interval assert top[0]["predictive_interval"] <= top[1]["predictive_interval"] def test_calculate_intervals_for_dataframe(self): """Test calculating intervals for DataFrame.""" df = pd.DataFrame({ "id": ["item1", "item2"], "positive_count": [80, 20], "negative_count": [20, 80] }) df_result = calculate_intervals_for_dataframe(df) assert "predictive_interval" in df_result.columns assert len(df_result) == 2 assert df_result.loc[0, "predictive_interval"] > df_result.loc[1, "predictive_interval"] class TestCategoryAnalytics: """Test suite for category analytics.""" @pytest.fixture def sample_data(self): """Create sample data for testing.""" return [ {"category": "politics", "text": "Отличная новость!"}, {"category": "politics", "text": "Ужасная ситуация..."}, {"category": "economy", "text": "Нормально"}, ] def test_category_analytics_initialization(self): """Test category analytics initialization.""" analytics = CategoryAnalytics() assert analytics is not None assert analytics.analyzer is not None def test_analyze_category_sentiment(self, sample_data): """Test category sentiment analysis.""" analytics = CategoryAnalytics() # This will actually run sentiment analysis, so it may be slow # For faster tests, we could mock the analyzer stats = analytics.analyze_category_sentiment(sample_data) assert "politics" in stats assert "economy" in stats assert "total_comments" in stats["politics"] assert "positive_count" in stats["politics"] assert "predictive_interval" in stats["politics"] def test_rank_categories_by_sentiment(self, sample_data): """Test ranking categories by sentiment.""" analytics = CategoryAnalytics() stats = analytics.analyze_category_sentiment(sample_data) ranked = analytics.rank_categories_by_sentiment(stats, sort_by="predictive_interval") assert len(ranked) >= 1 assert "category" in ranked[0] assert "predictive_interval" in ranked[0] def test_get_top_positive_categories(self, sample_data): """Test getting top positive categories.""" analytics = CategoryAnalytics() stats = analytics.analyze_category_sentiment(sample_data) top = analytics.get_top_positive_categories(stats, top_k=5) assert len(top) <= 5 if len(top) > 1: assert top[0]["predictive_interval"] >= top[1]["predictive_interval"] def test_analyze_from_dataframe(self): """Test analyzing from DataFrame.""" df = pd.DataFrame({ "category": ["politics", "politics", "economy"], "text": ["Отлично!", "Ужасно!", "Нормально"] }) analytics = CategoryAnalytics() stats_df = analytics.analyze_from_dataframe(df) assert isinstance(stats_df, pd.DataFrame) assert "category" in stats_df.columns assert "predictive_interval" in stats_df.columns class TestThreadAnalyzer: """Test suite for thread analyzer.""" @pytest.fixture def sample_data(self): """Create sample data for testing.""" return [ {"news_id": "1", "id": "1", "text": "Отлично!"}, {"news_id": "1", "id": "2", "text": "Ужасно!"}, {"news_id": "2", "id": "3", "text": "Нормально"}, ] def test_thread_analyzer_initialization(self): """Test thread analyzer initialization.""" analyzer = ThreadAnalyzer() assert analyzer is not None assert analyzer.analyzer is not None def test_calculate_thread_lengths(self, sample_data): """Test calculating thread lengths.""" analyzer = ThreadAnalyzer() lengths = analyzer.calculate_thread_lengths(sample_data) assert "1" in lengths assert "2" in lengths assert lengths["1"] == 2 assert lengths["2"] == 1 def test_calculate_temperature(self, sample_data): """Test calculating temperature.""" analyzer = ThreadAnalyzer() temperatures = analyzer.calculate_temperature(sample_data) assert "1" in temperatures assert "2" in temperatures assert 0.0 <= temperatures["1"] <= 1.0 assert 0.0 <= temperatures["2"] <= 1.0 def test_analyze_correlation(self): """Test correlation analysis.""" analyzer = ThreadAnalyzer() thread_lengths = {"1": 10, "2": 5, "3": 20} temperatures = {"1": 0.3, "2": 0.5, "3": 0.7} results = analyzer.analyze_correlation(thread_lengths, temperatures) assert "correlation" in results assert "p_value" in results assert "significant" in results assert "sample_size" in results assert -1.0 <= results["correlation"] <= 1.0 assert 0.0 <= results["p_value"] <= 1.0 def test_analyze_correlation_insufficient_data(self): """Test correlation with insufficient data.""" analyzer = ThreadAnalyzer() thread_lengths = {"1": 10} temperatures = {"2": 0.5} # No overlap results = analyzer.analyze_correlation(thread_lengths, temperatures) assert results.get("sample_size", 0) < 2 assert "error" in results or results.get("correlation", 0) == 0.0 def test_analyze_from_dataframe(self): """Test analyzing from DataFrame.""" df = pd.DataFrame({ "news_id": ["1", "1", "2"], "text": ["Отлично!", "Ужасно!", "Нормально"] }) analyzer = ThreadAnalyzer() thread_stats, correlation = analyzer.analyze_from_dataframe(df) assert isinstance(thread_stats, pd.DataFrame) assert "news_id" in thread_stats.columns assert "thread_length" in thread_stats.columns assert "temperature" in thread_stats.columns assert isinstance(correlation, dict) assert "correlation" in correlation class TestAnalyticsAPI: """Test suite for analytics API endpoints.""" @pytest.fixture def client(self): """Create test client.""" from fastapi.testclient import TestClient from api.main import app return TestClient(app) def test_predictive_intervals_endpoint(self, client): """Test predictive intervals endpoint.""" request_data = { "data": [ {"id": "item1", "positive_count": 80, "negative_count": 20, "neutral_count": 0}, {"id": "item2", "positive_count": 1, "negative_count": 0, "neutral_count": 0} ], "confidence_level": 0.95 } response = client.post( "/analytics/predictive-intervals", json=request_data ) assert response.status_code in [200, 500] # May fail if model not loaded if response.status_code == 200: data = response.json() assert "ranked_data" in data assert "top_positive" in data assert "top_negative" in data def test_category_sentiment_endpoint(self, client): """Test category sentiment endpoint.""" request_data = { "data": [ {"category": "politics", "text": "Отлично!"}, {"category": "politics", "text": "Ужасно!"}, {"category": "economy", "text": "Нормально"} ] } response = client.post( "/analytics/category-sentiment", json=request_data ) # May fail if model not loaded assert response.status_code in [200, 500] if response.status_code == 200: data = response.json() assert "category_stats" in data assert "top_positive_categories" in data assert "top_negative_categories" in data def test_thread_analysis_endpoint(self, client): """Test thread analysis endpoint.""" request_data = { "data": [ {"news_id": "1", "text": "Отлично!"}, {"news_id": "1", "text": "Ужасно!"}, {"news_id": "2", "text": "Нормально"} ] } response = client.post( "/analytics/thread-analysis", json=request_data ) # May fail if model not loaded assert response.status_code in [200, 500] if response.status_code == 200: data = response.json() assert "thread_stats" in data assert "correlation" in data assert "correlation" in data["correlation"] def test_analytics_health(self, client): """Test analytics health endpoint.""" response = client.get("/analytics/health") assert response.status_code == 200 data = response.json() assert "status" in data