"""
Tests for Anomaly Detection Module

Comprehensive test suite for anomaly detector.
"""

import pytest
import asyncio
from typing import List, Dict, Any

import sys
import os
# Prepend the directory two levels above this file's directory to sys.path so
# that the `src` package imported below can be resolved.
# NOTE(review): presumably that directory is the project root — confirm
# against the repository layout.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

from src.models.anomaly_detection import AnomalyDetector


class TestAnomalyDetector:
    """Synchronous test suite for AnomalyDetector.

    The detector's ``train`` and ``predict`` methods are coroutines; each
    test drives them to completion with ``asyncio.run`` so the tests
    themselves can stay plain synchronous functions.
    """

    @pytest.fixture
    def detector(self):
        """Return a fresh, default-constructed AnomalyDetector."""
        return AnomalyDetector()

    @pytest.fixture
    def sample_contracts(self):
        """Return three sample contracts, one with an outlier value.

        CT001 and CT002 share the same description and organ, but CT002's
        value is ten times CT001's; CT003 is an unrelated contract.
        """
        return [
            {
                "id": "CT001",
                "description": "Aquisição de computadores",
                "value": 50000.0,
                "supplier": "Tech Company A",
                "date": "2024-01-15",
                "organ": "Ministry of Education"
            },
            {
                "id": "CT002",
                "description": "Aquisição de computadores",
                "value": 500000.0,  # Anomaly: 10x higher
                "supplier": "Tech Company B",
                "date": "2024-01-20",
                "organ": "Ministry of Education"
            },
            {
                "id": "CT003",
                "description": "Serviços de consultoria",
                "value": 75000.0,
                "supplier": "Consulting Inc",
                "date": "2024-02-01",
                "organ": "Ministry of Health"
            }
        ]

    def test_detector_initialization(self, detector):
        """Check the model name and default value threshold of a new detector."""
        assert detector is not None
        assert detector.model_name == "anomaly_detector"
        assert hasattr(detector, '_thresholds')
        assert detector._thresholds['value_threshold'] == 1000000

    def test_detector_training(self, detector, sample_contracts):
        """Check the summary returned by train() and the trained flag."""
        result = asyncio.run(detector.train(sample_contracts))

        assert result['status'] == 'trained'
        assert result['samples'] == len(sample_contracts)
        assert result['model'] == 'anomaly_detector'
        assert detector._is_trained is True

    def test_anomaly_detection_high_value(self, detector, sample_contracts):
        """Check that the 10x-priced contract CT002 is flagged as high_value."""
        # Train first, then predict on the same data.
        asyncio.run(detector.train(sample_contracts))
        results = asyncio.run(detector.predict(sample_contracts))

        # At least one result must be produced.
        assert len(results) > 0

        # Locate the entry for the outlier contract (None if absent).
        # NOTE(review): CT002's value (500000) is below the default
        # value_threshold asserted in test_detector_initialization, so the
        # detector presumably compares against peer contracts — confirm.
        high_value_result = next(
            (r for r in results if r['contract_id'] == 'CT002'),
            None
        )

        assert high_value_result is not None
        assert high_value_result['is_anomaly'] is True
        assert high_value_result['anomaly_type'] == 'high_value'
        assert high_value_result['confidence'] > 0.8

    def test_anomaly_detection_frequency(self, detector):
        """Check that a supplier holding 15 of 16 contracts is flagged."""
        # 15 contracts awarded to the same supplier on consecutive days.
        contracts = [
            {
                "id": f"CT{i:03d}",
                "description": "Service contract",
                "value": 50000.0,
                "supplier": "Same Supplier LLC",  # All same supplier
                "date": f"2024-01-{i+1:02d}",
                "organ": "Ministry X"
            }
            for i in range(15)  # 15 contracts to same supplier
        ]

        # Add one contract from a different supplier for contrast.
        contracts.append({
            "id": "CT999",
            "description": "Different service",
            "value": 45000.0,
            "supplier": "Other Company",
            "date": "2024-02-01",
            "organ": "Ministry X"
        })

        asyncio.run(detector.train(contracts))
        results = asyncio.run(detector.predict(contracts))

        # Keep only the results reported as frequency anomalies.
        frequency_anomalies = [
            r for r in results
            if r.get('anomaly_type') == 'suspicious_frequency'
        ]

        assert len(frequency_anomalies) > 0
        assert frequency_anomalies[0]['supplier'] == 'Same Supplier LLC'

    def test_no_anomalies_normal_data(self, detector):
        """Check that evenly distributed data yields fewer than 3 anomalies."""
        # 20 contracts with small value steps spread over 5 suppliers,
        # 3 descriptions and 3 organs.
        normal_contracts = [
            {
                "id": f"CT{i:03d}",
                "description": f"Service type {i % 3}",
                "value": 50000.0 + (i * 1000),  # Small variations
                "supplier": f"Company {chr(65 + i % 5)}",  # 5 different suppliers
                "date": f"2024-01-{(i % 28) + 1:02d}",
                "organ": f"Ministry {i % 3}"
            }
            for i in range(20)
        ]

        asyncio.run(detector.train(normal_contracts))
        results = asyncio.run(detector.predict(normal_contracts))

        # Results without an 'is_anomaly' key count as non-anomalous.
        anomalies = [r for r in results if r.get('is_anomaly', False)]
        assert len(anomalies) < 3  # Less than 15% anomalies

    def test_empty_data_handling(self, detector):
        """Check that training and predicting on empty lists both succeed."""
        # Training on no data still reports a trained model with 0 samples.
        result = asyncio.run(detector.train([]))
        assert result['status'] == 'trained'
        assert result['samples'] == 0

        # Predicting on no data returns an empty list.
        results = asyncio.run(detector.predict([]))
        assert results == []

    def test_invalid_data_handling(self, detector):
        """Check that malformed contracts are tolerated or rejected clearly.

        Two outcomes are accepted: train/predict complete and predict
        returns a list, or one of them raises an exception whose message
        mentions "invalid" or "error".
        """
        invalid_contracts = [
            {"id": "CT001"},  # Missing required fields
            {"id": "CT002", "value": "not_a_number"},  # Invalid type
            None,  # Null entry
        ]

        # Only the detector calls live inside the try body. The success-path
        # assertion sits in the else clause so that its AssertionError cannot
        # be caught by the handler below and re-judged by the message check.
        try:
            asyncio.run(detector.train(invalid_contracts))
            results = asyncio.run(detector.predict(invalid_contracts))
        except Exception as e:
            # The detector's exception type is not known here, so the broad
            # catch is deliberate; the message must still be meaningful.
            message = str(e).lower()
            assert "invalid" in message or "error" in message
        else:
            # Should either skip invalid entries or return empty.
            assert isinstance(results, list)

    def test_threshold_configuration(self):
        """Check that a replaced threshold mapping is stored on the detector."""
        custom_detector = AnomalyDetector()
        custom_detector._thresholds = {
            "value_threshold": 100000,  # Lower threshold
            "frequency_threshold": 5,    # Lower frequency
            "pattern_threshold": 0.9     # Higher pattern threshold
        }

        assert custom_detector._thresholds['value_threshold'] == 100000
        assert custom_detector._thresholds['frequency_threshold'] == 5
        assert custom_detector._thresholds['pattern_threshold'] == 0.9

    @pytest.mark.parametrize("num_contracts,expected_performance", [
        (10, 0.1),      # 10 contracts should process in < 0.1s
        (100, 0.5),     # 100 contracts should process in < 0.5s
        (1000, 2.0),    # 1000 contracts should process in < 2s
    ])
    def test_performance(self, detector, num_contracts, expected_performance):
        """Check that predict() finishes within the per-size time budget.

        Args:
            detector: Fresh detector supplied by the fixture.
            num_contracts: Number of synthetic contracts to predict on.
            expected_performance: Upper bound, in seconds, for the
                prediction call.
        """
        import time

        # Synthetic contracts spread over 20 suppliers and 5 organs.
        contracts = [
            {
                "id": f"CT{i:06d}",
                "description": f"Contract {i}",
                "value": 50000.0 + (i * 100),
                "supplier": f"Company {i % 20}",
                "date": f"2024-01-{(i % 28) + 1:02d}",
                "organ": f"Ministry {i % 5}"
            }
            for i in range(num_contracts)
        ]

        # Training happens outside the timed region and uses at most the
        # first 100 contracts.
        asyncio.run(detector.train(contracts[:100]))  # Train on subset

        # perf_counter() is a monotonic, high-resolution clock, so the
        # measurement cannot be skewed by system clock adjustments the way a
        # time.time() difference can.
        start_time = time.perf_counter()
        results = asyncio.run(detector.predict(contracts))
        elapsed_time = time.perf_counter() - start_time

        assert elapsed_time < expected_performance
        assert len(results) <= len(contracts)


@pytest.mark.asyncio
class TestAsyncAnomalyDetector:
    """Coroutine-based tests that await AnomalyDetector directly."""

    async def test_concurrent_predictions(self):
        """Run five predict() calls through asyncio.gather on one detector."""
        detector = AnomalyDetector()

        def build_set(set_id):
            # Ten contracts per set; value and organ vary with the set index.
            return [
                {
                    "id": f"SET{set_id}-CT{i:03d}",
                    "description": f"Contract {i}",
                    "value": 50000.0 * (set_id + 1),
                    "supplier": f"Company {i}",
                    "date": "2024-01-15",
                    "organ": f"Ministry {set_id}"
                }
                for i in range(10)
            ]

        contract_sets = [build_set(set_id) for set_id in range(5)]

        # The detector is trained on the first set only.
        await detector.train(contract_sets[0])

        # Create one prediction coroutine per set, then await them together.
        pending = [detector.predict(batch) for batch in contract_sets]
        outcomes = await asyncio.gather(*pending)

        # One list-typed result is expected per submitted set.
        assert len(outcomes) == 5
        assert all(isinstance(outcome, list) for outcome in outcomes)

    async def test_model_state_persistence(self):
        """Check that repeated predict() calls leave _is_trained set."""
        detector = AnomalyDetector()

        # Fifty identical-value contracts, each from a distinct supplier.
        train_data = []
        for i in range(50):
            train_data.append({
                "id": f"CT{i:03d}",
                "description": "Initial contract",
                "value": 100000.0,
                "supplier": f"Company {i}",
                "date": "2024-01-01",
                "organ": "Ministry A"
            })

        await detector.train(train_data)
        assert detector._is_trained is True

        # Predict ten times on the first ten contracts, re-checking the
        # trained flag after every call.
        remaining = 10
        while remaining > 0:
            await detector.predict(train_data[:10])
            assert detector._is_trained is True
            remaining -= 1