Spaces:
Sleeping
Sleeping
| """ | |
| Tests for Anomaly Detection Module | |
| Comprehensive test suite for anomaly detector. | |
| """ | |
| import pytest | |
| import asyncio | |
| from typing import List, Dict, Any | |
| import sys | |
| import os | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) | |
| from src.models.anomaly_detection import AnomalyDetector | |
class TestAnomalyDetector:
    """Synchronous test suite for ``AnomalyDetector``.

    The detector exposes coroutine methods (``train`` / ``predict``); every
    test here drives them to completion with ``asyncio.run`` so the tests
    themselves stay plain synchronous functions.
    """

    @pytest.fixture
    def detector(self):
        """Provide a freshly constructed ``AnomalyDetector`` to each test."""
        return AnomalyDetector()

    @pytest.fixture
    def sample_contracts(self):
        """Provide three sample contracts, one with a 10x outlier value.

        CT001 and CT002 share the same description and organ, but CT002's
        value is ten times CT001's; CT003 is an unrelated contract.
        """
        return [
            {
                "id": "CT001",
                "description": "Aquisição de computadores",
                "value": 50000.0,
                "supplier": "Tech Company A",
                "date": "2024-01-15",
                "organ": "Ministry of Education",
            },
            {
                "id": "CT002",
                "description": "Aquisição de computadores",
                "value": 500000.0,  # Anomaly: 10x higher than CT001
                "supplier": "Tech Company B",
                "date": "2024-01-20",
                "organ": "Ministry of Education",
            },
            {
                "id": "CT003",
                "description": "Serviços de consultoria",
                "value": 75000.0,
                "supplier": "Consulting Inc",
                "date": "2024-02-01",
                "organ": "Ministry of Health",
            },
        ]

    def test_detector_initialization(self, detector):
        """A new detector exposes its model name and default thresholds."""
        assert detector is not None
        assert detector.model_name == "anomaly_detector"
        assert hasattr(detector, '_thresholds')
        assert detector._thresholds['value_threshold'] == 1000000

    def test_detector_training(self, detector, sample_contracts):
        """Training reports status, sample count and model name."""
        result = asyncio.run(detector.train(sample_contracts))

        assert result['status'] == 'trained'
        assert result['samples'] == len(sample_contracts)
        assert result['model'] == 'anomaly_detector'
        assert detector._is_trained is True

    def test_anomaly_detection_high_value(self, detector, sample_contracts):
        """The 10x-value contract (CT002) is flagged as a high-value anomaly."""
        asyncio.run(detector.train(sample_contracts))
        results = asyncio.run(detector.predict(sample_contracts))

        assert len(results) > 0

        # Locate the prediction for the outlier contract.
        high_value_result = next(
            (r for r in results if r['contract_id'] == 'CT002'),
            None,
        )
        assert high_value_result is not None
        assert high_value_result['is_anomaly'] is True
        assert high_value_result['anomaly_type'] == 'high_value'
        assert high_value_result['confidence'] > 0.8

    def test_anomaly_detection_frequency(self, detector):
        """Many contracts awarded to one supplier yield a frequency anomaly."""
        # 15 contracts, all to the same supplier, on consecutive January days.
        contracts = [
            {
                "id": f"CT{i:03d}",
                "description": "Service contract",
                "value": 50000.0,
                "supplier": "Same Supplier LLC",
                "date": f"2024-01-{i+1:02d}",
                "organ": "Ministry X",
            }
            for i in range(15)
        ]
        # One contract to a different supplier for contrast.
        contracts.append({
            "id": "CT999",
            "description": "Different service",
            "value": 45000.0,
            "supplier": "Other Company",
            "date": "2024-02-01",
            "organ": "Ministry X",
        })

        asyncio.run(detector.train(contracts))
        results = asyncio.run(detector.predict(contracts))

        frequency_anomalies = [
            r for r in results
            if r.get('anomaly_type') == 'suspicious_frequency'
        ]
        assert len(frequency_anomalies) > 0
        assert frequency_anomalies[0]['supplier'] == 'Same Supplier LLC'

    def test_no_anomalies_normal_data(self, detector):
        """Evenly spread, similar-valued contracts produce few anomalies."""
        # 20 contracts with small value steps across 5 suppliers and 3 organs.
        normal_contracts = [
            {
                "id": f"CT{i:03d}",
                "description": f"Service type {i % 3}",
                "value": 50000.0 + (i * 1000),
                "supplier": f"Company {chr(65 + i % 5)}",
                "date": f"2024-01-{(i % 28) + 1:02d}",
                "organ": f"Ministry {i % 3}",
            }
            for i in range(20)
        ]

        asyncio.run(detector.train(normal_contracts))
        results = asyncio.run(detector.predict(normal_contracts))

        anomalies = [r for r in results if r.get('is_anomaly', False)]
        # At most 2 of 20 contracts, i.e. strictly under 15%.
        assert len(anomalies) < 3

    def test_empty_data_handling(self, detector):
        """Empty input trains with zero samples and predicts an empty list."""
        result = asyncio.run(detector.train([]))
        assert result['status'] == 'trained'
        assert result['samples'] == 0

        results = asyncio.run(detector.predict([]))
        assert results == []

    def test_invalid_data_handling(self, detector):
        """Malformed contracts are either tolerated or rejected clearly.

        Accepts two outcomes: ``predict`` returns a list, or an exception is
        raised whose message mentions "invalid" or "error".
        """
        invalid_contracts = [
            {"id": "CT001"},  # Missing required fields
            {"id": "CT002", "value": "not_a_number"},  # Invalid type
            None,  # Null entry
        ]

        try:
            asyncio.run(detector.train(invalid_contracts))
            results = asyncio.run(detector.predict(invalid_contracts))
        except Exception as e:
            # Rejection is acceptable only with a meaningful message.
            assert "invalid" in str(e).lower() or "error" in str(e).lower()
        else:
            # Checked outside the ``try`` so a failed assertion here is
            # reported directly instead of being caught by the handler above.
            assert isinstance(results, list)

    def test_threshold_configuration(self):
        """Replacing ``_thresholds`` on an instance is reflected on read."""
        custom_detector = AnomalyDetector()
        custom_detector._thresholds = {
            "value_threshold": 100000,  # Lower threshold
            "frequency_threshold": 5,  # Lower frequency
            "pattern_threshold": 0.9,  # Higher pattern threshold
        }

        assert custom_detector._thresholds['value_threshold'] == 100000
        assert custom_detector._thresholds['frequency_threshold'] == 5
        assert custom_detector._thresholds['pattern_threshold'] == 0.9

    # NOTE(review): the parameter table for this test is not present in the
    # file as seen; the sizes and time budgets (seconds) below are placeholder
    # values - confirm against the project's performance targets.
    @pytest.mark.parametrize(
        "num_contracts,expected_performance",
        [
            (10, 0.1),
            (100, 0.5),
            (1000, 2.0),
        ],
    )
    def test_performance(self, detector, num_contracts, expected_performance):
        """Prediction over ``num_contracts`` items finishes within budget.

        Args:
            detector: Fresh detector supplied by the ``detector`` fixture.
            num_contracts: Number of synthetic contracts to predict on.
            expected_performance: Upper bound, in seconds, for ``predict``.
        """
        import time

        contracts = [
            {
                "id": f"CT{i:06d}",
                "description": f"Contract {i}",
                "value": 50000.0 + (i * 100),
                "supplier": f"Company {i % 20}",
                "date": f"2024-01-{(i % 28) + 1:02d}",
                "organ": f"Ministry {i % 5}",
            }
            for i in range(num_contracts)
        ]

        # Train on at most the first 100 contracts; only prediction is timed.
        asyncio.run(detector.train(contracts[:100]))

        # perf_counter is monotonic, so wall-clock adjustments cannot skew
        # the measured duration.
        start_time = time.perf_counter()
        results = asyncio.run(detector.predict(contracts))
        elapsed_time = time.perf_counter() - start_time

        assert elapsed_time < expected_performance
        assert len(results) <= len(contracts)
class TestAsyncAnomalyDetector:
    """Async test suite for ``AnomalyDetector``.

    The tests are coroutines that await ``train`` / ``predict`` directly.
    They are marked with ``pytest.mark.asyncio`` so that an asyncio-aware
    pytest plugin runs them on an event loop.
    NOTE(review): assumes pytest-asyncio is installed for this project -
    confirm.
    """

    @pytest.mark.asyncio
    async def test_concurrent_predictions(self):
        """Several ``predict`` calls gathered together all return lists."""
        detector = AnomalyDetector()

        # Five sets of ten contracts; the value scales with the set index.
        contract_sets = [
            [
                {
                    "id": f"SET{set_id}-CT{i:03d}",
                    "description": f"Contract {i}",
                    "value": 50000.0 * (set_id + 1),
                    "supplier": f"Company {i}",
                    "date": "2024-01-15",
                    "organ": f"Ministry {set_id}",
                }
                for i in range(10)
            ]
            for set_id in range(5)
        ]

        # Train once on the first set, then predict on every set concurrently.
        await detector.train(contract_sets[0])
        tasks = [detector.predict(contracts) for contracts in contract_sets]
        results = await asyncio.gather(*tasks)

        # One result per submitted set, each of them a list.
        assert len(results) == len(contract_sets)
        for result in results:
            assert isinstance(result, list)

    @pytest.mark.asyncio
    async def test_model_state_persistence(self):
        """Repeated predictions leave ``_is_trained`` set to ``True``."""
        detector = AnomalyDetector()

        train_data = [
            {
                "id": f"CT{i:03d}",
                "description": "Initial contract",
                "value": 100000.0,
                "supplier": f"Company {i}",
                "date": "2024-01-01",
                "organ": "Ministry A",
            }
            for i in range(50)
        ]

        await detector.train(train_data)
        assert detector._is_trained is True

        # Predicting repeatedly must not reset the trained flag.
        for _ in range(10):
            await detector.predict(train_data[:10])
            assert detector._is_trained is True