""" Unit Tests for Monitoring Module """ from pathlib import Path import tempfile import numpy as np import pytest from turing.monitoring.baseline_manager import ( BaselineManager, extract_baseline_statistics, ) from turing.monitoring.drift_detector import DriftDetector from turing.monitoring.synthetic_data_generator import SyntheticDataGenerator class TestBaselineExtraction: """Tests for baseline statistics extraction.""" @pytest.fixture def sample_data(self): texts = [ "This is a sample comment", "Another test comment here", "Short text", "Longer comment with more information", "Medium length comment", ] labels = np.array([[1, 0, 1, 0, 0], [0, 1, 0, 1, 0], [1, 1, 0, 0, 0], [0, 0, 1, 1, 1], [1, 0, 0, 0, 1]]) return texts, labels def test_extract_baseline(self, sample_data): texts, labels = sample_data baseline = extract_baseline_statistics(X_train=texts, y_train=labels, language="java") assert "text_length_distribution" in baseline assert "word_count_distribution" in baseline assert baseline["language"] == "java" assert baseline["num_samples"] == len(texts) class TestDriftDetector: """Tests for drift detection.""" @pytest.fixture def baseline(self): return { "text_length_distribution": np.array([20, 25, 30, 35]), "word_count_distribution": np.array([3, 4, 5, 6]), "label_counts": np.array([5, 3, 2, 4]), } def test_detector_init(self): detector = DriftDetector(p_value_threshold=0.05, alert_threshold=0.01) assert detector.p_value_threshold == 0.05 def test_text_length_drift(self, baseline): detector = DriftDetector(p_value_threshold=0.05) prod_texts = [ "Very long test comment with lots of additional information", "Another extremely long sample text", "Yet another quite lengthy comment", "More long production text", ] ref_texts = [text[:len(text)//2] for text in prod_texts] # Shorter reference texts result = detector.detect_text_property_drift(prod_texts, ref_texts) assert "drifted" in result assert "method" in result class TestSyntheticDataGenerator: """Tests for synthetic data generation.""" @pytest.fixture def sample_data(self): texts = ["This is a sample", "Another test", "Short", "Longer text"] labels = np.array([0, 1, 0, 1]) return texts, labels def test_generator_init(self): gen = SyntheticDataGenerator(seed=42) assert gen.seed == 42 def test_generate_short(self, sample_data): texts, labels = sample_data gen = SyntheticDataGenerator(seed=42) short = gen.generate_short_comments(texts, ratio=0.5, n_samples=10) assert len(short) == 10 assert np.mean([len(t) for t in short]) < np.mean([len(t) for t in texts]) class TestBaselineManager: """Tests for baseline management.""" @pytest.fixture def temp_dir(self): with tempfile.TemporaryDirectory() as tmpdir: yield Path(tmpdir) def test_save_and_load(self, temp_dir): manager = BaselineManager(mlflow_enabled=False, local_cache_dir=temp_dir) baseline = { "text_length_distribution": [10, 20, 30], "label_counts": [5, 3], "language": "java", "num_samples": 3, } manager.save_baseline(baseline, "java", "test", "model") loaded = manager.load_baseline("java", "test", "model") assert loaded["language"] == "java" assert loaded["num_samples"] == 3 if __name__ == "__main__": pytest.main([__file__, "-v"])