"""
Unit Tests for Monitoring Module
"""
| from pathlib import Path | |
| import tempfile | |
| import numpy as np | |
| import pytest | |
| from turing.monitoring.baseline_manager import ( | |
| BaselineManager, | |
| extract_baseline_statistics, | |
| ) | |
| from turing.monitoring.drift_detector import DriftDetector | |
| from turing.monitoring.synthetic_data_generator import SyntheticDataGenerator | |
| class TestBaselineExtraction: | |
| """Tests for baseline statistics extraction.""" | |
| def sample_data(self): | |
| texts = [ | |
| "This is a sample comment", | |
| "Another test comment here", | |
| "Short text", | |
| "Longer comment with more information", | |
| "Medium length comment", | |
| ] | |
| labels = np.array([[1, 0, 1, 0, 0], [0, 1, 0, 1, 0], [1, 1, 0, 0, 0], [0, 0, 1, 1, 1], [1, 0, 0, 0, 1]]) | |
| return texts, labels | |
| def test_extract_baseline(self, sample_data): | |
| texts, labels = sample_data | |
| baseline = extract_baseline_statistics(X_train=texts, y_train=labels, language="java") | |
| assert "text_length_distribution" in baseline | |
| assert "word_count_distribution" in baseline | |
| assert baseline["language"] == "java" | |
| assert baseline["num_samples"] == len(texts) | |
| class TestDriftDetector: | |
| """Tests for drift detection.""" | |
| def baseline(self): | |
| return { | |
| "text_length_distribution": np.array([20, 25, 30, 35]), | |
| "word_count_distribution": np.array([3, 4, 5, 6]), | |
| "label_counts": np.array([5, 3, 2, 4]), | |
| } | |
| def test_detector_init(self): | |
| detector = DriftDetector(p_value_threshold=0.05, alert_threshold=0.01) | |
| assert detector.p_value_threshold == 0.05 | |
| def test_text_length_drift(self, baseline): | |
| detector = DriftDetector(p_value_threshold=0.05) | |
| prod_texts = [ | |
| "Very long test comment with lots of additional information", | |
| "Another extremely long sample text", | |
| "Yet another quite lengthy comment", | |
| "More long production text", | |
| ] | |
| ref_texts = [text[:len(text)//2] for text in prod_texts] # Shorter reference texts | |
| result = detector.detect_text_property_drift(prod_texts, ref_texts) | |
| assert "drifted" in result | |
| assert "method" in result | |
| class TestSyntheticDataGenerator: | |
| """Tests for synthetic data generation.""" | |
| def sample_data(self): | |
| texts = ["This is a sample", "Another test", "Short", "Longer text"] | |
| labels = np.array([0, 1, 0, 1]) | |
| return texts, labels | |
| def test_generator_init(self): | |
| gen = SyntheticDataGenerator(seed=42) | |
| assert gen.seed == 42 | |
| def test_generate_short(self, sample_data): | |
| texts, labels = sample_data | |
| gen = SyntheticDataGenerator(seed=42) | |
| short = gen.generate_short_comments(texts, ratio=0.5, n_samples=10) | |
| assert len(short) == 10 | |
| assert np.mean([len(t) for t in short]) < np.mean([len(t) for t in texts]) | |
class TestBaselineManager:
    """Tests for baseline management."""

    def test_save_and_load(self):
        """Round-trip a baseline through save/load using a throwaway directory.

        NOTE(review): the original declared a ``temp_dir`` generator method
        that was never registered with ``@pytest.fixture``, so requesting it
        as a test argument would fail at collection. The temporary directory
        is now managed inline with a context manager, which also guarantees
        cleanup even if an assertion fails.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            manager = BaselineManager(mlflow_enabled=False, local_cache_dir=Path(tmpdir))
            baseline = {
                "text_length_distribution": [10, 20, 30],
                "label_counts": [5, 3],
                "language": "java",
                "num_samples": 3,
            }
            manager.save_baseline(baseline, "java", "test", "model")
            loaded = manager.load_baseline("java", "test", "model")
            assert loaded["language"] == "java"
            assert loaded["num_samples"] == 3
| if __name__ == "__main__": | |
| pytest.main([__file__, "-v"]) | |