# File: turing-space/turing/tests/unit/test_monitoring.py
# Committed by: github-actions[bot]
# Commit message: Sync turing folder from GitHub
# Commit: 38593e7
"""
Unit Tests for Monitoring Module
"""
from pathlib import Path
import tempfile
import numpy as np
import pytest
from turing.monitoring.baseline_manager import (
BaselineManager,
extract_baseline_statistics,
)
from turing.monitoring.drift_detector import DriftDetector
from turing.monitoring.synthetic_data_generator import SyntheticDataGenerator
class TestBaselineExtraction:
    """Checks on the dictionary returned by ``extract_baseline_statistics``."""

    @pytest.fixture
    def sample_data(self):
        """Provide five short comments together with a 5x5 0/1 label array."""
        label_rows = [
            [1, 0, 1, 0, 0],
            [0, 1, 0, 1, 0],
            [1, 1, 0, 0, 0],
            [0, 0, 1, 1, 1],
            [1, 0, 0, 0, 1],
        ]
        comments = [
            "This is a sample comment",
            "Another test comment here",
            "Short text",
            "Longer comment with more information",
            "Medium length comment",
        ]
        return comments, np.array(label_rows)

    def test_extract_baseline(self, sample_data):
        """The result carries both distributions, the language and the sample count."""
        comments, label_matrix = sample_data

        stats = extract_baseline_statistics(
            X_train=comments,
            y_train=label_matrix,
            language="java",
        )

        # Both distribution entries must be present in the result.
        for expected_key in ("text_length_distribution", "word_count_distribution"):
            assert expected_key in stats
        assert stats["language"] == "java"
        assert stats["num_samples"] == len(comments)
class TestDriftDetector:
    """Checks on ``DriftDetector`` construction and text-property drift output."""

    @pytest.fixture
    def baseline(self):
        """Provide a small baseline dictionary of NumPy arrays."""
        return dict(
            text_length_distribution=np.array([20, 25, 30, 35]),
            word_count_distribution=np.array([3, 4, 5, 6]),
            label_counts=np.array([5, 3, 2, 4]),
        )

    def test_detector_init(self):
        """The p-value threshold passed to the constructor is exposed as an attribute."""
        threshold = 0.05
        detector = DriftDetector(p_value_threshold=threshold, alert_threshold=0.01)
        assert detector.p_value_threshold == threshold

    def test_text_length_drift(self, baseline):
        """Comparing long texts against truncated ones yields the expected result keys.

        The ``baseline`` fixture is requested, but its value is not used in
        the body of this test.
        """
        production = [
            "Very long test comment with lots of additional information",
            "Another extremely long sample text",
            "Yet another quite lengthy comment",
            "More long production text",
        ]
        # Reference texts are the first half of each production text.
        reference = []
        for sample in production:
            midpoint = len(sample) // 2
            reference.append(sample[:midpoint])

        detector = DriftDetector(p_value_threshold=0.05)
        outcome = detector.detect_text_property_drift(production, reference)

        for expected_key in ("drifted", "method"):
            assert expected_key in outcome
class TestSyntheticDataGenerator:
    """Checks on ``SyntheticDataGenerator`` construction and short-comment output."""

    @pytest.fixture
    def sample_data(self):
        """Provide four short texts and a matching 1-D array of 0/1 labels."""
        corpus = ["This is a sample", "Another test", "Short", "Longer text"]
        return corpus, np.array([0, 1, 0, 1])

    def test_generator_init(self):
        """The seed passed to the constructor is exposed as ``seed``."""
        expected_seed = 42
        generator = SyntheticDataGenerator(seed=expected_seed)
        assert generator.seed == expected_seed

    def test_generate_short(self, sample_data):
        """Ten samples come back and they are shorter on average than the inputs."""
        originals, _labels = sample_data  # labels are not needed here
        generator = SyntheticDataGenerator(seed=42)

        shortened = generator.generate_short_comments(originals, ratio=0.5, n_samples=10)

        assert len(shortened) == 10
        mean_short_len = np.mean(list(map(len, shortened)))
        mean_original_len = np.mean(list(map(len, originals)))
        assert mean_short_len < mean_original_len
class TestBaselineManager:
    """Checks that ``BaselineManager`` can save a baseline and load it back."""

    @pytest.fixture
    def temp_dir(self):
        """Yield a temporary directory as a ``Path``; it is cleaned up afterwards."""
        scratch = tempfile.TemporaryDirectory()
        with scratch as dirname:
            yield Path(dirname)

    def test_save_and_load(self, temp_dir):
        """A saved baseline is loaded back with its language and sample count."""
        payload = {
            "text_length_distribution": [10, 20, 30],
            "label_counts": [5, 3],
            "language": "java",
            "num_samples": 3,
        }
        # The same three positional identifiers are used for saving and loading.
        identifiers = ("java", "test", "model")
        manager = BaselineManager(mlflow_enabled=False, local_cache_dir=temp_dir)

        manager.save_baseline(payload, *identifiers)
        restored = manager.load_baseline(*identifiers)

        assert restored["language"] == "java"
        assert restored["num_samples"] == 3
if __name__ == "__main__":
    # Run this file's tests verbosely when executed directly, and propagate
    # pytest's exit code so a failing run yields a non-zero process status
    # (the original discarded the return value and always exited with 0).
    raise SystemExit(pytest.main([__file__, "-v"]))