Spaces:
Sleeping
Sleeping
| # test_data_validation_service.py | |
| """ | |
| Tests for Data Validation and Integrity Service. | |
| Tests validation of verification records, accuracy calculations, data integrity checksums, | |
| duplicate detection, and final session validation. | |
| Requirements: 11.1, 11.2, 11.3, 11.4, 11.5 | |
| """ | |
| import pytest | |
| from datetime import datetime, timedelta | |
| from unittest.mock import Mock, patch | |
| from src.core.data_validation_service import ( | |
| DataValidationService, ValidationResult, IntegrityChecksum, DuplicateDetectionResult | |
| ) | |
| from src.core.verification_models import ( | |
| VerificationRecord, VerificationSession, EnhancedVerificationSession, TestMessage | |
| ) | |
class TestDataValidationService:
    """Tests for DataValidationService.

    Covers verification-record, session and test-message validation,
    accuracy-count verification, integrity checksums, duplicate detection,
    and the final session validation with its data quality score.
    """

    # ------------------------------------------------------------------
    # Fixtures and builders
    # ------------------------------------------------------------------
    def setup_method(self):
        """Create a fresh service plus one known-good record and session."""
        self.validation_service = DataValidationService()

        # Known-good record: classifier and ground truth agree on "yellow".
        self.valid_record = VerificationRecord(
            message_id="test_001",
            original_message="Patient expressing spiritual distress",
            classifier_decision="yellow",
            classifier_confidence=0.75,
            classifier_indicators=["spiritual", "distress"],
            ground_truth_label="yellow",
            verifier_notes="Correctly identified",
            is_correct=True,
            timestamp=datetime.now(),
        )

        # The session creation time is captured before the second record's
        # timestamp so the relative ordering of the clock reads is unchanged.
        session_created_at = datetime.now()
        misclassified_record = VerificationRecord(
            message_id="test_002",
            original_message="Patient feeling hopeful",
            classifier_decision="green",
            classifier_confidence=0.85,
            classifier_indicators=["hopeful"],
            ground_truth_label="red",
            verifier_notes="Misclassified",
            is_correct=False,
            timestamp=datetime.now(),
        )

        # Two verifications: one correct, one incorrect; counts match.
        self.valid_session = VerificationSession(
            session_id="session_001",
            verifier_name="Dr. Test",
            dataset_id="dataset_001",
            dataset_name="Test Dataset",
            created_at=session_created_at,
            total_messages=2,
            verified_count=2,
            correct_count=1,
            incorrect_count=1,
            verifications=[self.valid_record, misclassified_record],
            is_complete=False,
        )

    @staticmethod
    def _green_record(**overrides):
        """Return a consistent "green" record, with any fields overridden.

        ``timestamp`` is only passed to the constructor when the caller
        supplies it in ``overrides``.
        """
        fields = {
            "message_id": "test_001",
            "original_message": "Test message",
            "classifier_decision": "green",
            "classifier_confidence": 0.8,
            "classifier_indicators": [],
            "ground_truth_label": "green",
            "verifier_notes": "",
            "is_correct": True,
        }
        fields.update(overrides)
        return VerificationRecord(**fields)

    def _session_with_counts(self, verified, correct, incorrect):
        """Return a session reusing the valid verifications with given counts.

        The two verifications from ``self.valid_session`` (one correct, one
        incorrect) are shared, so any counts other than 2/1/1 are wrong.
        """
        return VerificationSession(
            session_id="session_001",
            verifier_name="Dr. Test",
            dataset_id="dataset_001",
            dataset_name="Test Dataset",
            created_at=datetime.now(),
            total_messages=2,
            verified_count=verified,
            correct_count=correct,
            incorrect_count=incorrect,
            verifications=self.valid_session.verifications,
            is_complete=False,
        )

    # ------------------------------------------------------------------
    # Verification record validation
    # ------------------------------------------------------------------
    def test_validate_verification_record_valid(self):
        """A well-formed record validates cleanly and reports metadata."""
        outcome = self.validation_service.validate_verification_record(
            self.valid_record
        )

        assert outcome.is_valid
        assert not outcome.errors
        assert "validation_timestamp" in outcome.metadata
        assert outcome.metadata["record_id"] == "test_001"

    def test_validate_verification_record_missing_fields(self):
        """A record whose timestamp is None is rejected with a timestamp error."""
        record = self._green_record()
        # Blank the field after construction to simulate a missing value.
        record.timestamp = None

        outcome = self.validation_service.validate_verification_record(record)

        assert not outcome.is_valid
        assert any("timestamp" in message for message in outcome.errors)

    def test_validate_verification_record_invalid_constraints(self):
        """A confidence above 1.0 is reported as a constraint violation."""
        record = self._green_record(
            classifier_confidence=1.5,  # Invalid: > 1.0
            timestamp=datetime.now(),
        )

        outcome = self.validation_service.validate_verification_record(record)

        assert not outcome.is_valid
        assert any(
            "classifier_confidence" in message for message in outcome.errors
        )

    def test_validate_verification_record_logical_inconsistency(self):
        """is_correct=True with differing decision and ground truth is flagged."""
        record = self._green_record(
            ground_truth_label="red",  # green != red, so is_correct should be False
            timestamp=datetime.now(),
        )

        outcome = self.validation_service.validate_verification_record(record)

        assert not outcome.is_valid
        assert any("is_correct" in message for message in outcome.errors)

    # ------------------------------------------------------------------
    # Verification session validation
    # ------------------------------------------------------------------
    def test_validate_verification_session_valid(self):
        """A consistent session validates cleanly and reports metadata."""
        outcome = self.validation_service.validate_verification_session(
            self.valid_session
        )

        assert outcome.is_valid
        assert not outcome.errors
        assert "validation_timestamp" in outcome.metadata
        assert outcome.metadata["session_id"] == "session_001"

    def test_validate_verification_session_count_mismatch(self):
        """A verified_count that disagrees with the verifications is flagged."""
        # verified_count=3 is wrong: only two verifications exist.
        session = self._session_with_counts(verified=3, correct=1, incorrect=1)

        outcome = self.validation_service.validate_verification_session(session)

        assert not outcome.is_valid
        # Accept either the verification-count or count-consistency wording.
        joined = " ".join(outcome.errors)
        assert "Verified count" in joined
        assert "doesn't equal" in joined or "doesn't match" in joined

    # ------------------------------------------------------------------
    # Accuracy calculations
    # ------------------------------------------------------------------
    def test_verify_accuracy_calculations_valid(self):
        """Expected counts in the metadata match the valid session."""
        outcome = self.validation_service.verify_accuracy_calculations(
            self.valid_session
        )

        assert outcome.is_valid
        assert not outcome.errors
        assert "expected_verified_count" in outcome.metadata
        assert outcome.metadata["expected_verified_count"] == 2
        assert outcome.metadata["expected_correct_count"] == 1
        assert outcome.metadata["expected_incorrect_count"] == 1

    def test_verify_accuracy_calculations_mismatch(self):
        """Wrong correct/incorrect counts are reported as mismatches."""
        # Should be 1 correct / 1 incorrect.
        session = self._session_with_counts(verified=2, correct=2, incorrect=0)

        outcome = self.validation_service.verify_accuracy_calculations(session)

        assert not outcome.is_valid
        joined = " ".join(outcome.errors)
        assert (
            "Correct count mismatch" in joined
            or "Incorrect count mismatch" in joined
        )

    # ------------------------------------------------------------------
    # Data integrity checksums
    # ------------------------------------------------------------------
    def test_generate_data_integrity_checksum(self):
        """The generated checksum is a populated SHA-256 IntegrityChecksum."""
        digest = self.validation_service.generate_data_integrity_checksum(
            self.valid_session
        )

        assert isinstance(digest, IntegrityChecksum)
        assert digest.checksum_type == "sha256"
        assert len(digest.checksum_value) == 64  # SHA256 hex length
        assert digest.data_size > 0
        assert isinstance(digest.timestamp, datetime)

    def test_validate_data_integrity_valid(self):
        """Unchanged data validates against its own checksum."""
        service = self.validation_service
        baseline = service.generate_data_integrity_checksum(self.valid_session)

        outcome = service.validate_data_integrity(self.valid_session, baseline)

        assert outcome.is_valid
        assert not outcome.errors
        assert outcome.metadata["expected_checksum"] == baseline.checksum_value

    def test_validate_data_integrity_mismatch(self):
        """Changed identifying fields fail validation against the old checksum."""
        service = self.validation_service
        source = self.valid_session
        baseline = service.generate_data_integrity_checksum(source)

        # Same counts and verifications, different identifying fields.
        tampered = VerificationSession(
            session_id="modified_session",
            verifier_name="Different Verifier",
            dataset_id="different_dataset",
            dataset_name="Different Dataset",
            created_at=source.created_at,
            total_messages=source.total_messages,
            verified_count=source.verified_count,
            correct_count=source.correct_count,
            incorrect_count=source.incorrect_count,
            verifications=source.verifications,
            is_complete=source.is_complete,
        )

        outcome = service.validate_data_integrity(tampered, baseline)

        assert not outcome.is_valid
        joined = " ".join(outcome.errors)
        assert "Data integrity checksum mismatch" in joined

    # ------------------------------------------------------------------
    # Duplicate detection
    # ------------------------------------------------------------------
    def test_detect_duplicate_test_cases_no_duplicates(self):
        """Three distinct messages yield no duplicate groups."""
        messages = [
            TestMessage("msg_001", "Patient expressing spiritual distress", "yellow"),
            TestMessage("msg_002", "Patient feeling hopeful and positive", "green"),
            TestMessage("msg_003", "Patient experiencing severe anxiety", "red"),
        ]

        report = self.validation_service.detect_duplicate_test_cases(messages)

        assert isinstance(report, DuplicateDetectionResult)
        assert report.duplicates_found == 0
        assert not report.duplicate_groups

    def test_detect_duplicate_test_cases_exact_duplicates(self):
        """Two messages with identical text form a single duplicate group."""
        messages = [
            TestMessage("msg_001", "Patient expressing spiritual distress", "yellow"),
            # Exact duplicate of msg_001's text.
            TestMessage("msg_002", "Patient expressing spiritual distress", "yellow"),
            TestMessage("msg_003", "Patient feeling hopeful", "green"),
        ]

        report = self.validation_service.detect_duplicate_test_cases(messages)

        assert report.duplicates_found == 1
        assert len(report.duplicate_groups) == 1
        group = report.duplicate_groups[0]
        assert len(group) == 2
        assert "msg_001" in group
        assert "msg_002" in group

    def test_detect_duplicate_test_cases_similar_duplicates(self):
        """Reordered wording is grouped as a duplicate at threshold 0.8."""
        messages = [
            TestMessage(
                "msg_001",
                "Patient expressing spiritual distress and anxiety",
                "yellow",
            ),
            # Same words as msg_001 in a different order.
            TestMessage(
                "msg_002",
                "Patient expressing anxiety and spiritual distress",
                "yellow",
            ),
            TestMessage(
                "msg_003",
                "Patient feeling completely different emotions",
                "green",
            ),
        ]

        report = self.validation_service.detect_duplicate_test_cases(
            messages, similarity_threshold=0.8
        )

        assert report.duplicates_found == 1
        assert len(report.duplicate_groups) == 1

    # ------------------------------------------------------------------
    # Test message validation
    # ------------------------------------------------------------------
    def test_validate_test_message_valid(self):
        """A message with a recognised label validates cleanly."""
        message = TestMessage(
            "msg_001", "Patient expressing spiritual distress", "yellow"
        )

        outcome = self.validation_service.validate_test_message(message)

        assert outcome.is_valid
        assert not outcome.errors

    def test_validate_test_message_invalid(self):
        """An unrecognised label is reported against pre_classified_label."""
        message = TestMessage(
            "msg_001", "Patient expressing distress", "invalid_color"
        )

        outcome = self.validation_service.validate_test_message(message)

        assert not outcome.is_valid
        assert any(
            "pre_classified_label" in message_text
            for message_text in outcome.errors
        )

    # ------------------------------------------------------------------
    # Final session validation
    # ------------------------------------------------------------------
    def test_perform_final_session_validation_valid(self):
        """Final validation of the valid session passes and fills metadata."""
        outcome = self.validation_service.perform_final_session_validation(
            self.valid_session
        )

        assert outcome.is_valid
        for key in (
            "validation_timestamp",
            "integrity_checksum",
            "data_quality_score",
        ):
            assert key in outcome.metadata

    def test_perform_final_session_validation_with_issues(self):
        """Final validation fails when every count is wrong."""
        session = self._session_with_counts(verified=3, correct=2, incorrect=0)

        outcome = self.validation_service.perform_final_session_validation(
            session
        )

        assert not outcome.is_valid
        assert len(outcome.errors) > 0

    def test_data_quality_score_calculation(self):
        """The valid session scores within 0-100 and above 90."""
        outcome = self.validation_service.perform_final_session_validation(
            self.valid_session
        )

        score = outcome.metadata.get("data_quality_score", 0)
        assert 0 <= score <= 100
        assert score > 90  # Should be high for valid session

    # ------------------------------------------------------------------
    # Text similarity
    # ------------------------------------------------------------------
    def test_text_similarity_calculation(self):
        """Similarity is 1.0 for identical text, low for unrelated, high for reordered."""
        similarity_of = self.validation_service._calculate_text_similarity

        identical = similarity_of(
            "Patient expressing spiritual distress",
            "Patient expressing spiritual distress",
        )
        assert identical == 1.0

        unrelated = similarity_of(
            "Patient expressing spiritual distress",
            "Weather is sunny today",
        )
        assert unrelated < 0.5

        reordered = similarity_of(
            "Patient expressing spiritual distress and anxiety",
            "Patient expressing anxiety and spiritual distress",
        )
        assert reordered > 0.8
# Registered with pytest so tests can request it by parameter name; without
# the decorator this is a plain function that pytest never injects.
@pytest.fixture
def validation_service():
    """Fixture providing a newly constructed DataValidationService.

    Returns:
        A ``DataValidationService`` created with no arguments.
    """
    return DataValidationService()
# Both functions below are registered as pytest fixtures. The session fixture
# receives the record fixture by parameter name, which only works when both
# carry the decorator.
@pytest.fixture
def sample_verification_record():
    """Fixture providing a sample verification record.

    Returns:
        A ``VerificationRecord`` whose classifier decision and ground truth
        are both "yellow", marked ``is_correct=True`` and timestamped with
        the current local time.
    """
    return VerificationRecord(
        message_id="test_001",
        original_message="Patient expressing spiritual distress",
        classifier_decision="yellow",
        classifier_confidence=0.75,
        classifier_indicators=["spiritual", "distress"],
        ground_truth_label="yellow",
        verifier_notes="Correctly identified",
        is_correct=True,
        timestamp=datetime.now(),
    )


@pytest.fixture
def sample_verification_session(sample_verification_record):
    """Fixture providing a sample single-record verification session.

    Args:
        sample_verification_record: The record supplied by the
            ``sample_verification_record`` fixture; it becomes the session's
            only verification.

    Returns:
        A ``VerificationSession`` with one message, counted as verified and
        correct, and ``is_complete=False``.
    """
    return VerificationSession(
        session_id="session_001",
        verifier_name="Dr. Test",
        dataset_id="dataset_001",
        dataset_name="Test Dataset",
        created_at=datetime.now(),
        total_messages=1,
        verified_count=1,
        correct_count=1,
        incorrect_count=0,
        verifications=[sample_verification_record],
        is_complete=False,
    )