# test_data_validation_service.py
"""
Tests for Data Validation and Integrity Service.

Tests validation of verification records, accuracy calculations,
data integrity checksums, duplicate detection, and final session validation.

Requirements: 11.1, 11.2, 11.3, 11.4, 11.5
"""

from datetime import datetime, timedelta
from unittest.mock import Mock, patch

import pytest

from src.core.data_validation_service import (
    DataValidationService,
    ValidationResult,
    IntegrityChecksum,
    DuplicateDetectionResult,
)
# NOTE(review): TestMessage matches pytest's default ``Test*`` class pattern;
# if it defines __init__, pytest will likely emit a PytestCollectionWarning
# for this module -- confirm and silence in the model if it is noisy.
from src.core.verification_models import (
    VerificationRecord,
    VerificationSession,
    EnhancedVerificationSession,
    TestMessage,
)


class TestDataValidationService:
    """Test suite for DataValidationService."""

    def setup_method(self):
        """Create a fresh service, one valid record and one valid session."""
        self.validation_service = DataValidationService()

        # Record whose is_correct flag agrees with its two labels.
        self.valid_record = VerificationRecord(
            message_id="test_001",
            original_message="Patient expressing spiritual distress",
            classifier_decision="yellow",
            classifier_confidence=0.75,
            classifier_indicators=["spiritual", "distress"],
            ground_truth_label="yellow",
            verifier_notes="Correctly identified",
            is_correct=True,
            timestamp=datetime.now(),
        )

        # The session creation time is read before the second record's
        # timestamp so the clock is sampled in a fixed, predictable order.
        session_created_at = datetime.now()
        misclassified_record = VerificationRecord(
            message_id="test_002",
            original_message="Patient feeling hopeful",
            classifier_decision="green",
            classifier_confidence=0.85,
            classifier_indicators=["hopeful"],
            ground_truth_label="red",
            verifier_notes="Misclassified",
            is_correct=False,
            timestamp=datetime.now(),
        )

        # Two records, one correct and one incorrect, with matching counters.
        self.valid_session = VerificationSession(
            session_id="session_001",
            verifier_name="Dr. Test",
            dataset_id="dataset_001",
            dataset_name="Test Dataset",
            created_at=session_created_at,
            total_messages=2,
            verified_count=2,
            correct_count=1,
            incorrect_count=1,
            verifications=[self.valid_record, misclassified_record],
            is_complete=False,
        )

    def _session_with_counts(self, verified_count, correct_count, incorrect_count):
        """Return a session reusing the valid session's records with the given counters."""
        return VerificationSession(
            session_id="session_001",
            verifier_name="Dr. Test",
            dataset_id="dataset_001",
            dataset_name="Test Dataset",
            created_at=datetime.now(),
            total_messages=2,
            verified_count=verified_count,
            correct_count=correct_count,
            incorrect_count=incorrect_count,
            verifications=self.valid_session.verifications,
            is_complete=False,
        )

    def test_validate_verification_record_valid(self):
        """A well-formed record validates with no errors and tagged metadata."""
        outcome = self.validation_service.validate_verification_record(self.valid_record)

        assert outcome.is_valid
        assert len(outcome.errors) == 0
        assert "validation_timestamp" in outcome.metadata
        assert outcome.metadata["record_id"] == "test_001"

    def test_validate_verification_record_missing_fields(self):
        """A record whose timestamp is None is rejected with a timestamp error."""
        record = VerificationRecord(
            message_id="test_001",
            original_message="Test message",
            classifier_decision="green",
            classifier_confidence=0.8,
            classifier_indicators=[],
            ground_truth_label="green",
            verifier_notes="",
            is_correct=True,
        )
        # Clear the field after construction to simulate a missing value.
        record.timestamp = None

        outcome = self.validation_service.validate_verification_record(record)

        assert not outcome.is_valid
        assert any("timestamp" in message for message in outcome.errors)

    def test_validate_verification_record_invalid_constraints(self):
        """A confidence above 1.0 is reported as a classifier_confidence error."""
        record = VerificationRecord(
            message_id="test_001",
            original_message="Test message",
            classifier_decision="green",
            classifier_confidence=1.5,  # out of range: greater than 1.0
            classifier_indicators=[],
            ground_truth_label="green",
            verifier_notes="",
            is_correct=True,
            timestamp=datetime.now(),
        )

        outcome = self.validation_service.validate_verification_record(record)

        assert not outcome.is_valid
        assert any("classifier_confidence" in message for message in outcome.errors)

    def test_validate_verification_record_logical_inconsistency(self):
        """is_correct=True with differing labels is reported as an is_correct error."""
        record = VerificationRecord(
            message_id="test_001",
            original_message="Test message",
            classifier_decision="green",
            classifier_confidence=0.8,
            classifier_indicators=[],
            ground_truth_label="red",
            verifier_notes="",
            is_correct=True,  # contradicts green != red
            timestamp=datetime.now(),
        )

        outcome = self.validation_service.validate_verification_record(record)

        assert not outcome.is_valid
        assert any("is_correct" in message for message in outcome.errors)

    def test_validate_verification_session_valid(self):
        """A consistent session validates with no errors and tagged metadata."""
        outcome = self.validation_service.validate_verification_session(self.valid_session)

        assert outcome.is_valid
        assert len(outcome.errors) == 0
        assert "validation_timestamp" in outcome.metadata
        assert outcome.metadata["session_id"] == "session_001"

    def test_validate_verification_session_count_mismatch(self):
        """A verified_count of 3 against two records is flagged."""
        session = self._session_with_counts(
            verified_count=3,  # wrong: only two records are attached
            correct_count=1,
            incorrect_count=1,
        )

        outcome = self.validation_service.validate_verification_session(session)

        assert not outcome.is_valid
        # Either the verification-count or the count-consistency wording is accepted.
        joined_errors = " ".join(outcome.errors)
        assert "Verified count" in joined_errors
        assert "doesn't equal" in joined_errors or "doesn't match" in joined_errors

    def test_verify_accuracy_calculations_valid(self):
        """Counters of the valid session match the expected values in metadata."""
        outcome = self.validation_service.verify_accuracy_calculations(self.valid_session)

        assert outcome.is_valid
        assert len(outcome.errors) == 0

        metadata = outcome.metadata
        assert "expected_verified_count" in metadata
        assert metadata["expected_verified_count"] == 2
        assert metadata["expected_correct_count"] == 1
        assert metadata["expected_incorrect_count"] == 1

    def test_verify_accuracy_calculations_mismatch(self):
        """Wrong correct/incorrect counters produce a count mismatch error."""
        session = self._session_with_counts(
            verified_count=2,
            correct_count=2,  # wrong: one record is correct
            incorrect_count=0,  # wrong: one record is incorrect
        )

        outcome = self.validation_service.verify_accuracy_calculations(session)

        assert not outcome.is_valid
        # Either of the two specific mismatch messages is sufficient.
        joined_errors = " ".join(outcome.errors)
        assert (
            "Correct count mismatch" in joined_errors
            or "Incorrect count mismatch" in joined_errors
        )

    def test_generate_data_integrity_checksum(self):
        """The generated checksum is a populated sha256 IntegrityChecksum."""
        digest = self.validation_service.generate_data_integrity_checksum(self.valid_session)

        assert isinstance(digest, IntegrityChecksum)
        assert digest.checksum_type == "sha256"
        assert len(digest.checksum_value) == 64  # SHA256 hex length
        assert digest.data_size > 0
        assert isinstance(digest.timestamp, datetime)

    def test_validate_data_integrity_valid(self):
        """Validating unchanged data against its own checksum succeeds."""
        service = self.validation_service
        baseline = service.generate_data_integrity_checksum(self.valid_session)

        outcome = service.validate_data_integrity(self.valid_session, baseline)

        assert outcome.is_valid
        assert len(outcome.errors) == 0
        assert outcome.metadata["expected_checksum"] == baseline.checksum_value

    def test_validate_data_integrity_mismatch(self):
        """Validating altered data against the original checksum fails."""
        service = self.validation_service
        source = self.valid_session
        baseline = service.generate_data_integrity_checksum(source)

        # Same counters and records, but different identifying fields.
        tampered = VerificationSession(
            session_id="modified_session",
            verifier_name="Different Verifier",
            dataset_id="different_dataset",
            dataset_name="Different Dataset",
            created_at=source.created_at,
            total_messages=source.total_messages,
            verified_count=source.verified_count,
            correct_count=source.correct_count,
            incorrect_count=source.incorrect_count,
            verifications=source.verifications,
            is_complete=source.is_complete,
        )

        outcome = service.validate_data_integrity(tampered, baseline)

        assert not outcome.is_valid
        joined_errors = " ".join(outcome.errors)
        assert "Data integrity checksum mismatch" in joined_errors

    def test_detect_duplicate_test_cases_no_duplicates(self):
        """Three distinct messages yield no duplicates and no groups."""
        messages = [
            TestMessage("msg_001", "Patient expressing spiritual distress", "yellow"),
            TestMessage("msg_002", "Patient feeling hopeful and positive", "green"),
            TestMessage("msg_003", "Patient experiencing severe anxiety", "red"),
        ]

        detection = self.validation_service.detect_duplicate_test_cases(messages)

        assert isinstance(detection, DuplicateDetectionResult)
        assert detection.duplicates_found == 0
        assert len(detection.duplicate_groups) == 0

    def test_detect_duplicate_test_cases_exact_duplicates(self):
        """Two messages with identical text form a single duplicate group."""
        messages = [
            TestMessage("msg_001", "Patient expressing spiritual distress", "yellow"),
            # Same text as msg_001.
            TestMessage("msg_002", "Patient expressing spiritual distress", "yellow"),
            TestMessage("msg_003", "Patient feeling hopeful", "green"),
        ]

        detection = self.validation_service.detect_duplicate_test_cases(messages)

        assert detection.duplicates_found == 1
        assert len(detection.duplicate_groups) == 1

        first_group = detection.duplicate_groups[0]
        assert len(first_group) == 2
        assert "msg_001" in first_group
        assert "msg_002" in first_group

    def test_detect_duplicate_test_cases_similar_duplicates(self):
        """Reordered wording is grouped as a duplicate at threshold 0.8."""
        messages = [
            TestMessage("msg_001", "Patient expressing spiritual distress and anxiety", "yellow"),
            # Same words as msg_001 in a different order.
            TestMessage("msg_002", "Patient expressing anxiety and spiritual distress", "yellow"),
            TestMessage("msg_003", "Patient feeling completely different emotions", "green"),
        ]

        detection = self.validation_service.detect_duplicate_test_cases(
            messages, similarity_threshold=0.8
        )

        assert detection.duplicates_found == 1
        assert len(detection.duplicate_groups) == 1

    def test_validate_test_message_valid(self):
        """A message labelled "yellow" validates with no errors."""
        message = TestMessage("msg_001", "Patient expressing spiritual distress", "yellow")

        outcome = self.validation_service.validate_test_message(message)

        assert outcome.is_valid
        assert len(outcome.errors) == 0

    def test_validate_test_message_invalid(self):
        """An unknown label is reported as a pre_classified_label error."""
        message = TestMessage("msg_001", "Patient expressing distress", "invalid_color")

        outcome = self.validation_service.validate_test_message(message)

        assert not outcome.is_valid
        assert any("pre_classified_label" in message_text for message_text in outcome.errors)

    def test_perform_final_session_validation_valid(self):
        """Final validation of the valid session passes and fills its metadata."""
        outcome = self.validation_service.perform_final_session_validation(self.valid_session)

        assert outcome.is_valid
        assert "validation_timestamp" in outcome.metadata
        assert "integrity_checksum" in outcome.metadata
        assert "data_quality_score" in outcome.metadata

    def test_perform_final_session_validation_with_issues(self):
        """Final validation fails when all three counters are wrong."""
        session = self._session_with_counts(
            verified_count=3,
            correct_count=2,
            incorrect_count=0,
        )

        outcome = self.validation_service.perform_final_session_validation(session)

        assert not outcome.is_valid
        assert len(outcome.errors) > 0

    def test_data_quality_score_calculation(self):
        """The valid session scores within 0..100 and above 90."""
        outcome = self.validation_service.perform_final_session_validation(self.valid_session)

        # A missing score falls back to 0, which then fails the > 90 check.
        score = outcome.metadata.get("data_quality_score", 0)
        assert 0 <= score <= 100
        assert score > 90

    def test_text_similarity_calculation(self):
        """Similarity is 1.0 for equal text, low for unrelated, high for reordered."""
        similarity_of = self.validation_service._calculate_text_similarity

        identical = similarity_of(
            "Patient expressing spiritual distress",
            "Patient expressing spiritual distress",
        )
        assert identical == 1.0

        unrelated = similarity_of(
            "Patient expressing spiritual distress",
            "Weather is sunny today",
        )
        assert unrelated < 0.5

        reordered = similarity_of(
            "Patient expressing spiritual distress and anxiety",
            "Patient expressing anxiety and spiritual distress",
        )
        assert reordered > 0.8


@pytest.fixture
def validation_service():
    """Provide a new DataValidationService instance."""
    return DataValidationService()


@pytest.fixture
def sample_verification_record():
    """Provide a correct "yellow" verification record stamped with the current time."""
    return VerificationRecord(
        message_id="test_001",
        original_message="Patient expressing spiritual distress",
        classifier_decision="yellow",
        classifier_confidence=0.75,
        classifier_indicators=["spiritual", "distress"],
        ground_truth_label="yellow",
        verifier_notes="Correctly identified",
        is_correct=True,
        timestamp=datetime.now(),
    )


@pytest.fixture
def sample_verification_session(sample_verification_record):
    """Provide a one-record session built around sample_verification_record."""
    return VerificationSession(
        session_id="session_001",
        verifier_name="Dr. Test",
        dataset_id="dataset_001",
        dataset_name="Test Dataset",
        created_at=datetime.now(),
        total_messages=1,
        verified_count=1,
        correct_count=1,
        incorrect_count=0,
        verifications=[sample_verification_record],
        is_complete=False,
    )