# Spiritual_Health_Project/tests/verification_mode/test_properties_metrics.py
# Commit a3934b1 (DocUA): Add property-based tests for verification mode functionality
# test_properties_metrics.py
"""
Property-based tests for verification metrics calculator.
Tests that metrics are calculated correctly across all inputs.
"""
import pytest
from hypothesis import given, strategies as st, settings, HealthCheck
from datetime import datetime
from src.core.verification_models import VerificationRecord
from src.core.verification_metrics import VerificationMetricsCalculator
def verification_record_strategy():
    """Build a Hypothesis strategy that produces randomized VerificationRecord objects.

    All fields are drawn independently, so ``is_correct`` is NOT constrained to
    agree with ``classifier_decision == ground_truth_label`` — the metrics under
    test are expected to rely on the explicit ``is_correct`` flag.
    """
    # Identifier-safe characters for message ids.
    id_alphabet = (
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789_"
    )
    # Same closed label set is used for both the classifier's decision and
    # the ground truth; sampled_from strategies are stateless, so sharing
    # one instance still yields independent draws per field.
    label = st.sampled_from(["green", "yellow", "red"])
    return st.builds(
        VerificationRecord,
        message_id=st.text(alphabet=id_alphabet, min_size=1, max_size=20),
        original_message=st.text(min_size=1, max_size=500),
        classifier_decision=label,
        classifier_confidence=st.floats(min_value=0.0, max_value=1.0),
        classifier_indicators=st.lists(st.text(min_size=1, max_size=50), max_size=5),
        ground_truth_label=label,
        verifier_notes=st.text(max_size=200),
        is_correct=st.booleans(),
        # Single timestamp captured once, when the strategy is constructed;
        # every generated record shares it (metrics here ignore time).
        timestamp=st.just(datetime.now()),
    )
class TestAccuracyCalculation:
    """
    **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
    Tests that accuracy is calculated correctly as (correct / total) * 100.
    """

    @staticmethod
    def _make_record(index, *, ground_truth_label="green", is_correct=True):
        """Build one deterministic VerificationRecord for the example-based tests.

        The classifier side is fixed (decision "green", confidence 0.9, one
        indicator); callers vary only the ground-truth label and the
        correctness flag, which is all these tests exercise.
        """
        return VerificationRecord(
            message_id=f"msg_{index}",
            original_message=f"Message {index}",
            classifier_decision="green",
            classifier_confidence=0.9,
            classifier_indicators=["test"],
            ground_truth_label=ground_truth_label,
            verifier_notes="",
            is_correct=is_correct,
            timestamp=datetime.now(),
        )

    @given(st.lists(verification_record_strategy(), min_size=1, max_size=100))
    @settings(suppress_health_check=[HealthCheck.function_scoped_fixture])
    def test_accuracy_calculation_is_correct(self, records):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        For any set of verification records, the calculated accuracy should equal
        (correct_count / total_count) * 100.
        """
        # Expected value computed independently from the is_correct flags.
        correct_count = sum(1 for r in records if r.is_correct)
        expected_accuracy = (correct_count / len(records)) * 100
        actual_accuracy = VerificationMetricsCalculator.calculate_accuracy(records)
        assert actual_accuracy == expected_accuracy

    def test_accuracy_with_all_correct(self):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        When all records are correct, accuracy should be 100.
        """
        records = [self._make_record(i) for i in range(10)]
        accuracy = VerificationMetricsCalculator.calculate_accuracy(records)
        assert accuracy == 100.0

    def test_accuracy_with_all_incorrect(self):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        When all records are incorrect, accuracy should be 0.
        """
        records = [
            self._make_record(i, ground_truth_label="yellow", is_correct=False)
            for i in range(10)
        ]
        accuracy = VerificationMetricsCalculator.calculate_accuracy(records)
        assert accuracy == 0.0

    def test_accuracy_with_empty_records(self):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        When there are no records, accuracy should be 0 (not a ZeroDivisionError).
        """
        accuracy = VerificationMetricsCalculator.calculate_accuracy([])
        assert accuracy == 0.0

    def test_accuracy_with_half_correct(self):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        When half the records are correct, accuracy should be 50.
        """
        # Even indices are correct (truth matches the fixed "green" decision),
        # odd indices are incorrect.
        records = [
            self._make_record(
                i,
                ground_truth_label="green" if i % 2 == 0 else "yellow",
                is_correct=(i % 2 == 0),
            )
            for i in range(10)
        ]
        accuracy = VerificationMetricsCalculator.calculate_accuracy(records)
        assert accuracy == 50.0

    @given(st.lists(verification_record_strategy(), min_size=1, max_size=100))
    def test_accuracy_by_type_calculation(self, records):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        For any set of records, accuracy by type should, for each classification
        type, equal the fraction of records with that classifier_decision whose
        is_correct flag is set, scaled to a percentage (0.0 when the type has
        no records).
        """
        accuracy_by_type = (
            VerificationMetricsCalculator.calculate_accuracy_by_type(records)
        )
        # All three classification types must always be present in the result.
        assert "green" in accuracy_by_type
        assert "yellow" in accuracy_by_type
        assert "red" in accuracy_by_type
        # Each type's accuracy must match an independent recomputation.
        for classification_type in ["green", "yellow", "red"]:
            type_records = [
                r for r in records
                if r.classifier_decision == classification_type
            ]
            if type_records:
                correct_count = sum(1 for r in type_records if r.is_correct)
                expected_accuracy = (correct_count / len(type_records)) * 100
                assert accuracy_by_type[classification_type] == expected_accuracy
            else:
                # Empty type buckets report 0.0 rather than being omitted.
                assert accuracy_by_type[classification_type] == 0.0

    @given(st.lists(verification_record_strategy(), min_size=1, max_size=100))
    def test_confusion_matrix_structure(self, records):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        For any set of records, the confusion matrix should have correct structure
        and all counts should sum to total records.
        """
        matrix = VerificationMetricsCalculator.calculate_confusion_matrix(records)
        # Outer keys: classifier decision; inner keys: ground-truth label.
        assert "green" in matrix
        assert "yellow" in matrix
        assert "red" in matrix
        for classifier_type in ["green", "yellow", "red"]:
            assert "green" in matrix[classifier_type]
            assert "yellow" in matrix[classifier_type]
            assert "red" in matrix[classifier_type]
        # Every record lands in exactly one cell, so the cells sum to the total.
        total_count = sum(
            matrix[classifier][truth]
            for classifier in ["green", "yellow", "red"]
            for truth in ["green", "yellow", "red"]
        )
        assert total_count == len(records)

    @given(st.lists(verification_record_strategy(), min_size=1, max_size=100))
    def test_metrics_summary_consistency(self, records):
        """
        **Feature: verification-mode, Property 4: Accuracy Calculation is Correct**
        **Validates: Requirements 5.3, 5.4, 9.2**
        For any set of records, the metrics summary should be internally consistent.
        """
        summary = VerificationMetricsCalculator.get_metrics_summary(records)
        # Counts must partition the record set.
        assert summary["total_records"] == len(records)
        assert (
            summary["correct_count"] + summary["incorrect_count"]
            == summary["total_records"]
        )
        # Reported accuracy must agree with the reported counts.
        expected_accuracy = (
            summary["correct_count"] / summary["total_records"] * 100
            if summary["total_records"] > 0
            else 0.0
        )
        assert summary["accuracy"] == expected_accuracy
        # Per-type accuracies are percentages and must stay in [0, 100].
        for accuracy in summary["accuracy_by_type"].values():
            assert 0.0 <= accuracy <= 100.0