# verifile-x-api/backend/tests/test_determinism.py
# Commit 3b429a5 (abinazebinoy): fix: update signal count to 26, fix
# perturbation non-determinism, fix DIRE import, patch starlette CVE
"""
Tests for system determinism and reproducibility.
Note: Some variance expected due to CLIP placeholder centroids until #32 is complete.
"""
import pytest
from backend.services.image_forensics import ImageForensics
def test_detection_is_deterministic(sample_image_bytes):
    """Two independent runs of the full detection pipeline must agree."""
    first = ImageForensics(sample_image_bytes, "test.png").generate_forensic_report()
    second = ImageForensics(sample_image_bytes, "test.png").generate_forensic_report()
    # The headline classification must not flip between runs.
    assert first["summary"]["ai_classification"] == second["summary"]["ai_classification"]
    # Both runs must report the full complement of 26 detection signals.
    for report in (first, second):
        assert report["summary"]["total_detection_signals"] == 26
def test_hash_generation_is_consistent(sample_image_bytes):
    """Hashing the same image twice must yield identical digests."""
    forensics = ImageForensics(sample_image_bytes, "test.png")
    first = forensics.generate_hashes()
    second = forensics.generate_hashes()
    # Cryptographic digests are always present in the hash dict
    # (md5, sha256, plus perceptual hashes phash/average_hash/dhash).
    for algo in ("md5", "sha256"):
        assert first[algo] == second[algo]
    # Perceptual hashes are compared only when present.
    for algo in ("phash", "average_hash"):
        if algo in first:
            assert first[algo] == second[algo]
def test_forensic_report_stability(sample_image_bytes):
    """
    Full forensic reports must be stable across independent runs.
    Note: AI probability may vary ~15-20% due to CLIP random centroids.
    This will be deterministic once CLIP database is built (#32).
    """
    report1, report2 = (
        ImageForensics(sample_image_bytes, "test.png").generate_forensic_report()
        for _ in range(2)
    )
    # Cryptographic hashes are fully deterministic.
    for algo in ("md5", "sha256"):
        assert report1["hashes"][algo] == report2["hashes"][algo]
    # Both runs must expose all 26 detection signals, and agree with each other.
    assert report1["summary"]["total_detection_signals"] == 26
    assert report2["summary"]["total_detection_signals"] == 26
    assert (report1["summary"]["total_detection_signals"]
            == report2["summary"]["total_detection_signals"])
    # AI probability: tolerate up to 20% relative drift (CLIP randomness),
    # but only compare when both probabilities are non-negligible.
    ai_prob_1 = report1["summary"]["ai_probability"]
    ai_prob_2 = report2["summary"]["ai_probability"]
    if min(ai_prob_1, ai_prob_2) > 0.01:
        variance = abs(ai_prob_1 - ai_prob_2) / max(ai_prob_1, ai_prob_2)
        assert variance < 0.20, (
            f"AI probability variance too high (expected <20% due to CLIP randomness): {variance:.3f} "
            f"(prob1={ai_prob_1:.3f}, prob2={ai_prob_2:.3f}). "
            f"This is expected until CLIP database is built (#32)."
        )
def test_cache_consistency(sample_image_bytes):
    """Repeated operations on one ImageForensics instance must agree."""
    # Note: result_cache doesn't exist in backend.core.cache, so this test
    # instead verifies that calling the same APIs twice gives the same output.
    forensics = ImageForensics(sample_image_bytes, "test.png")
    # Hash generation is idempotent on a single instance.
    assert forensics.generate_hashes() == forensics.generate_hashes()
    # The hash section of the full report is likewise stable.
    report_a = forensics.generate_forensic_report()
    report_b = forensics.generate_forensic_report()
    assert report_a["hashes"] == report_b["hashes"]
def test_signal_ordering_is_stable(sample_image_bytes):
    """Detection-signal structure and counts must match across runs."""
    report1 = ImageForensics(sample_image_bytes, "test.png").generate_forensic_report()
    report2 = ImageForensics(sample_image_bytes, "test.png").generate_forensic_report()
    # The ai_detection section contains detection_results, not signals;
    # check that both reports expose the same structure with all 26 signals.
    for report in (report1, report2):
        assert "ai_detection" in report
        assert report["ai_detection"]["total_signals"] == 26
    # Classification keys should be consistent between runs.
    assert report1["ai_detection"]["classification"] == report2["ai_detection"]["classification"]