Spaces:
Running
Running
fix: update signal count to 26, fix perturbation non-determinism, fix DIRE import, patch starlette CVE
"""
Tests for system determinism and reproducibility.

Note: Some variance is expected due to CLIP placeholder centroids until #32 is complete.
"""
| import pytest | |
| from backend.services.image_forensics import ImageForensics | |
def test_detection_is_deterministic(sample_image_bytes):
    """Check that two independent analyses of the same bytes agree.

    Two separate ``ImageForensics`` instances are built from identical input
    before either report is generated. The test then requires the same
    ``ai_classification`` in both summaries and exactly 26 detection signals
    in each.
    """
    expected_signal_count = 26

    # Build both analyzers first, then both reports, so the second analysis
    # cannot reuse state that only exists after the first report is produced.
    analyzers = [ImageForensics(sample_image_bytes, "test.png") for _ in range(2)]
    reports = [analyzer.generate_forensic_report() for analyzer in analyzers]
    first_summary, second_summary = (report["summary"] for report in reports)

    # The verdict must not change between runs.
    assert first_summary["ai_classification"] == second_summary["ai_classification"]

    # Each run must report the full, fixed set of signals.
    for summary in (first_summary, second_summary):
        assert summary["total_detection_signals"] == expected_signal_count
def test_hash_generation_is_consistent(sample_image_bytes):
    """Check that repeated ``generate_hashes()`` calls on one instance agree.

    The ``md5`` and ``sha256`` entries are required and must be equal across
    both calls. The perceptual hashes (``phash``, ``average_hash``, ``dhash``)
    are compared whenever the first call produced them, and in that case the
    second call must produce them too.
    """
    forensics = ImageForensics(sample_image_bytes, "test.png")

    hashes1 = forensics.generate_hashes()
    hashes2 = forensics.generate_hashes()

    # Cryptographic digests: always expected to be present.
    for required_key in ("md5", "sha256"):
        assert hashes1[required_key] == hashes2[required_key], (
            f"{required_key} differs between two calls on the same instance"
        )

    # Perceptual hashes: only checked when generated, so an environment that
    # omits one of them does not fail the test. 'dhash' is included because
    # the hash dict is expected to carry it alongside the other two.
    for optional_key in ("phash", "average_hash", "dhash"):
        if optional_key in hashes1:
            assert optional_key in hashes2, (
                f"{optional_key} present in first call but missing in second"
            )
            assert hashes1[optional_key] == hashes2[optional_key], (
                f"{optional_key} differs between two calls on the same instance"
            )
def test_forensic_report_stability(sample_image_bytes):
    """Check that full forensic reports stay stable across independent runs.

    Two ``ImageForensics`` instances are created from the same bytes and each
    generates a report. The ``md5`` and ``sha256`` hashes must match exactly,
    and both summaries must list 26 detection signals. ``ai_probability`` may
    differ by a relative amount below 20%.

    Note: the tolerance exists because of the CLIP random centroids; it is
    expected to become unnecessary once the CLIP database is built (#32).
    """
    analyzers = [ImageForensics(sample_image_bytes, "test.png") for _ in range(2)]
    report_a, report_b = [analyzer.generate_forensic_report() for analyzer in analyzers]

    # Content hashes depend only on the input bytes, so they must be equal.
    for digest_name in ("md5", "sha256"):
        assert report_a["hashes"][digest_name] == report_b["hashes"][digest_name]

    # Both runs must report the fixed signal count, and therefore agree.
    summary_a = report_a["summary"]
    summary_b = report_b["summary"]
    assert summary_a["total_detection_signals"] == 26
    assert summary_b["total_detection_signals"] == 26
    assert summary_a["total_detection_signals"] == summary_b["total_detection_signals"]

    ai_prob_1 = summary_a["ai_probability"]
    ai_prob_2 = summary_b["ai_probability"]

    # Skip the relative comparison unless both probabilities exceed 0.01;
    # this also avoids dividing by a near-zero maximum.
    # NOTE(review): this skips the check when only one run is near zero
    # (e.g. 0.9 vs 0.0) - confirm that is intended.
    if not (ai_prob_1 > 0.01 and ai_prob_2 > 0.01):
        return

    # Relative difference, normalised by the larger of the two values.
    variance = abs(ai_prob_1 - ai_prob_2) / max(ai_prob_1, ai_prob_2)
    assert variance < 0.20, (
        f"AI probability variance too high (expected <20% due to CLIP randomness): {variance:.3f} "
        f"(prob1={ai_prob_1:.3f}, prob2={ai_prob_2:.3f}). "
        f"This is expected until CLIP database is built (#32)."
    )
def test_cache_consistency(sample_image_bytes):
    """Check that repeating operations on one instance gives equal results.

    Despite the name, no cache object is exercised here (``result_cache``
    does not exist in ``backend.core.cache``). The test calls
    ``generate_hashes()`` and ``generate_forensic_report()`` twice each on
    the same ``ImageForensics`` instance and compares the outputs.
    """
    analyzer = ImageForensics(sample_image_bytes, "test.png")

    # Two back-to-back hash computations must yield equal dicts.
    first_hashes = analyzer.generate_hashes()
    second_hashes = analyzer.generate_hashes()
    assert first_hashes == second_hashes

    # Two back-to-back reports must embed equal hash sections.
    first_report = analyzer.generate_forensic_report()
    second_report = analyzer.generate_forensic_report()
    assert first_report["hashes"] == second_report["hashes"]
def test_signal_ordering_is_stable(sample_image_bytes):
    """Check that the ``ai_detection`` section is consistent across runs.

    Two independent ``ImageForensics`` instances each generate a report.
    Both reports must contain an ``ai_detection`` section with 26 total
    signals and the same ``classification`` value.

    Note: only the section's presence, signal total and classification are
    compared; the order of individual signals is not asserted here.
    """
    analyzers = [ImageForensics(sample_image_bytes, "test.png") for _ in range(2)]
    reports = [analyzer.generate_forensic_report() for analyzer in analyzers]

    # The section is keyed "ai_detection"; both reports must expose it.
    for report in reports:
        assert "ai_detection" in report

    detection_a, detection_b = (report["ai_detection"] for report in reports)

    # Each run must report 26 signals in total.
    for detection in (detection_a, detection_b):
        assert detection["total_signals"] == 26

    # The classification must be the same in both runs.
    assert detection_a["classification"] == detection_b["classification"]