abinazebinoy commited on
Commit
e4d04ea
·
1 Parent(s): a1c7689

feat:DCT Frequency Artifact Detection (25th signal)

Browse files

- dct_frequency_detector.py: 3 spectral analysis methods
* 8x8 block DCT high-frequency energy ratio
* Global FFT radial power spectrum smoothness
* Checkerboard artifact detection (GAN up-convolution)
- ensemble: DCT wired in, v1.4, 7 methods
- 25 total signals
- Tests updated

backend/services/advanced_ensemble_detector.py CHANGED
@@ -10,6 +10,7 @@ from backend.services.clip_detector import CLIPDetector
10
  from backend.services.prnu_detector import detect_prnu
11
  from backend.services.ela_detector import detect_ela
12
  from backend.services.metadata_forensics import analyze_metadata
 
13
 
14
  logger = setup_logger(__name__)
15
 
@@ -39,7 +40,7 @@ class AdvancedEnsembleDetector(StatisticalDetector):
39
  Run complete advanced detection with all methods.
40
 
41
  Returns:
42
- Complete report with 24 detection signals
43
  """
44
  logger.info(f"Starting advanced ensemble detection for {self.filename}")
45
 
@@ -61,8 +62,11 @@ class AdvancedEnsembleDetector(StatisticalDetector):
61
  # Add metadata forensics signal
62
  metadata_result = analyze_metadata(self.image_bytes, self.filename)
63
 
64
- # Combine all signals (now 24 total)
65
- all_signals = base_report["all_signals"] + [dire_result, clip_result, prnu_result, ela_result, metadata_result]
 
 
 
66
 
67
  # Recalculate final score with weighted ensemble
68
  # Weights based on validation performance
@@ -75,21 +79,23 @@ class AdvancedEnsembleDetector(StatisticalDetector):
75
 
76
  if dire_confidence > 0.0:
77
  weighted_score = (
78
- 0.33 * base_report["ai_probability"] +
79
- 0.26 * dire_result["score"] +
80
- 0.19 * clip_result["score"] +
81
  0.09 * prnu_result["score"] +
82
  0.07 * ela_result["score"] +
83
- 0.06 * metadata_result["score"]
 
84
  )
85
  else:
86
- logger.info("DIRE unavailable — using statistical+CLIP+PRNU+ELA+metadata")
87
  weighted_score = (
88
- 0.52 * base_report["ai_probability"] +
89
- 0.23 * clip_result["score"] +
90
  0.11 * prnu_result["score"] +
91
  0.08 * ela_result["score"] +
92
- 0.06 * metadata_result["score"]
 
93
  )
94
 
95
  suspicious_count = sum(1 for s in all_signals if s["score"] > 0.5)
@@ -132,8 +138,8 @@ class AdvancedEnsembleDetector(StatisticalDetector):
132
  "summary": f"Analyzed using {len(all_signals)} independent signals including "
133
  f"statistical analysis, diffusion reconstruction, and semantic embeddings. "
134
  f"{suspicious_count} signals indicate AI generation.",
135
- "detection_version": "advanced-ensemble-v1.3",
136
- "methods_used": ["statistical", "dire", "clip", "prnu", "ela", "metadata"]
137
  }
138
 
139
  logger.info(
 
10
  from backend.services.prnu_detector import detect_prnu
11
  from backend.services.ela_detector import detect_ela
12
  from backend.services.metadata_forensics import analyze_metadata
13
+ from backend.services.dct_frequency_detector import detect_dct_artifacts
14
 
15
  logger = setup_logger(__name__)
16
 
 
40
  Run complete advanced detection with all methods.
41
 
42
  Returns:
43
+ Complete report with 25 detection signals
44
  """
45
  logger.info(f"Starting advanced ensemble detection for {self.filename}")
46
 
 
62
  # Add metadata forensics signal
63
  metadata_result = analyze_metadata(self.image_bytes, self.filename)
64
 
65
+ # Add DCT frequency signal
66
+ dct_result = detect_dct_artifacts(self.image_bytes, self.filename)
67
+
68
+ # Combine all signals (now 25 total)
69
+ all_signals = base_report["all_signals"] + [dire_result, clip_result, prnu_result, ela_result, metadata_result, dct_result]
70
 
71
  # Recalculate final score with weighted ensemble
72
  # Weights based on validation performance
 
79
 
80
  if dire_confidence > 0.0:
81
  weighted_score = (
82
+ 0.31 * base_report["ai_probability"] +
83
+ 0.24 * dire_result["score"] +
84
+ 0.18 * clip_result["score"] +
85
  0.09 * prnu_result["score"] +
86
  0.07 * ela_result["score"] +
87
+ 0.06 * metadata_result["score"] +
88
+ 0.05 * dct_result["score"]
89
  )
90
  else:
91
+ logger.info("DIRE unavailable — using statistical+CLIP+PRNU+ELA+metadata+DCT")
92
  weighted_score = (
93
+ 0.49 * base_report["ai_probability"] +
94
+ 0.22 * clip_result["score"] +
95
  0.11 * prnu_result["score"] +
96
  0.08 * ela_result["score"] +
97
+ 0.06 * metadata_result["score"] +
98
+ 0.04 * dct_result["score"]
99
  )
100
 
101
  suspicious_count = sum(1 for s in all_signals if s["score"] > 0.5)
 
138
  "summary": f"Analyzed using {len(all_signals)} independent signals including "
139
  f"statistical analysis, diffusion reconstruction, and semantic embeddings. "
140
  f"{suspicious_count} signals indicate AI generation.",
141
+ "detection_version": "advanced-ensemble-v1.4",
142
+ "methods_used": ["statistical", "dire", "clip", "prnu", "ela", "metadata", "dct"]
143
  }
144
 
145
  logger.info(
backend/services/dct_frequency_detector.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DCT Frequency Domain Artifact Detection.
3
+
4
+ GAN-generated images and some diffusion models leave characteristic
5
+ artifacts in the Discrete Cosine Transform (DCT) frequency domain.
6
+ These artifacts are invisible to the human eye but mathematically
7
+ detectable as spectral peaks or unusual energy distributions.
8
+
9
+ Key papers:
10
+ - "Detecting GAN-Generated Fake Images Using Co-occurrence Matrices" (2019)
11
+ - "Leveraging Frequency Analysis for Deep Fake Image Recognition" (ICML 2020)
12
+ - "Watch Your Up-Convolution: CNN Based Generative Deep Neural Networks are
13
+ Failing to Reproduce Spectral Distributions" (CVPR 2020)
14
+ """
15
+ import numpy as np
16
+ from typing import Dict, Any
17
+ from PIL import Image
18
+ from io import BytesIO
19
+ from backend.core.logger import setup_logger
20
+
21
+ logger = setup_logger(__name__)
22
+
23
+
24
+ def _compute_dct_2d(block: np.ndarray) -> np.ndarray:
25
+ """Compute 2D DCT using scipy if available, else numpy."""
26
+ try:
27
+ from scipy.fft import dctn
28
+ return dctn(block, norm='ortho')
29
+ except ImportError:
30
+ # Fallback: use FFT as approximation
31
+ return np.abs(np.fft.fft2(block))
32
+
33
+
34
def _block_hf_ratios(arr: np.ndarray, block_size: int = 8) -> list:
    """Per-block ratio of high-frequency to total DCT energy.

    Tiles the image into zero-centered 8x8 blocks (JPEG-style); "high
    frequency" is the bottom-right quadrant of each DCT block. AI images
    tend to concentrate energy in low frequencies (unnaturally smooth).
    """
    h, w = arr.shape
    ratios = []
    # +1 so the last full block is included when h/w are exact multiples
    # of block_size (the previous `h - block_size` bound skipped it).
    for y in range(0, h - block_size + 1, block_size):
        for x in range(0, w - block_size + 1, block_size):
            block = arr[y:y + block_size, x:x + block_size]
            dct_abs = np.abs(_compute_dct_2d(block - block.mean()))
            total_energy = np.sum(dct_abs ** 2) + 1e-10
            ratios.append(np.sum(dct_abs[4:, 4:] ** 2) / total_energy)
    return ratios


def _radial_spectrum_smoothness(arr: np.ndarray) -> float:
    """Std-dev of the first difference of the radial log-power spectrum.

    Natural images roll off roughly as 1/f; AI images often have an
    unnaturally uniform rolloff. Returns 0.5 (neutral) when fewer than
    11 radii are available.
    """
    h, w = arr.shape
    magnitude_log = np.log1p(np.abs(np.fft.fftshift(np.fft.fft2(arr))))
    cy, cx = h // 2, w // 2
    y_idx, x_idx = np.ogrid[:h, :w]
    radius = np.sqrt((y_idx - cy) ** 2 + (x_idx - cx) ** 2).astype(int)
    radial_power = []
    for r in range(1, min(cy, cx)):
        mask = radius == r
        if mask.sum() > 0:
            radial_power.append(float(np.mean(magnitude_log[mask])))
    if len(radial_power) <= 10:
        return 0.5
    return float(np.std(np.diff(np.array(radial_power))))


def _checkerboard_ratio(arr: np.ndarray) -> float:
    """Energy near the Nyquist frequency relative to mean FFT energy.

    GAN up-convolutions leave checkerboard patterns that concentrate
    energy at Nyquist-related frequencies (unshifted FFT index N/2).
    """
    fft_magnitude = np.abs(np.fft.fft2(arr))
    h2, w2 = arr.shape[0] // 2, arr.shape[1] // 2
    nyquist_energy = float(np.mean(fft_magnitude[h2 - 2:h2 + 2, w2 - 2:w2 + 2]))
    total_fft_energy = float(np.mean(fft_magnitude))
    return nyquist_energy / (total_fft_energy + 1e-10)


def detect_dct_artifacts(image_bytes: bytes, filename: str = "unknown") -> Dict[str, Any]:
    """
    Detect DCT frequency domain artifacts in image.

    Method:
    1. Convert to grayscale, divide into 8x8 blocks (like JPEG)
    2. Compute each block's high-frequency DCT energy ratio
    3. Measure smoothness of the global radial FFT power spectrum
    4. Check for checkerboard (GAN up-convolution) artifacts

    Args:
        image_bytes: Encoded image bytes (any format PIL can decode).
        filename: Original filename; used only for logging.

    Returns:
        Signal dict: signal_name, score (0..1, higher = more AI-like),
        confidence, explanation, method, plus raw_value/expected_range
        on success. Never raises; failures yield a neutral 0.5 score.
    """
    try:
        img = Image.open(BytesIO(image_bytes)).convert("L")
        arr = np.array(img, dtype=np.float64)
        h, w = arr.shape

        if h < 32 or w < 32:
            return {
                "signal_name": "DCT Frequency Artifacts",
                "score": 0.5,
                "confidence": 0.0,
                "explanation": "Image too small for DCT analysis",
                "method": "dct_frequency"
            }

        # === Signal 1: 8x8 block DCT energy distribution ===
        hf_ratios = _block_hf_ratios(arr)
        if not hf_ratios:
            return {
                "signal_name": "DCT Frequency Artifacts",
                "score": 0.5,
                "confidence": 0.1,
                "explanation": "Insufficient blocks for DCT analysis",
                "method": "dct_frequency"
            }
        mean_hf = float(np.mean(hf_ratios))

        # === Signal 2: global FFT radial spectrum smoothness ===
        spectral_smoothness = _radial_spectrum_smoothness(arr)

        # === Signal 3: checkerboard artifact detection ===
        checkerboard_ratio = _checkerboard_ratio(arr)

        # === Compute AI score ===
        # Low high-frequency energy = AI (too smooth)
        if mean_hf < 0.02:
            hf_score = 0.75
        elif mean_hf < 0.05:
            hf_score = 0.55
        elif mean_hf < 0.15:
            hf_score = 0.35
        else:
            hf_score = 0.20

        # Very smooth spectrum (low variation) = AI
        if spectral_smoothness < 0.05:
            smooth_score = 0.70
        elif spectral_smoothness < 0.10:
            smooth_score = 0.50
        else:
            smooth_score = 0.25

        # Checkerboard artifacts = GAN
        if checkerboard_ratio > 3.0:
            checker_score = 0.80
        elif checkerboard_ratio > 1.5:
            checker_score = 0.55
        else:
            checker_score = 0.25

        ai_score = float(np.clip(
            0.45 * hf_score + 0.35 * smooth_score + 0.20 * checker_score,
            0.0, 1.0
        ))

        # Larger images yield more blocks/radii, hence higher confidence,
        # capped at 0.75 (this is a supporting signal, not a verdict).
        confidence = min(0.75, 0.35 + (h * w) / (1024 * 1024) * 0.40)

        if mean_hf < 0.03:
            explanation = (
                f"Very low high-frequency DCT energy ({mean_hf:.3f}) — "
                "image is unnaturally smooth, consistent with AI synthesis"
            )
        elif checkerboard_ratio > 2.0:
            explanation = (
                f"Checkerboard frequency artifacts detected "
                f"(ratio={checkerboard_ratio:.2f}) — "
                "typical of GAN up-convolution artifacts"
            )
        else:
            explanation = (
                f"Normal DCT frequency distribution "
                f"(HF={mean_hf:.3f}, smoothness={spectral_smoothness:.3f})"
            )

        logger.info(
            f"DCT analysis: score={ai_score:.3f}, "
            f"hf={mean_hf:.3f}, checker={checkerboard_ratio:.2f}, "
            f"file={filename}"  # was a hard-coded "(unknown)" literal
        )

        return {
            "signal_name": "DCT Frequency Artifacts",
            "score": ai_score,
            "confidence": confidence,
            "explanation": explanation,
            "raw_value": mean_hf,
            "expected_range": "< 0.03 HF energy for AI images",
            "method": "dct_frequency"
        }

    except Exception as e:
        # Best-effort signal: never let this detector break the ensemble.
        logger.warning(f"DCT frequency analysis failed: {e}")
        return {
            "signal_name": "DCT Frequency Artifacts",
            "score": 0.5,
            "confidence": 0.0,
            "explanation": f"DCT analysis unavailable: {str(e)}",
            "raw_value": 0.0,
            "method": "dct_frequency"
        }
backend/tests/test_advanced_ai_detector.py CHANGED
@@ -67,4 +67,4 @@ def test_forensics_integration(sample_image_bytes):
67
  assert "ai_detection" in report
68
  assert "all_signals" in report["ai_detection"]
69
  # System has 21 signals: 19 statistical + 1 DIRE + 1 CLIP
70
- assert report["summary"]["total_detection_signals"] == 24
 
67
  assert "ai_detection" in report
68
  assert "all_signals" in report["ai_detection"]
69
  # System has 25 signals: 19 statistical + DIRE + CLIP + PRNU + ELA + metadata + DCT
70
+ assert report["summary"]["total_detection_signals"] == 25
backend/tests/test_advanced_ensemble.py CHANGED
@@ -26,8 +26,8 @@ def test_advanced_ensemble_complete_detection(sample_image_bytes):
26
  assert "methods_used" in report
27
 
28
  # Should have 21 signals (19 statistical + DIRE + CLIP)
29
- assert report["total_signals"] == 24
30
- assert len(report["all_signals"]) == 24
31
 
32
  # Check methods used
33
  assert "statistical" in report["methods_used"]
@@ -36,7 +36,7 @@ def test_advanced_ensemble_complete_detection(sample_image_bytes):
36
  assert "prnu" in report["methods_used"]
37
 
38
  # Check version
39
- assert report["detection_version"] == "advanced-ensemble-v1.3"
40
 
41
  # Cleanup
42
  detector.cleanup()
@@ -50,7 +50,7 @@ def test_advanced_ensemble_forensics_integration(sample_image_bytes):
50
  report = forensics.generate_forensic_report()
51
 
52
  # Check advanced detection was used
53
- assert report["ai_detection"]["total_signals"] == 24
54
  assert report["metadata"]["analyzer_version"] == "6.0.0"
55
  assert "methods_used" in report["ai_detection"]
56
- assert len(report["ai_detection"]["methods_used"]) == 6
 
26
  assert "methods_used" in report
27
 
28
  # Should have 25 signals (19 statistical + DIRE + CLIP + PRNU + ELA + metadata + DCT)
29
+ assert report["total_signals"] == 25
30
+ assert len(report["all_signals"]) == 25
31
 
32
  # Check methods used
33
  assert "statistical" in report["methods_used"]
 
36
  assert "prnu" in report["methods_used"]
37
 
38
  # Check version
39
+ assert report["detection_version"] == "advanced-ensemble-v1.4"
40
 
41
  # Cleanup
42
  detector.cleanup()
 
50
  report = forensics.generate_forensic_report()
51
 
52
  # Check advanced detection was used
53
+ assert report["ai_detection"]["total_signals"] == 25
54
  assert report["metadata"]["analyzer_version"] == "6.0.0"
55
  assert "methods_used" in report["ai_detection"]
56
+ assert len(report["ai_detection"]["methods_used"]) == 7
backend/tests/test_covariance_detector.py CHANGED
@@ -62,7 +62,7 @@ def test_covariance_forensics_integration(sample_image_bytes):
62
 
63
  assert "ai_detection" in report
64
  # System has 21 signals: 19 statistical + 1 DIRE + 1 CLIP
65
- assert report["ai_detection"]["total_signals"] == 24
66
  assert report["metadata"]["analyzer_version"] == "6.0.0"
67
  assert "detection_version" in report["ai_detection"]
68
 
 
62
 
63
  assert "ai_detection" in report
64
  # System has 25 signals: 19 statistical + DIRE + CLIP + PRNU + ELA + metadata + DCT
65
+ assert report["ai_detection"]["total_signals"] == 25
66
  assert report["metadata"]["analyzer_version"] == "6.0.0"
67
  assert "detection_version" in report["ai_detection"]
68
 
backend/tests/test_determinism.py CHANGED
@@ -20,8 +20,8 @@ def test_detection_is_deterministic(sample_image_bytes):
20
  assert report1["summary"]["ai_classification"] == report2["summary"]["ai_classification"]
21
 
22
  # Signal counts should be identical
23
- assert report1["summary"]["total_detection_signals"] == 24
24
- assert report2["summary"]["total_detection_signals"] == 24
25
 
26
 
27
  def test_hash_generation_is_consistent(sample_image_bytes):
@@ -61,8 +61,8 @@ def test_forensic_report_stability(sample_image_bytes):
61
  assert report1["hashes"]["sha256"] == report2["hashes"]["sha256"]
62
 
63
  # Signal counts should be identical
64
- assert report1["summary"]["total_detection_signals"] == 24
65
- assert report2["summary"]["total_detection_signals"] == 24
66
  assert report1["summary"]["total_detection_signals"] == report2["summary"]["total_detection_signals"]
67
 
68
  # AI probability: allow 20% variance for CLIP randomness
@@ -114,8 +114,8 @@ def test_signal_ordering_is_stable(sample_image_bytes):
114
  assert "ai_detection" in report2
115
 
116
  # Both should have 21 signals total
117
- assert report1["ai_detection"]["total_signals"] == 24
118
- assert report2["ai_detection"]["total_signals"] == 24
119
 
120
  # Classification keys should be consistent
121
  assert report1["ai_detection"]["classification"] == report2["ai_detection"]["classification"]
 
20
  assert report1["summary"]["ai_classification"] == report2["summary"]["ai_classification"]
21
 
22
  # Signal counts should be identical
23
+ assert report1["summary"]["total_detection_signals"] == 25
24
+ assert report2["summary"]["total_detection_signals"] == 25
25
 
26
 
27
  def test_hash_generation_is_consistent(sample_image_bytes):
 
61
  assert report1["hashes"]["sha256"] == report2["hashes"]["sha256"]
62
 
63
  # Signal counts should be identical
64
+ assert report1["summary"]["total_detection_signals"] == 25
65
+ assert report2["summary"]["total_detection_signals"] == 25
66
  assert report1["summary"]["total_detection_signals"] == report2["summary"]["total_detection_signals"]
67
 
68
  # AI probability: allow 20% variance for CLIP randomness
 
114
  assert "ai_detection" in report2
115
 
116
  # Both should have 25 signals total
117
+ assert report1["ai_detection"]["total_signals"] == 25
118
+ assert report2["ai_detection"]["total_signals"] == 25
119
 
120
  # Classification keys should be consistent
121
  assert report1["ai_detection"]["classification"] == report2["ai_detection"]["classification"]
backend/tests/test_statistical_detector.py CHANGED
@@ -61,7 +61,7 @@ def test_statistical_forensics_integration(sample_image_bytes):
61
 
62
  assert "ai_detection" in report
63
  # System has 21 signals: 19 statistical + 1 DIRE + 1 CLIP
64
- assert report["ai_detection"]["total_signals"] == 24
65
  assert report["metadata"]["analyzer_version"] == "6.0.0"
66
  assert "detection_version" in report["ai_detection"]
67
 
 
61
 
62
  assert "ai_detection" in report
63
  # System has 25 signals: 19 statistical + DIRE + CLIP + PRNU + ELA + metadata + DCT
64
+ assert report["ai_detection"]["total_signals"] == 25
65
  assert report["metadata"]["analyzer_version"] == "6.0.0"
66
  assert "detection_version" in report["ai_detection"]
67
 
backend/tests/test_ultra_advanced_detector.py CHANGED
@@ -60,6 +60,6 @@ def test_ultra_forensics_integration(sample_image_bytes):
60
 
61
  assert "ai_detection" in report
62
  # System has 21 signals: 19 statistical + 1 DIRE + 1 CLIP
63
- assert report["ai_detection"]["total_signals"] == 24
64
  assert report["metadata"]["analyzer_version"] == "6.0.0"
65
  assert "detection_version" in report["ai_detection"]
 
60
 
61
  assert "ai_detection" in report
62
  # System has 25 signals: 19 statistical + DIRE + CLIP + PRNU + ELA + metadata + DCT
63
+ assert report["ai_detection"]["total_signals"] == 25
64
  assert report["metadata"]["analyzer_version"] == "6.0.0"
65
  assert "detection_version" in report["ai_detection"]
frontend/index.html CHANGED
@@ -122,7 +122,7 @@
122
  <nav class="navbar">
123
  <div class="nav-container">
124
  <div class="logo">VeriFile-X</div>
125
- <div class="nav-badge">24 Detection Signals • 96-98% Accuracy</div>
126
  </div>
127
  </nav>
128
 
 
122
  <nav class="navbar">
123
  <div class="nav-container">
124
  <div class="logo">VeriFile-X</div>
125
+ <div class="nav-badge">25 Detection Signals • 96-98% Accuracy</div>
126
  </div>
127
  </nav>
128