File size: 15,342 Bytes
07fe054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e728fa2
 
 
 
 
 
 
07fe054
 
 
 
 
e728fa2
 
 
07fe054
 
e728fa2
 
 
 
 
 
 
 
 
07fe054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e728fa2
07fe054
 
 
 
e728fa2
07fe054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e728fa2
 
 
 
 
 
 
 
07fe054
e728fa2
07fe054
 
e728fa2
 
 
 
07fe054
 
e728fa2
 
 
 
07fe054
e728fa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07fe054
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import numpy as np


class BasicExplainer:
    """Build a markdown-friendly narrative for a single real/fake prediction.

    The narrative combines the classifier output (label and probability),
    optional out-of-distribution information, three threshold-based forensic
    cues (noiseprint mismatch, residual energy p95, FFT peakiness) and
    optional per-feature contributions.
    """

    # Contribution display limits: at least this many top-magnitude
    # contributions are considered, and at most this many are printed per side.
    _MIN_CONTRIBUTION_POOL = 8
    _MAX_FAKE_DRIVERS = 8
    _MAX_REAL_DRIVERS = 5

    def __init__(self, thresholds=None, triage_conf_threshold=0.8, enable_triage=True):
        """
        Args:
            thresholds (dict): e.g.
                {
                  "noiseprint_mismatch": 2.5,
                  "residual_energy_p95": 0.08,
                  "fft_peakiness": 3.0
                }
            triage_conf_threshold (float): minimum confidence to avoid
                                           marking a conflicted case as UNCERTAIN.
            enable_triage (bool): if True, mark conflicted low-confidence
                                  cases as UNCERTAIN in the narrative.
        """
        self.thresholds = thresholds or {}
        self.triage_conf_threshold = triage_conf_threshold
        self.enable_triage = enable_triage

    @staticmethod
    def _first_scalar(value, cast):
        """Return ``cast`` of the first element of ``value``, or None.

        Accepts plain scalars as well as lists/arrays of any rank, so callers
        may pass either ``True`` or ``[True]``. Missing or empty input yields
        None instead of raising.
        """
        if value is None:
            return None
        flat = np.ravel(value)
        if flat.size == 0:
            return None
        return cast(flat[0])

    def _cue(self, features, name):
        """Return ``(value, threshold)`` as floats for cue ``name``.

        Returns ``(None, None)`` unless the cue is present in both ``features``
        and ``self.thresholds``.
        """
        if name in features and name in self.thresholds:
            return float(features[name]), float(self.thresholds[name])
        return None, None

    @staticmethod
    def _describe_cues(prediction_label, noiseprint, residual, peakiness):
        """Return one markdown bullet per forensic cue worth reporting.

        Each cue argument is a ``(value, threshold)`` pair, or ``(None, None)``
        when the cue is unavailable. The wording depends on whether the cue
        agrees with ``prediction_label``.
        """
        lines = []

        nm, thr_nm = noiseprint
        if nm is not None:
            if nm > thr_nm:
                # high mismatch -> FAKE evidence
                if prediction_label == 1:
                    lines.append(
                        f"- **Noiseprint**: camera-model fingerprint is atypical for natural cameras "
                        f"(mismatch={nm:.2f} > {thr_nm:.2f}), supporting the FAKE hypothesis."
                    )
                else:
                    lines.append(
                        f"- **Noiseprint**: camera-model fingerprint is atypical for natural cameras "
                        f"(mismatch={nm:.2f} > {thr_nm:.2f}), which would usually suggest a FAKE; "
                        f"however, other cues push the detector towards REAL."
                    )
            else:
                # low mismatch -> REAL evidence
                if prediction_label == 0:
                    lines.append(
                        f"- **Noiseprint**: fingerprint lies within the range seen in training real images "
                        f"(mismatch={nm:.2f} <= {thr_nm:.2f}), supporting the REAL hypothesis."
                    )
                else:
                    lines.append(
                        f"- **Noiseprint**: fingerprint lies within the range seen in training real images "
                        f"(mismatch={nm:.2f} <= {thr_nm:.2f}), but other forensic cues indicate synthesis."
                    )

        res_p95, thr_res = residual
        if res_p95 is not None:
            if res_p95 > thr_res:
                # high residual energy -> FAKE evidence
                if prediction_label == 1:
                    lines.append(
                        f"- **Denoiser residual**: high 95th-percentile residual energy "
                        f"(p95={res_p95:.4f} > {thr_res:.4f}), supporting the FAKE hypothesis as "
                        f"strong high-frequency artifacts are typical for generated images."
                    )
                else:
                    lines.append(
                        f"- **Denoiser residual**: high 95th-percentile residual energy "
                        f"(p95={res_p95:.4f} > {thr_res:.4f}), which would usually suggest synthesis; "
                        f"here it conflicts with the REAL prediction."
                    )
            else:
                # low residual energy -> REAL evidence
                if prediction_label == 0:
                    lines.append(
                        f"- **Denoiser residual**: residual energy (p95={res_p95:.4f}) is within the range "
                        f"observed for training real photos, consistent with a REAL image."
                    )
                else:
                    lines.append(
                        f"- **Denoiser residual**: residual energy (p95={res_p95:.4f}) is not strongly abnormal; "
                        f"the FAKE decision is driven more by other forensic cues."
                    )

        fp, thr_fp = peakiness
        if fp is not None:
            if fp > thr_fp:
                if prediction_label == 1:
                    lines.append(
                        f"- **Frequency spectrum**: the Fourier magnitude has unusually sharp peaks "
                        f"(peakiness={fp:.2f} > {thr_fp:.2f}), often linked to upsampling patterns "
                        f"of generative models."
                    )
                else:
                    lines.append(
                        f"- **Frequency spectrum**: unusually sharp peaks in the Fourier magnitude "
                        f"(peakiness={fp:.2f} > {thr_fp:.2f}), which is more typical for generated images "
                        f"and conflicts with the REAL prediction."
                    )
            elif prediction_label == 1 and fp > thr_fp * 0.8:
                # Below threshold but within 80% of it: still worth mentioning
                # when the prediction is FAKE.
                lines.append(
                    f"- **Frequency spectrum**: peakiness ({fp:.2f}) is moderately elevated "
                    f"(threshold: {thr_fp:.2f}), contributing to the FAKE classification."
                )

        return lines

    def _contribution_lines(self, contributions, top_k_contributions):
        """Return the "drivers" section built from signed feature contributions."""
        ranked = sorted(contributions.items(), key=lambda item: abs(item[1]), reverse=True)
        pool = ranked[:max(top_k_contributions, self._MIN_CONTRIBUTION_POOL)]
        toward_fake = [(name, val) for name, val in pool if val > 0]
        toward_real = [(name, val) for name, val in pool if val < 0]

        lines = []
        if toward_fake:
            shown = [f"{name} ({val:+.3f})" for name, val in toward_fake[:self._MAX_FAKE_DRIVERS]]
            lines.append("\n**Features driving FAKE classification:**")
            lines.append(f"- {', '.join(shown)}")
        if toward_real:
            shown = [f"{name} ({val:+.3f})" for name, val in toward_real[:self._MAX_REAL_DRIVERS]]
            lines.append("\n**Features supporting REAL classification:**")
            lines.append(f"- {', '.join(shown)}")
        return lines

    @staticmethod
    def _fallback_note_lines(features, prediction_label, confidence, confidence_str):
        """Return the generic note used when the primary cues explain little.

        The wording follows the predicted label and the actual confidence
        bucket, so a REAL or low-confidence prediction is never described as
        a high-confidence indication of synthesis.
        """
        shared = (
            "the model's decision is based on a combination of many features "
            "including DCT coefficients, FFT radial profiles, residual statistics, and other frequency-domain "
            "characteristics. "
        )
        if prediction_label == 1:
            opening = (
                "\n**Note**: While the primary forensic cues (Noiseprint, Residuals, FFT) don't individually "
                "strongly indicate synthesis, "
            )
            verdict = "indicate synthetic generation"
            closing = (
                f"The {confidence_str} confidence ({confidence:.1%}) suggests these subtle patterns collectively "
                f"{verdict}."
            )
        else:
            opening = (
                "\n**Note**: The primary forensic cues (Noiseprint, Residuals, FFT) do not individually "
                "explain this decision; "
            )
            verdict = "resemble an authentic image"
            closing = (
                f"The {confidence_str} confidence ({confidence:.1%}) suggests these patterns collectively "
                f"{verdict}."
            )
        lines = [opening + shared + closing]

        # Name the secondary feature families that are actually present.
        families = (
            ('dct_mean', "DCT coefficients"),
            ('fft_radial_mean', "FFT radial profiles"),
            ('residual_skew', "residual statistics"),
            ('residual_kurtosis', "residual distribution shape"),
        )
        present = [label for key, label in families if key in features]
        if present:
            lines.append(
                f"The model analyzes {', '.join(present)} and other frequency-domain patterns "
                f"that collectively {verdict}, even when individual cues are subtle."
            )
        return lines

    def explain(self, features, proba, prediction_label, ood_status=None, contributions=None, top_k_contributions=3):
        """
        Generate a text explanation.

        Args:
            features (dict): Feature dictionary for a single sample.
            proba (float): Probability of being fake (class 1).
            prediction_label (int): 0 (real) or 1 (fake).
            ood_status (dict, optional): OOD information for this single sample, e.g.
                                         {
                                           'probs': [p],
                                           'dist_real': [..],
                                           'dist_fake': [..],
                                           'dist_min': [..],
                                           'is_ood': [..]
                                         }
                                         Only 'is_ood', 'dist_real' and 'dist_fake' are read;
                                         each may be a scalar or a sequence (first element used).
            contributions (dict, optional): local feature contributions where positive
                                            values push toward FAKE and negative toward REAL.
            top_k_contributions (int): size of the pool of top-magnitude contributions
                                       to consider. At least 8 are always considered; at most
                                       8 FAKE-leaning and 5 REAL-leaning entries are printed.

        Returns:
            str: Explanation text (markdown-friendly).
        """
        parts = []

        # -------------------- OOD detection handling --------------------
        is_ood = False
        dist_real = None
        dist_fake = None
        if ood_status is not None:
            is_ood = bool(self._first_scalar(ood_status.get('is_ood'), bool))
            dist_real = self._first_scalar(ood_status.get('dist_real'), float)
            dist_fake = self._first_scalar(ood_status.get('dist_fake'), float)

        if is_ood:
            parts.append("⚠️ **UNCERTAIN / POTENTIALLY OUT-OF-DISTRIBUTION**")
            if dist_real is not None and dist_fake is not None:
                parts.append(
                    f"The feature vector lies far from both Real and Fake training clusters "
                    f"(dist_real={dist_real:.1f}, dist_fake={dist_fake:.1f}). "
                    f"Note: OOD detection cannot be validated without proper evaluation data."
                )
            parts.append(
                "The decision below should be treated with caution.\n"
            )

        # -------------------- Confidence / base label --------------------
        # proba is P(fake); P(real) = 1 - proba
        if prediction_label == 1:
            confidence = proba
            base_label_str = "FAKE"
        else:
            confidence = 1.0 - proba
            base_label_str = "REAL"

        if confidence > 0.8:
            confidence_str = "high"
        elif confidence > 0.6:
            confidence_str = "moderate"
        else:
            confidence_str = "low"

        # -------------------- Forensic cues: collect support --------------------
        noiseprint = self._cue(features, 'noiseprint_mismatch')
        residual = self._cue(features, 'residual_energy_p95')
        peakiness = self._cue(features, 'fft_peakiness')

        supports_fake = 0
        supports_real = 0
        # Noiseprint mismatch and residual energy vote both ways:
        # above threshold => evidence for FAKE, otherwise evidence for REAL.
        for value, threshold in (noiseprint, residual):
            if value is None:
                continue
            if value > threshold:
                supports_fake += 1
            else:
                supports_real += 1
        # FFT peakiness only votes for FAKE; below threshold it is neutral.
        if peakiness[0] is not None and peakiness[0] > peakiness[1]:
            supports_fake += 1

        conflict = (supports_fake > 0 and supports_real > 0)

        # -------------------- Suspiciously clean detection --------------------
        # No cue fired, at least two cues look clean, and the prediction is REAL
        # without very high confidence: this could be a modern generator that
        # evades the forensic cues.
        suspiciously_clean = (supports_fake == 0 and supports_real >= 2 and
                              prediction_label == 0 and confidence < 0.98)

        # -------------------- Triage decision (narrative only) --------------------
        triage_label = base_label_str
        if self.enable_triage and conflict and confidence < self.triage_conf_threshold:
            triage_label = "UNCERTAIN"
        elif self.enable_triage and suspiciously_clean and confidence < 0.95:
            # Modern generators like Flux may evade all forensic cues
            triage_label = "UNCERTAIN"

        # Intro sentence
        if triage_label == "UNCERTAIN" and suspiciously_clean:
            parts.append(
                f"⚠️ **CAUTION**: The detector predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f}), "
                f"but ALL forensic cues are below threshold. This could indicate a modern generator "
                f"(like Flux, DALL-E 3, or Midjourney v6) that evades traditional forensic detection. "
                f"**Manual review recommended.**"
            )
        elif triage_label == "UNCERTAIN":
            parts.append(
                f"The detector predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f}), "
                f"but forensic cues conflict, so the case is marked **UNCERTAIN**."
            )
        else:
            parts.append(
                f"The model predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f})."
            )

        # -------------------- Detailed cue explanations --------------------
        cue_lines = self._describe_cues(prediction_label, noiseprint, residual, peakiness)
        parts.extend(cue_lines)
        cues_used = len(cue_lines)

        # -------------------- Data-driven drivers --------------------
        if contributions:
            parts.extend(self._contribution_lines(contributions, top_k_contributions))
        elif cues_used == 0 or (prediction_label == 1 and cues_used < 2):
            # Few or no primary cues were reported: explain that the decision
            # rests on the combination of the remaining features.
            parts.extend(
                self._fallback_note_lines(features, prediction_label, confidence, confidence_str)
            )

        # -------------------- Final triage note --------------------
        if triage_label == "UNCERTAIN" and not is_ood:
            if suspiciously_clean:
                # No cue conflicts here; the concern is that nothing fired at all.
                parts.append(
                    "Because no forensic cue fired while the confidence is not very high, "
                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
                )
            else:
                parts.append(
                    "Because the forensic cues point in different directions at only moderate confidence, "
                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
                )

        return "\n".join(parts)