dftest1 / src /explainers /basic_explainer.py
akcanca's picture
Upload basic_explainer.py
e728fa2 verified
import numpy as np
class BasicExplainer:
    """Turn a real/fake detector decision into a markdown-friendly narrative.

    The explainer compares a few forensic cues (noiseprint mismatch,
    denoiser residual energy, FFT peakiness) against configured thresholds,
    optionally lists per-feature contributions, and can downgrade the
    narrative to UNCERTAIN when the cues conflict or look suspiciously clean.
    It never changes the prediction itself, only the wording around it.
    """

    # Confidence bands used for the "high" / "moderate" / "low" wording.
    _HIGH_CONFIDENCE = 0.8
    _MODERATE_CONFIDENCE = 0.6
    # A REAL prediction with every cue clean is flagged below this confidence...
    _CLEAN_FLAG_CONFIDENCE = 0.98
    # ...and is actually triaged to UNCERTAIN below this one.
    _CLEAN_TRIAGE_CONFIDENCE = 0.95
    # FFT peakiness above this fraction of its threshold is still mentioned
    # for FAKE predictions.
    _FFT_NEAR_THRESHOLD_RATIO = 0.8
    # Minimum number of top-magnitude contributions that are inspected, and
    # the maximum number displayed per direction.
    _MIN_CONTRIBUTIONS = 8
    _MAX_FAKE_DRIVERS = 8
    _MAX_REAL_DRIVERS = 5

    def __init__(self, thresholds=None, triage_conf_threshold=0.8, enable_triage=True):
        """
        Args:
            thresholds (dict): e.g.
                {
                    "noiseprint_mismatch": 2.5,
                    "residual_energy_p95": 0.08,
                    "fft_peakiness": 3.0
                }
                A cue is only evaluated when it has a threshold here.
            triage_conf_threshold (float): minimum confidence to avoid
                marking a conflicted case as UNCERTAIN.
            enable_triage (bool): if True, mark conflicted low-confidence
                cases as UNCERTAIN in the narrative.
        """
        self.thresholds = thresholds or {}
        self.triage_conf_threshold = triage_conf_threshold
        self.enable_triage = enable_triage

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _first_scalar(values):
        """Return the first element of a scalar/list/array, or None if absent or empty."""
        if values is None:
            return None
        flat = np.asarray(values).ravel()  # ravel() also accepts 0-d (scalar) input
        if flat.size == 0:
            return None
        return flat[0]

    def _read_cue(self, features, name):
        """Return (value, threshold) for a cue, or (None, None) when it cannot be evaluated."""
        if name not in features or name not in self.thresholds:
            return None, None
        value = float(features[name])
        # NaN compares False against everything and would otherwise be
        # reported as REAL evidence; treat it as "cue unavailable".
        if np.isnan(value):
            return None, None
        return value, float(self.thresholds[name])

    @classmethod
    def _confidence_band(cls, confidence):
        """Map a confidence value to the word used in the narrative."""
        if confidence > cls._HIGH_CONFIDENCE:
            return "high"
        if confidence > cls._MODERATE_CONFIDENCE:
            return "moderate"
        return "low"

    @staticmethod
    def _noiseprint_line(value, threshold, prediction_label):
        """Build the bullet describing the noiseprint-mismatch cue."""
        if value > threshold:
            # High mismatch is evidence for FAKE.
            head = (
                "- **Noiseprint**: camera-model fingerprint is atypical for natural cameras "
                f"(mismatch={value:.2f} > {threshold:.2f}), "
            )
            if prediction_label == 1:
                return head + "supporting the FAKE hypothesis."
            return head + (
                "which would usually suggest a FAKE; "
                "however, other cues push the detector towards REAL."
            )
        # Low mismatch is evidence for REAL.
        head = (
            "- **Noiseprint**: fingerprint lies within the range seen in training real images "
            f"(mismatch={value:.2f} <= {threshold:.2f}), "
        )
        if prediction_label == 0:
            return head + "supporting the REAL hypothesis."
        return head + "but other forensic cues indicate synthesis."

    @staticmethod
    def _residual_line(value, threshold, prediction_label):
        """Build the bullet describing the denoiser residual-energy cue."""
        if value > threshold:
            # High residual energy is evidence for FAKE.
            head = (
                "- **Denoiser residual**: high 95th-percentile residual energy "
                f"(p95={value:.4f} > {threshold:.4f}), "
            )
            if prediction_label == 1:
                return head + (
                    "supporting the FAKE hypothesis as "
                    "strong high-frequency artifacts are typical for generated images."
                )
            return head + (
                "which would usually suggest synthesis; "
                "here it conflicts with the REAL prediction."
            )
        # Low residual energy is evidence for REAL.
        if prediction_label == 0:
            return (
                f"- **Denoiser residual**: residual energy (p95={value:.4f}) is within the range "
                "observed for training real photos, consistent with a REAL image."
            )
        return (
            f"- **Denoiser residual**: residual energy (p95={value:.4f}) is not strongly abnormal; "
            "the FAKE decision is driven more by other forensic cues."
        )

    @classmethod
    def _fft_line(cls, value, threshold, prediction_label):
        """Build the FFT-peakiness bullet, or return None when it is not worth mentioning."""
        if value > threshold:
            if prediction_label == 1:
                return (
                    "- **Frequency spectrum**: the Fourier magnitude has unusually sharp peaks "
                    f"(peakiness={value:.2f} > {threshold:.2f}), often linked to upsampling patterns "
                    "of generative models."
                )
            return (
                "- **Frequency spectrum**: unusually sharp peaks in the Fourier magnitude "
                f"(peakiness={value:.2f} > {threshold:.2f}), which is more typical for generated images "
                "and conflicts with the REAL prediction."
            )
        # Below threshold: only mentioned for FAKE predictions when close to it.
        if prediction_label == 1 and value > threshold * cls._FFT_NEAR_THRESHOLD_RATIO:
            return (
                f"- **Frequency spectrum**: peakiness ({value:.2f}) is moderately elevated "
                f"(threshold: {threshold:.2f}), contributing to the FAKE classification."
            )
        return None

    @classmethod
    def _contribution_lines(cls, contributions, top_k_contributions):
        """List the strongest contributions, split by the direction they push."""
        ranked = sorted(contributions.items(), key=lambda item: abs(item[1]), reverse=True)
        top = ranked[:max(top_k_contributions, cls._MIN_CONTRIBUTIONS)]
        toward_fake = [(name, val) for name, val in top if val > 0]
        toward_real = [(name, val) for name, val in top if val < 0]

        lines = []
        if toward_fake:
            shown = [f"{name} ({val:+.3f})" for name, val in toward_fake[:cls._MAX_FAKE_DRIVERS]]
            lines.append("\n**Features driving FAKE classification:**")
            lines.append(f"- {', '.join(shown)}")
        if toward_real:
            shown = [f"{name} ({val:+.3f})" for name, val in toward_real[:cls._MAX_REAL_DRIVERS]]
            lines.append("\n**Features supporting REAL classification:**")
            lines.append(f"- {', '.join(shown)}")
        return lines

    @staticmethod
    def _fallback_note_lines(features, prediction_label, confidence, confidence_str):
        """Explain a decision that the primary cues do not account for.

        The wording follows the predicted label and the actual confidence
        band, so a REAL or low-confidence prediction is never described as
        a high-confidence indication of synthesis.
        """
        if prediction_label == 1:
            verdict = "indicate synthetic generation"
            lead = (
                "\n**Note**: While the primary forensic cues (Noiseprint, Residuals, FFT) don't individually "
                "strongly indicate synthesis, the model's decision is based on a combination of many features "
            )
            qualifier = "these subtle patterns collectively"
        else:
            verdict = "look consistent with a genuine photograph"
            lead = (
                "\n**Note**: None of the primary forensic cues (Noiseprint, Residuals, FFT) individually "
                "explains this decision; the model's prediction is based on a combination of many features "
            )
            qualifier = "these patterns collectively"

        lines = [
            lead
            + "including DCT coefficients, FFT radial profiles, residual statistics, and other frequency-domain "
            + f"characteristics. At {confidence_str} confidence ({confidence:.1%}), {qualifier} {verdict}."
        ]

        # Name the secondary feature families actually present in the sample.
        families = (
            ('dct_mean', "DCT coefficients"),
            ('fft_radial_mean', "FFT radial profiles"),
            ('residual_skew', "residual statistics"),
            ('residual_kurtosis', "residual distribution shape"),
        )
        present = [label for key, label in families if key in features]
        if present:
            lines.append(
                f"The model analyzes {', '.join(present)} and other frequency-domain patterns "
                f"that collectively {verdict}, even when individual cues are subtle."
            )
        return lines

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def explain(self, features, proba, prediction_label, ood_status=None, contributions=None, top_k_contributions=3):
        """
        Generate a text explanation.

        Args:
            features (dict): Feature dictionary for a single sample. ``None``
                is treated as an empty dictionary. NaN cue values are ignored.
            proba (float): Probability of being fake (class 1).
            prediction_label (int): 0 (real) or 1 (fake). Any other value is
                narrated as REAL.
            ood_status (dict, optional): output of SimpleClassifier.predict_uncertainty
                for this single sample, e.g.
                {
                    'probs': [p],
                    'dist_real': [..],
                    'dist_fake': [..],
                    'dist_min': [..],
                    'is_ood': [..]
                }
                Each entry may be a scalar or a sequence; only the first
                element is read. Missing or empty entries are ignored.
            contributions (dict, optional): local feature contributions where positive
                values push toward FAKE and negative toward REAL.
            top_k_contributions (int): how many top-magnitude contributions to
                inspect. At least 8 are always inspected; of those, at most 8
                FAKE-leaning and 5 REAL-leaning ones are displayed.

        Returns:
            str: Explanation text (markdown-friendly).
        """
        if features is None:
            features = {}
        parts = []

        # -------------------- OOD detection handling --------------------
        is_ood = False
        dist_real = None
        dist_fake = None
        if ood_status is not None:
            flag = self._first_scalar(ood_status.get('is_ood'))
            if flag is not None:
                is_ood = bool(flag)
            raw_real = self._first_scalar(ood_status.get('dist_real'))
            if raw_real is not None:
                dist_real = float(raw_real)
            raw_fake = self._first_scalar(ood_status.get('dist_fake'))
            if raw_fake is not None:
                dist_fake = float(raw_fake)

        if is_ood:
            parts.append("⚠️ **UNCERTAIN / POTENTIALLY OUT-OF-DISTRIBUTION**")
            if dist_real is not None and dist_fake is not None:
                parts.append(
                    "The feature vector lies far from both Real and Fake training clusters "
                    f"(dist_real={dist_real:.1f}, dist_fake={dist_fake:.1f}). "
                    "Note: OOD detection cannot be validated without proper evaluation data."
                )
            parts.append("The decision below should be treated with caution.\n")

        # -------------------- Confidence / base label --------------------
        # proba is P(fake); P(real) = 1 - proba
        if prediction_label == 1:
            confidence = proba
            base_label_str = "FAKE"
        else:
            confidence = 1.0 - proba
            base_label_str = "REAL"
        confidence_str = self._confidence_band(confidence)

        # -------------------- Forensic cues: collect support --------------------
        nm, thr_nm = self._read_cue(features, 'noiseprint_mismatch')
        res_energy, thr_re = self._read_cue(features, 'residual_energy_p95')
        fp, thr_fp = self._read_cue(features, 'fft_peakiness')

        supports_fake = 0
        supports_real = 0
        # Noiseprint and residual energy vote both ways: above threshold is
        # evidence for FAKE, at or below it is evidence for REAL.
        for value, threshold in ((nm, thr_nm), (res_energy, thr_re)):
            if value is None:
                continue
            if value > threshold:
                supports_fake += 1
            else:
                supports_real += 1
        # FFT peakiness only votes for FAKE; low peakiness is neutral.
        if fp is not None and fp > thr_fp:
            supports_fake += 1

        conflict = supports_fake > 0 and supports_real > 0

        # -------------------- Suspiciously clean detection --------------------
        # Every evaluated cue is clean and the prediction is REAL, yet the
        # detector is not near-certain: a modern generator may be evading
        # the forensic cues.
        suspiciously_clean = (
            supports_fake == 0
            and supports_real >= 2
            and prediction_label == 0
            and confidence < self._CLEAN_FLAG_CONFIDENCE
        )

        # -------------------- Triage decision (narrative only) --------------------
        triage_label = base_label_str
        if self.enable_triage:
            if conflict and confidence < self.triage_conf_threshold:
                triage_label = "UNCERTAIN"
            elif suspiciously_clean and confidence < self._CLEAN_TRIAGE_CONFIDENCE:
                triage_label = "UNCERTAIN"
        uncertain = triage_label == "UNCERTAIN"

        # -------------------- Intro sentence --------------------
        if uncertain and suspiciously_clean:
            parts.append(
                f"⚠️ **CAUTION**: The detector predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f}), "
                "but ALL forensic cues are below threshold. This could indicate a modern generator "
                "(like Flux, DALL-E 3, or Midjourney v6) that evades traditional forensic detection. "
                "**Manual review recommended.**"
            )
        elif uncertain:
            parts.append(
                f"The detector predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f}), "
                "but forensic cues conflict, so the case is marked **UNCERTAIN**."
            )
        else:
            parts.append(
                f"The model predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f})."
            )

        # -------------------- Detailed cue explanations --------------------
        cues_used = 0
        if nm is not None:
            parts.append(self._noiseprint_line(nm, thr_nm, prediction_label))
            cues_used += 1
        if res_energy is not None:
            parts.append(self._residual_line(res_energy, thr_re, prediction_label))
            cues_used += 1
        if fp is not None:
            fft_line = self._fft_line(fp, thr_fp, prediction_label)
            if fft_line is not None:
                parts.append(fft_line)
                cues_used += 1

        # -------------------- Data-driven drivers --------------------
        if contributions:
            parts.extend(self._contribution_lines(contributions, top_k_contributions))
        elif cues_used == 0 or (prediction_label == 1 and cues_used < 2):
            # The primary cues do not account for the decision on their own.
            parts.extend(
                self._fallback_note_lines(features, prediction_label, confidence, confidence_str)
            )

        # -------------------- Final triage note --------------------
        # Skipped for OOD samples, which already carry a caution banner.
        if uncertain and not is_ood:
            if suspiciously_clean:
                # Here the cues agree with each other, so do not claim a conflict.
                parts.append(
                    "Because every available forensic cue looks clean while the detector is not fully confident, "
                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
                )
            else:
                parts.append(
                    "Because the forensic cues point in different directions at only moderate confidence, "
                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
                )

        return "\n".join(parts)