Spaces:

akcanca
/

dftest1

Sleeping

App Files Files Community

dftest1 / src /explainers /basic_explainer.py

akcanca

Upload basic_explainer.py

e728fa2 verified 3 months ago

raw

history blame contribute delete

15.3 kB

	import numpy as np


	class BasicExplainer:
	    """Turn a single real/fake detector decision into a readable text report.

	    The report combines the predicted label and its confidence with up to
	    three threshold-based forensic cues (noiseprint mismatch, denoiser
	    residual energy, FFT peakiness), optional out-of-distribution (OOD)
	    information and optional per-feature contributions.
	    """

	    # Confidence strictly above these cut-offs is worded "high" / "moderate".
	    _HIGH_CONFIDENCE = 0.8
	    _MODERATE_CONFIDENCE = 0.6
	    # A REAL prediction whose cues are all clean is flagged only below the
	    # first limit and triaged as UNCERTAIN only below the second one.
	    _CLEAN_FLAG_LIMIT = 0.98
	    _CLEAN_TRIAGE_LIMIT = 0.95
	    # For FAKE predictions, FFT peakiness above this fraction of its
	    # threshold is still mentioned even though it is below the threshold.
	    _FFT_NEAR_THRESHOLD_RATIO = 0.8
	    # Display limits for the contribution lists.
	    _MIN_TOP_CONTRIBUTIONS = 8
	    _MAX_FAKE_DRIVERS = 8
	    _MAX_REAL_DRIVERS = 5

	    def __init__(self, thresholds=None, triage_conf_threshold=0.8, enable_triage=True):
	        """
	        Args:
	            thresholds (dict): per-cue decision thresholds, e.g.
	                {
	                    "noiseprint_mismatch": 2.5,
	                    "residual_energy_p95": 0.08,
	                    "fft_peakiness": 3.0
	                }
	                A cue is only used when its key is present both here and in
	                the feature dictionary passed to ``explain``.
	            triage_conf_threshold (float): minimum confidence to avoid
	                marking a conflicted case as UNCERTAIN.
	            enable_triage (bool): if True, mark conflicted low-confidence
	                cases as UNCERTAIN in the narrative.
	        """
	        self.thresholds = thresholds or {}
	        self.triage_conf_threshold = triage_conf_threshold
	        self.enable_triage = enable_triage

	    @staticmethod
	    def _first_scalar(value):
	        """Return the first element of ``value``, or None if there is none.

	        Accepts plain scalars, 0-d arrays and (nested) sequences alike, so a
	        caller may pass ``True`` as well as ``[True]``.
	        """
	        if value is None:
	            return None
	        flat = np.ravel(np.asarray(value))
	        return flat[0] if flat.size else None

	    def _read_ood(self, ood_status):
	        """Extract ``(is_ood, dist_real, dist_fake)`` from an OOD status dict.

	        Missing entries yield ``False`` for the flag and ``None`` for the
	        distances.
	        """
	        if ood_status is None:
	            return False, None, None
	        flag = self._first_scalar(ood_status.get('is_ood'))
	        near_real = self._first_scalar(ood_status.get('dist_real'))
	        near_fake = self._first_scalar(ood_status.get('dist_fake'))
	        return (
	            False if flag is None else bool(flag),
	            None if near_real is None else float(near_real),
	            None if near_fake is None else float(near_fake),
	        )

	    def _cue(self, features, key):
	        """Return ``(value, threshold)`` as floats, or ``(None, None)`` when
	        the cue is missing from either the features or the thresholds."""
	        if key in features and key in self.thresholds:
	            return float(features[key]), float(self.thresholds[key])
	        return None, None

	    @staticmethod
	    def _noiseprint_line(nm, thr_nm, prediction_label):
	        """Bullet describing the noiseprint cue, or None if the cue is unused."""
	        if nm is None or thr_nm is None:
	            return None
	        if nm > thr_nm:
	            # High mismatch is evidence for FAKE.
	            if prediction_label == 1:
	                return (
	                    f"- Noiseprint: camera-model fingerprint is atypical for natural cameras "
	                    f"(mismatch={nm:.2f} > {thr_nm:.2f}), supporting the FAKE hypothesis."
	                )
	            return (
	                f"- Noiseprint: camera-model fingerprint is atypical for natural cameras "
	                f"(mismatch={nm:.2f} > {thr_nm:.2f}), which would usually suggest a FAKE; "
	                f"however, other cues push the detector towards REAL."
	            )
	        # Low mismatch is evidence for REAL.
	        if prediction_label == 0:
	            return (
	                f"- Noiseprint: fingerprint lies within the range seen in training real images "
	                f"(mismatch={nm:.2f} <= {thr_nm:.2f}), supporting the REAL hypothesis."
	            )
	        return (
	            f"- Noiseprint: fingerprint lies within the range seen in training real images "
	            f"(mismatch={nm:.2f} <= {thr_nm:.2f}), but other forensic cues indicate synthesis."
	        )

	    @staticmethod
	    def _residual_line(energy, thr_energy, prediction_label):
	        """Bullet describing the residual-energy cue, or None if unused."""
	        if energy is None or thr_energy is None:
	            return None
	        if energy > thr_energy:
	            # High residual energy is evidence for FAKE.
	            if prediction_label == 1:
	                return (
	                    f"- Denoiser residual: high 95th-percentile residual energy "
	                    f"(p95={energy:.4f} > {thr_energy:.4f}), supporting the FAKE hypothesis as "
	                    f"strong high-frequency artifacts are typical for generated images."
	                )
	            return (
	                f"- Denoiser residual: high 95th-percentile residual energy "
	                f"(p95={energy:.4f} > {thr_energy:.4f}), which would usually suggest synthesis; "
	                f"here it conflicts with the REAL prediction."
	            )
	        # Low residual energy is evidence for REAL.
	        if prediction_label == 0:
	            return (
	                f"- Denoiser residual: residual energy (p95={energy:.4f}) is within the range "
	                f"observed for training real photos, consistent with a REAL image."
	            )
	        return (
	            f"- Denoiser residual: residual energy (p95={energy:.4f}) is not strongly abnormal; "
	            f"the FAKE decision is driven more by other forensic cues."
	        )

	    @classmethod
	    def _fft_line(cls, fp, thr_fp, prediction_label):
	        """Bullet describing the FFT-peakiness cue, or None if nothing to say.

	        Below the threshold the cue is only mentioned for FAKE predictions
	        and only when the value is close to the threshold.
	        """
	        if fp is None or thr_fp is None:
	            return None
	        if fp > thr_fp:
	            if prediction_label == 1:
	                return (
	                    f"- Frequency spectrum: the Fourier magnitude has unusually sharp peaks "
	                    f"(peakiness={fp:.2f} > {thr_fp:.2f}), often linked to upsampling patterns "
	                    f"of generative models."
	                )
	            return (
	                f"- Frequency spectrum: unusually sharp peaks in the Fourier magnitude "
	                f"(peakiness={fp:.2f} > {thr_fp:.2f}), which is more typical for generated images "
	                f"and conflicts with the REAL prediction."
	            )
	        if prediction_label == 1 and fp > thr_fp * cls._FFT_NEAR_THRESHOLD_RATIO:
	            return (
	                f"- Frequency spectrum: peakiness ({fp:.2f}) is moderately elevated "
	                f"(threshold: {thr_fp:.2f}), contributing to the FAKE classification."
	            )
	        return None

	    def explain(self, features, proba, prediction_label, ood_status=None, contributions=None, top_k_contributions=3):
	        """
	        Generate a text explanation.

	        Args:
	            features (dict): Feature dictionary for a single sample.
	            proba (float): Probability of being fake (class 1).
	            prediction_label (int): 0 (real) or 1 (fake). Any value other
	                than 1 is reported as REAL.
	            ood_status (dict, optional): OOD information for this sample, e.g.
	                {
	                    'probs': [p],
	                    'dist_real': [..],
	                    'dist_fake': [..],
	                    'dist_min': [..],
	                    'is_ood': [..]
	                }
	                Only 'is_ood', 'dist_real' and 'dist_fake' are read; each may
	                be a one-element sequence or a plain scalar.
	            contributions (dict, optional): local feature contributions where positive
	                values push toward FAKE and negative toward REAL.
	            top_k_contributions (int): how many top-magnitude contributions to
	                consider. Values below 8 are raised to 8; at most 8
	                FAKE-leaning and 5 REAL-leaning entries are printed.

	        Returns:
	            str: Explanation text (markdown-friendly), lines joined by "\\n".
	        """
	        parts = []

	        # -------------------- OOD detection handling --------------------
	        is_ood, dist_real, dist_fake = self._read_ood(ood_status)
	        if is_ood:
	            parts.append("⚠️ UNCERTAIN / POTENTIALLY OUT-OF-DISTRIBUTION")
	            if dist_real is not None and dist_fake is not None:
	                parts.append(
	                    f"The feature vector lies far from both Real and Fake training clusters "
	                    f"(dist_real={dist_real:.1f}, dist_fake={dist_fake:.1f}). "
	                    f"Note: OOD detection cannot be validated without proper evaluation data."
	                )
	            parts.append(
	                "The decision below should be treated with caution.\n"
	            )

	        # -------------------- Confidence / base label --------------------
	        # proba is P(fake); P(real) = 1 - proba
	        predicts_fake = prediction_label == 1
	        if predicts_fake:
	            confidence = proba
	            base_label_str = "FAKE"
	        else:
	            confidence = 1.0 - proba
	            base_label_str = "REAL"

	        if confidence > self._HIGH_CONFIDENCE:
	            confidence_str = "high"
	        elif confidence > self._MODERATE_CONFIDENCE:
	            confidence_str = "moderate"
	        else:
	            confidence_str = "low"

	        # -------------------- Forensic cues: collect support --------------------
	        nm, thr_nm = self._cue(features, 'noiseprint_mismatch')
	        energy, thr_energy = self._cue(features, 'residual_energy_p95')
	        fp, thr_fp = self._cue(features, 'fft_peakiness')

	        supports_fake = 0
	        supports_real = 0
	        # Noiseprint and residual energy vote either way.
	        for value, limit in ((nm, thr_nm), (energy, thr_energy)):
	            if value is None:
	                continue
	            if value > limit:
	                supports_fake += 1
	            else:
	                supports_real += 1
	        # FFT peakiness only votes for FAKE; below threshold it is neutral.
	        if fp is not None and fp > thr_fp:
	            supports_fake += 1

	        conflict = supports_fake > 0 and supports_real > 0

	        # -------------------- Suspiciously clean detection --------------------
	        # A REAL prediction where every voting cue is clean but confidence is
	        # not very high may be a modern generator that evades these cues.
	        suspiciously_clean = (
	            supports_fake == 0
	            and supports_real >= 2
	            and prediction_label == 0
	            and confidence < self._CLEAN_FLAG_LIMIT
	        )

	        # -------------------- Triage decision (narrative only) --------------------
	        triage_label = base_label_str
	        if self.enable_triage:
	            if conflict and confidence < self.triage_conf_threshold:
	                triage_label = "UNCERTAIN"
	            elif suspiciously_clean and confidence < self._CLEAN_TRIAGE_LIMIT:
	                triage_label = "UNCERTAIN"
	        uncertain = triage_label == "UNCERTAIN"

	        # Intro sentence
	        if uncertain and suspiciously_clean:
	            parts.append(
	                f"⚠️ CAUTION: The detector predicts this image is {base_label_str} "
	                f"with {confidence_str} confidence ({confidence:.2f}), "
	                f"but ALL forensic cues are below threshold. This could indicate a modern generator "
	                f"(like Flux, DALL-E 3, or Midjourney v6) that evades traditional forensic detection. "
	                f"Manual review recommended."
	            )
	        elif uncertain:
	            parts.append(
	                f"The detector predicts this image is {base_label_str} "
	                f"with {confidence_str} confidence ({confidence:.2f}), "
	                f"but forensic cues conflict, so the case is marked UNCERTAIN."
	            )
	        else:
	            parts.append(
	                f"The model predicts this image is {base_label_str} "
	                f"with {confidence_str} confidence ({confidence:.2f})."
	            )

	        # -------------------- Detailed cue explanations --------------------
	        candidate_lines = (
	            self._noiseprint_line(nm, thr_nm, prediction_label),
	            self._residual_line(energy, thr_energy, prediction_label),
	            self._fft_line(fp, thr_fp, prediction_label),
	        )
	        cue_lines = [line for line in candidate_lines if line is not None]
	        parts.extend(cue_lines)
	        cues_used = len(cue_lines)

	        # -------------------- Data-driven drivers --------------------
	        if contributions:
	            ranked = sorted(contributions.items(), key=lambda item: abs(item[1]), reverse=True)
	            top = ranked[:max(top_k_contributions, self._MIN_TOP_CONTRIBUTIONS)]
	            pos = [(name, val) for name, val in top if val > 0]
	            neg = [(name, val) for name, val in top if val < 0]

	            if pos:
	                parts.append("\nFeatures driving FAKE classification:")
	                pos_display = [f"{name} ({val:+.3f})" for name, val in pos[:self._MAX_FAKE_DRIVERS]]
	                parts.append(f"- {', '.join(pos_display)}")
	            if neg:
	                parts.append("\nFeatures supporting REAL classification:")
	                neg_display = [f"{name} ({val:+.3f})" for name, val in neg[:self._MAX_REAL_DRIVERS]]
	                parts.append(f"- {', '.join(neg_display)}")
	        elif cues_used == 0 or (predicts_fake and cues_used < 2):
	            # Few or no cue bullets were produced: explain that the decision
	            # rests on the wider feature set. The wording follows the actual
	            # predicted label and confidence bucket.
	            if predicts_fake:
	                outcome = "indicate synthetic generation"
	            else:
	                outcome = "point to an authentic image"
	            parts.append(
	                f"\nNote: While the primary forensic cues (Noiseprint, Residuals, FFT) don't individually "
	                f"strongly indicate synthesis, the model's decision is based on a combination of many features "
	                f"including DCT coefficients, FFT radial profiles, residual statistics, and other frequency-domain "
	                f"characteristics. The {confidence_str} confidence ({confidence:.1%}) suggests these subtle patterns collectively "
	                f"{outcome}."
	            )

	            # List some of the other feature families present in the input.
	            family_by_key = (
	                ('dct_mean', "DCT coefficients"),
	                ('fft_radial_mean', "FFT radial profiles"),
	                ('residual_skew', "residual statistics"),
	                ('residual_kurtosis', "residual distribution shape"),
	            )
	            other_features = [label for key, label in family_by_key if key in features]
	            if other_features:
	                parts.append(
	                    f"The model analyzes {', '.join(other_features)} and other frequency-domain patterns "
	                    f"that collectively {outcome}, even when individual cues are subtle."
	                )

	        # For triaged cases that were not already flagged as OOD, add a final note.
	        if uncertain and not is_ood:
	            if suspiciously_clean:
	                # No cue conflict here: every cue is clean, which is the concern.
	                parts.append(
	                    "Because every available forensic cue is below threshold while the confidence is not decisive, "
	                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
	                )
	            else:
	                parts.append(
	                    "Because the forensic cues point in different directions at only moderate confidence, "
	                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
	                )

	        return "\n".join(parts)