# Spaces: akcanca / dftest1 (Sleeping) - File size: 15,342 Bytes

import numpy as np


class BasicExplainer:
    """Build a markdown-friendly text explanation for a REAL/FAKE prediction.

    The explanation combines the classifier's probability, optional
    out-of-distribution (OOD) information, up to three threshold-based
    forensic cues (noiseprint mismatch, residual energy, FFT peakiness) and
    optional per-feature contributions.
    """

    def __init__(self, thresholds=None, triage_conf_threshold=0.8, enable_triage=True):
        """
        Args:
            thresholds (dict): e.g.
                {
                  "noiseprint_mismatch": 2.5,
                  "residual_energy_p95": 0.08,
                  "fft_peakiness": 3.0
                }
                A cue is only evaluated when its key is present both here
                and in the sample's feature dict. ``None`` means no cues.
            triage_conf_threshold (float): minimum confidence to avoid
                                           marking a conflicted case as UNCERTAIN.
            enable_triage (bool): if True, mark conflicted low-confidence
                                  cases as UNCERTAIN in the narrative.
        """
        self.thresholds = thresholds or {}
        self.triage_conf_threshold = triage_conf_threshold
        self.enable_triage = enable_triage

    @staticmethod
    def _first_value(value):
        """Return the first element of a scalar or array-like, or None if absent/empty."""
        if value is None:
            return None
        # ravel() lets plain scalars (0-d arrays) be indexed like 1-element arrays.
        flat = np.asarray(value).ravel()
        if flat.size == 0:
            return None
        return flat[0]

    def explain(self, features, proba, prediction_label, ood_status=None, contributions=None, top_k_contributions=3):
        """
        Generate a text explanation.

        Args:
            features (dict): Feature dictionary for a single sample.
            proba (float): Probability of being fake (class 1).
            prediction_label (int): 1 is reported as FAKE; any other value
                                    is reported as REAL.
            ood_status (dict, optional): output of SimpleClassifier.predict_uncertainty
                                         for this single sample, e.g.
                                         {
                                           'probs': [p],
                                           'dist_real': [..],
                                           'dist_fake': [..],
                                           'dist_min': [..],
                                           'is_ood': [..]
                                         }
                                         Only 'is_ood', 'dist_real' and 'dist_fake'
                                         are read; each may be a scalar or an
                                         array-like whose first element is used.
            contributions (dict, optional): local feature contributions where positive
                                            values push toward FAKE and negative toward REAL.
            top_k_contributions (int): lower bound on how many top-magnitude
                                       contributions are considered. At least 8 are
                                       always considered; at most 8 FAKE drivers and
                                       5 REAL drivers are listed.

        Returns:
            str: Explanation text (markdown-friendly), parts joined by newlines.
        """
        explanation_parts = []

        # -------------------- OOD detection handling --------------------
        is_ood = False
        dist_real = None
        dist_fake = None
        if ood_status is not None:
            ood_flag = self._first_value(ood_status.get('is_ood'))
            if ood_flag is not None:
                is_ood = bool(ood_flag)
            dist_real_val = self._first_value(ood_status.get('dist_real'))
            dist_fake_val = self._first_value(ood_status.get('dist_fake'))
            if dist_real_val is not None:
                dist_real = float(dist_real_val)
            if dist_fake_val is not None:
                dist_fake = float(dist_fake_val)

        if is_ood:
            explanation_parts.append("⚠️ **UNCERTAIN / POTENTIALLY OUT-OF-DISTRIBUTION**")
            if dist_real is not None and dist_fake is not None:
                explanation_parts.append(
                    f"The feature vector lies far from both Real and Fake training clusters "
                    f"(dist_real={dist_real:.1f}, dist_fake={dist_fake:.1f}). "
                    f"Note: OOD detection cannot be validated without proper evaluation data."
                )
            explanation_parts.append(
                "The decision below should be treated with caution.\n"
            )

        # -------------------- Confidence / base label --------------------
        # proba is P(fake); P(real) = 1 - proba
        if prediction_label == 1:
            confidence = proba
            base_label_str = "FAKE"
        else:
            confidence = 1.0 - proba
            base_label_str = "REAL"

        if confidence > 0.8:
            confidence_str = "high"
        elif confidence > 0.6:
            confidence_str = "moderate"
        else:
            confidence_str = "low"

        # -------------------- Forensic cues: collect support --------------------
        supports_fake = 0
        supports_real = 0

        # Noiseprint mismatch
        nm = None
        thr_nm = None
        if 'noiseprint_mismatch' in features and 'noiseprint_mismatch' in self.thresholds:
            nm = float(features['noiseprint_mismatch'])
            thr_nm = float(self.thresholds['noiseprint_mismatch'])
            # High mismatch ⇒ evidence for FAKE, low ⇒ evidence for REAL
            if nm > thr_nm:
                supports_fake += 1
            else:
                supports_real += 1

        # Residual energy p95 (named res_energy to avoid shadowing the stdlib `re` module)
        res_energy = None
        thr_re = None
        if 'residual_energy_p95' in features and 'residual_energy_p95' in self.thresholds:
            res_energy = float(features['residual_energy_p95'])
            thr_re = float(self.thresholds['residual_energy_p95'])
            # High residual energy ⇒ evidence for FAKE
            if res_energy > thr_re:
                supports_fake += 1
            else:
                supports_real += 1

        # FFT peakiness
        fp = None
        thr_fp = None
        if 'fft_peakiness' in features and 'fft_peakiness' in self.thresholds:
            fp = float(features['fft_peakiness'])
            thr_fp = float(self.thresholds['fft_peakiness'])
            # High peakiness ⇒ evidence for FAKE; otherwise treat as neutral/weak
            if fp > thr_fp:
                supports_fake += 1

        conflict = (supports_fake > 0 and supports_real > 0)

        # -------------------- Suspiciously clean detection --------------------
        # If no cue exceeds its threshold, both noiseprint and residual cues look
        # "clean", AND the prediction is REAL, this could indicate a modern
        # generator that evades detection. Flag it unless confidence is very high.
        suspiciously_clean = (supports_fake == 0 and supports_real >= 2 and
                              prediction_label == 0 and confidence < 0.98)

        # -------------------- Triage decision (narrative only) --------------------
        # `conflict` and `suspiciously_clean` are mutually exclusive: the former
        # needs supports_fake > 0, the latter supports_fake == 0.
        triage_label = base_label_str
        if self.enable_triage and conflict and confidence < self.triage_conf_threshold:
            triage_label = "UNCERTAIN"
        elif self.enable_triage and suspiciously_clean and confidence < 0.95:
            # Modern generators like Flux may evade all forensic cues
            triage_label = "UNCERTAIN"

        # Intro sentence
        if triage_label == "UNCERTAIN" and suspiciously_clean:
            explanation_parts.append(
                f"⚠️ **CAUTION**: The detector predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f}), "
                f"but ALL forensic cues are below threshold. This could indicate a modern generator "
                f"(like Flux, DALL-E 3, or Midjourney v6) that evades traditional forensic detection. "
                f"**Manual review recommended.**"
            )
        elif triage_label == "UNCERTAIN":
            explanation_parts.append(
                f"The detector predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f}), "
                f"but forensic cues conflict, so the case is marked **UNCERTAIN**."
            )
        else:
            explanation_parts.append(
                f"The model predicts this image is **{base_label_str}** "
                f"with {confidence_str} confidence ({confidence:.2f})."
            )

        # -------------------- Detailed cue explanations --------------------
        cues_used = 0

        # Noiseprint mismatch explanation
        if nm is not None and thr_nm is not None:
            if nm > thr_nm:
                # high mismatch → FAKE evidence
                if prediction_label == 1:
                    explanation_parts.append(
                        f"- **Noiseprint**: camera-model fingerprint is atypical for natural cameras "
                        f"(mismatch={nm:.2f} > {thr_nm:.2f}), supporting the FAKE hypothesis."
                    )
                else:
                    explanation_parts.append(
                        f"- **Noiseprint**: camera-model fingerprint is atypical for natural cameras "
                        f"(mismatch={nm:.2f} > {thr_nm:.2f}), which would usually suggest a FAKE; "
                        f"however, other cues push the detector towards REAL."
                    )
            else:
                # low mismatch → REAL evidence
                if prediction_label == 0:
                    explanation_parts.append(
                        f"- **Noiseprint**: fingerprint lies within the range seen in training real images "
                        f"(mismatch={nm:.2f} <= {thr_nm:.2f}), supporting the REAL hypothesis."
                    )
                else:
                    explanation_parts.append(
                        f"- **Noiseprint**: fingerprint lies within the range seen in training real images "
                        f"(mismatch={nm:.2f} <= {thr_nm:.2f}), but other forensic cues indicate synthesis."
                    )
            cues_used += 1

        # Residual energy explanation
        if res_energy is not None and thr_re is not None:
            if res_energy > thr_re:
                # high residual energy → FAKE evidence
                if prediction_label == 1:
                    explanation_parts.append(
                        f"- **Denoiser residual**: high 95th-percentile residual energy "
                        f"(p95={res_energy:.4f} > {thr_re:.4f}), supporting the FAKE hypothesis as "
                        f"strong high-frequency artifacts are typical for generated images."
                    )
                else:
                    explanation_parts.append(
                        f"- **Denoiser residual**: high 95th-percentile residual energy "
                        f"(p95={res_energy:.4f} > {thr_re:.4f}), which would usually suggest synthesis; "
                        f"here it conflicts with the REAL prediction."
                    )
            else:
                # low residual energy → REAL evidence
                if prediction_label == 0:
                    explanation_parts.append(
                        f"- **Denoiser residual**: residual energy (p95={res_energy:.4f}) is within the range "
                        f"observed for training real photos, consistent with a REAL image."
                    )
                else:
                    explanation_parts.append(
                        f"- **Denoiser residual**: residual energy (p95={res_energy:.4f}) is not strongly abnormal; "
                        f"the FAKE decision is driven more by other forensic cues."
                    )
            cues_used += 1

        # FFT peakiness explanation
        if fp is not None and thr_fp is not None:
            if fp > thr_fp:
                if prediction_label == 1:
                    explanation_parts.append(
                        f"- **Frequency spectrum**: the Fourier magnitude has unusually sharp peaks "
                        f"(peakiness={fp:.2f} > {thr_fp:.2f}), often linked to upsampling patterns "
                        f"of generative models."
                    )
                else:
                    explanation_parts.append(
                        f"- **Frequency spectrum**: unusually sharp peaks in the Fourier magnitude "
                        f"(peakiness={fp:.2f} > {thr_fp:.2f}), which is more typical for generated images "
                        f"and conflicts with the REAL prediction."
                    )
                cues_used += 1
            elif prediction_label == 1:
                # Even if below threshold, mention it if prediction is FAKE and it's close to threshold
                if fp > thr_fp * 0.8:  # Within 80% of threshold
                    explanation_parts.append(
                        f"- **Frequency spectrum**: peakiness ({fp:.2f}) is moderately elevated "
                        f"(threshold: {thr_fp:.2f}), contributing to the FAKE classification."
                    )
                    cues_used += 1

        # -------------------- Data-driven drivers (show what actually drove the decision) --------------------
        if contributions:
            sorted_contribs = sorted(contributions.items(), key=lambda x: abs(x[1]), reverse=True)
            # Always consider at least the 8 largest-magnitude contributions;
            # top_k_contributions can only widen this candidate pool.
            top = sorted_contribs[:max(top_k_contributions, 8)]
            pos = [(name, val) for name, val in top if val > 0]
            neg = [(name, val) for name, val in top if val < 0]

            if pos:
                explanation_parts.append(f"\n**Features driving FAKE classification:**")
                # List at most 8 features that push toward FAKE
                pos_display = [f"{name} ({val:+.3f})" for name, val in pos[:8]]
                explanation_parts.append(f"- {', '.join(pos_display)}")
            if neg:
                explanation_parts.append(f"\n**Features supporting REAL classification:**")
                # List at most 5 features that push toward REAL
                neg_display = [f"{name} ({val:+.3f})" for name, val in neg[:5]]
                explanation_parts.append(f"- {', '.join(neg_display)}")
        elif cues_used == 0 or (prediction_label == 1 and cues_used < 2):
            # Few or no cue bullets were emitted and no contributions are available:
            # explain that the decision rests on the combination of many features.
            # The wording follows the actual predicted label and confidence band so
            # a REAL or low-confidence prediction is not described as a
            # high-confidence synthetic detection.
            if prediction_label == 1:
                cue_gap = "don't individually strongly indicate synthesis"
                verdict = "synthetic generation"
            else:
                cue_gap = "don't individually settle the question"
                verdict = "a genuine camera image"

            if confidence_str == "high":
                strength = (
                    f"The high confidence ({confidence:.1%}) suggests these subtle patterns collectively "
                    f"indicate {verdict}."
                )
            else:
                strength = (
                    f"With only {confidence_str} confidence ({confidence:.1%}), these subtle patterns "
                    f"collectively lean toward {verdict}."
                )

            explanation_parts.append(
                f"\n**Note**: While the primary forensic cues (Noiseprint, Residuals, FFT) {cue_gap}, "
                f"the model's decision is based on a combination of many features "
                f"including DCT coefficients, FFT radial profiles, residual statistics, and other frequency-domain "
                f"characteristics. {strength}"
            )

            # List some of the other features that might be contributing
            other_features = []
            if 'dct_mean' in features:
                other_features.append("DCT coefficients")
            if 'fft_radial_mean' in features:
                other_features.append("FFT radial profiles")
            if 'residual_skew' in features:
                other_features.append("residual statistics")
            if 'residual_kurtosis' in features:
                other_features.append("residual distribution shape")

            if other_features:
                explanation_parts.append(
                    f"The model analyzes {', '.join(other_features)} and other frequency-domain patterns "
                    f"that collectively indicate {verdict}, even when individual cues are subtle."
                )

        # Final triage note (skipped for OOD samples, which already carry a caution banner)
        if triage_label == "UNCERTAIN" and not is_ood:
            if suspiciously_clean:
                # Cues agree here (all below threshold), so do not claim a conflict.
                explanation_parts.append(
                    "Because every forensic cue is below its threshold while confidence is not very high, "
                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
                )
            else:
                explanation_parts.append(
                    "Because the forensic cues point in different directions at only moderate confidence, "
                    "this image should be flagged for manual review or stress-testing (e.g., recompression)."
                )

        return "\n".join(explanation_parts)