deepshield / utils /scoring.py
ar07xd's picture
Sync from GitHub via hub-sync
36529c1 verified
from __future__ import annotations
import math
from typing import Optional, Tuple
# Verdict bands for the 0-100 authenticity score, listed from lowest to
# highest. Each entry is (low, high, label, severity) with both bounds
# inclusive; together the bands cover every integer from 0 to 100.
TRUST_SCALE = [
    (0, 20, "Very Likely Fake", "critical"),
    (21, 40, "Likely Fake", "danger"),
    (41, 55, "Possibly Manipulated", "warning"),
    (56, 69, "Uncertain — Needs Verification", "warning"),
    (70, 88, "Likely Real", "positive"),
    (89, 100, "Very Likely Real", "safe"),
]
# Score range for the forced disagreement clamp. The bounds match the
# "Uncertain — Needs Verification" band (56-69) in TRUST_SCALE.
UNCERTAIN_SCORE_LO = 56
UNCERTAIN_SCORE_HI = 69
# Highest score an unverified news claim may keep in
# apply_unverified_news_gate; 55 is the top of the "Possibly Manipulated"
# band in TRUST_SCALE.
UNVERIFIED_NEWS_SCORE_CAP = 55
def _validate_weight_total(weights: list[float], context: str) -> None:
total = sum(weights)
if total > 1.000001:
raise ValueError(f"{context} weights must not sum above 1.0 (got {total:.3f})")
def compute_authenticity_score(fake_probability: float, label: str = "") -> int:
    """Map a fake probability [0.0, 1.0] to a 0-100 authenticity score.

    The first argument must always be the model's fake-probability (not the
    top-label confidence). 0.0 (no fake signal) → 100, 1.0 (certain fake) → 0.
    Values outside [0.0, 1.0] are clamped to the nearest end of the scale.

    The `label` parameter is accepted for backward compatibility but not used.

    Args:
        fake_probability: Probability that the content is fake.
        label: Ignored.

    Returns:
        The authenticity score as an integer between 0 and 100.

    Raises:
        ValueError: If ``fake_probability`` is NaN.
    """
    probability = float(fake_probability)
    # Every comparison with NaN is false, so NaN would pass straight through
    # the min/max clamp below and be reported as 100 (fully authentic).
    # Fail loudly instead of handing out a confident "real" verdict.
    if math.isnan(probability):
        raise ValueError("fake_probability must not be NaN")
    return int(round(max(0.0, min(100.0, (1.0 - probability) * 100.0))))
def get_verdict_label(score: int) -> Tuple[str, str]:
    """Return the (label, severity) of the TRUST_SCALE band containing ``score``.

    Band bounds are inclusive and the first matching band wins. A score that
    falls in no band yields ("Unknown", "warning").
    """
    matching_bands = (
        (label, severity)
        for lo, hi, label, severity in TRUST_SCALE
        if lo <= score <= hi
    )
    return next(matching_bands, ("Unknown", "warning"))
def apply_unverified_news_gate(
    score: int,
    *,
    has_trusted_sources: bool,
    has_contradicting_evidence: bool,
    truth_override_applied: bool,
) -> Tuple[int, str, str, str | None]:
    """Keep unverifiable news claims out of the "real" verdict bands.

    The text classifier can judge writing style, but a news claim that no
    trusted source corroborates should stay in the suspicious/verification
    range. The gate only lowers overly-real scores; scores that are already
    low pass through with their regular verdict.

    Returns:
        ``(score, label, severity, gate_reason)``. ``gate_reason`` is None
        when any of the three flags is set (the score is returned untouched)
        and "no_trusted_source" otherwise. A gated score above 40 is labelled
        "Suspicious" instead of its regular band label.
    """
    verified = any(
        (has_trusted_sources, has_contradicting_evidence, truth_override_applied)
    )
    if verified:
        final_score, gate_reason = score, None
    else:
        final_score = min(score, UNVERIFIED_NEWS_SCORE_CAP)
        gate_reason = "no_trusted_source"

    # Only gated scores that remain above the "Likely Fake" range get the
    # dedicated "Suspicious" label; everything else uses the normal bands.
    if gate_reason is not None and final_score > 40:
        return final_score, "Suspicious", "warning", gate_reason

    verdict, level = get_verdict_label(final_score)
    return final_score, verdict, level, gate_reason
def compute_video_authenticity_score(
    *,
    mean_suspicious_prob: float,
    max_suspicious_prob: float = 0.0,
    suspicious_ratio: float = 0.0,
    insufficient_faces: bool,
    temporal_score: float | None = None,
    audio_authenticity_score: float | None = None,
    has_audio: bool = False,
) -> Tuple[int, str, str]:
    """Combine video evidence into an authenticity verdict.

    Face-model evidence is authoritative only when enough face frames were
    scored. If face content is insufficient, use temporal/audio evidence when
    available instead of forcing a neutral result.

    The effective visual fake probability blends the per-frame mean with the
    per-frame maximum (65/35 split). This prevents a deepfake from hiding
    behind many clean frames: even a cluster of highly-suspicious frames
    raises the combined score meaningfully.

    A suspicious_ratio cap prevents a misleadingly high authenticity score when
    a significant fraction of frames are flagged regardless of the mean.

    Args:
        mean_suspicious_prob: Mean per-frame fake probability.
        max_suspicious_prob: Maximum per-frame fake probability.
        suspicious_ratio: Fraction of frames flagged as suspicious.
        insufficient_faces: True when too few face frames were scored; the
            face probabilities and ``suspicious_ratio`` are then ignored.
        temporal_score: Optional temporal score, fused on the same 0-100
            scale as the visual score.
        audio_authenticity_score: Optional audio score on the same scale;
            only used when ``has_audio`` is true.
        has_audio: Whether the video carries an audio track.

    Returns:
        ``(score, label, severity)`` with ``score`` an integer in 0-100.
        Without usable face, temporal, or audio evidence the result is
        ``(50, "Insufficient face content", "warning")``.

    Raises:
        ValueError: If the evidence used for the score is NaN, or if
            ``suspicious_ratio`` is NaN on the face-evidence path.
    """
    if insufficient_faces:
        # Fall back to temporal (0.60) and audio (0.40) evidence, renormalised
        # over whichever of the two is actually present.
        evidence: list[tuple[float, float]] = []
        if temporal_score is not None:
            evidence.append((0.60, float(temporal_score)))
        if has_audio and audio_authenticity_score is not None:
            evidence.append((0.40, float(audio_authenticity_score)))
        if not evidence:
            return 50, "Insufficient face content", "warning"
        total_weight = sum(weight for weight, _score in evidence)
        combined = sum(weight * score for weight, score in evidence) / total_weight
        # NaN compares false against everything, so the clamp below would turn
        # it into 100 ("Very Likely Real"). Reject it instead of failing open.
        if math.isnan(combined):
            raise ValueError("video evidence must not contain NaN values")
        score = int(round(max(0.0, min(100.0, combined))))
        label, severity = get_verdict_label(score)
        return score, label, severity

    # Blend mean and max: mean alone is easily diluted by clean frames.
    # 65% mean keeps the overall distribution; 35% max ensures a cluster of
    # highly-suspicious frames cannot be hidden by majority-clean frames.
    effective_prob = 0.65 * float(mean_suspicious_prob) + 0.35 * float(max_suspicious_prob)
    visual_score = (1.0 - effective_prob) * 100.0
    # Without a temporal score the visual score stands in for it, so the
    # temporal weight effectively goes back to the visual evidence.
    temporal_sc = float(temporal_score) if temporal_score is not None else visual_score

    if has_audio and audio_authenticity_score is not None:
        _validate_weight_total([0.50, 0.30, 0.20], "video audio+temporal fusion")
        combined = 0.50 * visual_score + 0.30 * temporal_sc + 0.20 * float(audio_authenticity_score)
    else:
        _validate_weight_total([0.70, 0.30], "video visual+temporal fusion")
        combined = 0.70 * visual_score + 0.30 * temporal_sc

    # A NaN fused score would clamp to 100, and a NaN ratio would silently
    # skip both caps below; either way the verdict would fail open.
    if math.isnan(combined) or math.isnan(suspicious_ratio):
        raise ValueError("video evidence must not contain NaN values")

    score = int(round(max(0.0, min(100.0, combined))))

    # Suspicious-ratio caps: when a meaningful fraction of frames are flagged,
    # prevent the score from landing in a confident "Likely Real" band.
    #   ≥40% suspicious → cap at 35 (Likely Fake zone).
    #   ≥20% suspicious → cap at 50 (Possibly Manipulated zone).
    if suspicious_ratio >= 0.40:
        score = min(score, 35)
    elif suspicious_ratio >= 0.20:
        score = min(score, 50)

    label, severity = get_verdict_label(score)
    return score, label, severity
# Minimum standard deviation across the primary signals at which
# maybe_clamp_to_uncertain treats them as disagreeing; below this value the
# score is left untouched.
DISAGREEMENT_THRESHOLD = 0.25
def compute_signal_disagreement(components: dict[str, float]) -> Optional[float]:
    """Measure how far apart the primary evidence signals are.

    Only "face_stack", "general" and "forensics" entries count; other keys
    (for example exif/vlm, which act as weaker modifiers) are ignored.

    Returns:
        The population standard deviation of the primary signal values, or
        None when fewer than two primary signals are present.
    """
    primary_values = [
        value
        for name, value in components.items()
        if name in ("face_stack", "general", "forensics")
    ]
    count = len(primary_values)
    if count < 2:
        return None

    centre = sum(primary_values) / count
    squared_deviations = [(value - centre) ** 2 for value in primary_values]
    return math.sqrt(sum(squared_deviations) / count)
def maybe_clamp_to_uncertain(score: int, components: dict[str, float]) -> Tuple[int, Optional[str]]:
    """Pull a confident score into the Uncertain band when primary signals disagree.

    Returns:
        ``(final_score, disagreement_reason)``. The reason is None whenever
        the score is returned unchanged: the disagreement is unavailable or
        below DISAGREEMENT_THRESHOLD, the score already lies in the Uncertain
        band, or the score is 20 or lower.
    """
    spread = compute_signal_disagreement(components)
    if spread is None or spread < DISAGREEMENT_THRESHOLD:
        return score, None

    # Scores of 20 or below ("Very Likely Fake") are deliberately left alone:
    # when nearly everything says fake, one dissenting signal is informational
    # and must not soften the verdict.
    if score > UNCERTAIN_SCORE_HI:
        clamp_to = UNCERTAIN_SCORE_HI
    elif 20 < score < UNCERTAIN_SCORE_LO:
        clamp_to = UNCERTAIN_SCORE_LO
    else:
        return score, None

    primary_parts = [
        f"{name}={value:.2f}"
        for name, value in components.items()
        if name in {"face_stack", "general", "forensics"}
    ]
    summary = ", ".join(primary_parts)
    return clamp_to, f"signal_disagreement(stdev={spread:.2f}; {summary})"
def get_score_color(score: int) -> str:
    """Return the hex colour for a score on a Red → Amber → Green gradient.

    The score is first limited to 0-100. Scores up to 50 interpolate linearly
    from red (#E53935) to amber (#FFA726); higher scores interpolate from
    amber to green (#43A047). The result is formatted as "#RRGGBB" with
    upper-case hex digits.
    """
    red = (0xE5, 0x39, 0x35)
    amber = (0xFF, 0xA7, 0x26)
    green = (0x43, 0xA0, 0x47)

    bounded = max(0, min(100, score))
    if bounded <= 50:
        start, end, t = red, amber, bounded / 50.0
    else:
        start, end, t = amber, green, (bounded - 50) / 50.0

    # Interpolate each RGB channel independently and round to the nearest int.
    channels = [int(round(a + (b - a) * t)) for a, b in zip(start, end)]
    return "#" + "".join(f"{channel:02X}" for channel in channels)