Spaces:

ar07xd
/

deepshield

Runtime error

App Files Files Community

ar07xd committed 23 days ago

Commit

36529c1

verified ·

1 Parent(s): 72ccda8

Sync from GitHub via hub-sync

Browse files

Files changed (5) hide show

api/v1/analyze.py +6 -3
config.py +1 -1
services/audio_service.py +5 -1
services/video_service.py +2 -2
utils/scoring.py +26 -1

api/v1/analyze.py CHANGED Viewed

@@ -97,7 +97,6 @@ def _resize_for_vis(pil) -> "Image.Image":
     scale = _VIS_MAX_PX / max(w, h)
     return pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
 VIDEO_MAX_MB = 100
-VIDEO_NUM_FRAMES = 16
 _IMAGE_EXCLUDE = {"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
@@ -575,7 +574,7 @@ async def analyze_video_endpoint(
             return VideoAnalysisResponse.model_validate(payload)
     try:
-        agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
         stages.append("frame_extraction")
         stages.append("frame_classification")
         stages.append("aggregation")
@@ -600,6 +599,8 @@ async def analyze_video_endpoint(
     # Phase 17.3 — combined verdict formula
     score, label, severity = compute_video_authenticity_score(
         mean_suspicious_prob=agg.mean_suspicious_prob,
         insufficient_faces=agg.insufficient_faces,
         temporal_score=agg.temporal.temporal_score if agg.temporal else None,
         audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
@@ -1139,7 +1140,7 @@ async def analyze_video_async(
         local_db = SessionLocal()
         try:
             progress("frame_extraction", 15)
-            agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
             progress("aggregation", 60)
             audio_result = None
@@ -1151,6 +1152,8 @@ async def analyze_video_async(
             score_val, label_val, sev = compute_video_authenticity_score(
                 mean_suspicious_prob=agg.mean_suspicious_prob,
                 insufficient_faces=agg.insufficient_faces,
                 temporal_score=agg.temporal.temporal_score if agg.temporal else None,
                 audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,

     scale = _VIS_MAX_PX / max(w, h)
     return pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
 VIDEO_MAX_MB = 100
 _IMAGE_EXCLUDE = {"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
             return VideoAnalysisResponse.model_validate(payload)
     try:
+        agg = analyze_video(path, num_frames=settings.VIDEO_SAMPLE_FRAMES)
         stages.append("frame_extraction")
         stages.append("frame_classification")
         stages.append("aggregation")
     # Phase 17.3 — combined verdict formula
     score, label, severity = compute_video_authenticity_score(
         mean_suspicious_prob=agg.mean_suspicious_prob,
+        max_suspicious_prob=agg.max_suspicious_prob,
+        suspicious_ratio=agg.suspicious_ratio,
         insufficient_faces=agg.insufficient_faces,
         temporal_score=agg.temporal.temporal_score if agg.temporal else None,
         audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
         local_db = SessionLocal()
         try:
             progress("frame_extraction", 15)
+            agg = analyze_video(path, num_frames=settings.VIDEO_SAMPLE_FRAMES)
             progress("aggregation", 60)
             audio_result = None
             score_val, label_val, sev = compute_video_authenticity_score(
                 mean_suspicious_prob=agg.mean_suspicious_prob,
+                max_suspicious_prob=agg.max_suspicious_prob,
+                suspicious_ratio=agg.suspicious_ratio,
                 insufficient_faces=agg.insufficient_faces,
                 temporal_score=agg.temporal.temporal_score if agg.temporal else None,
                 audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,

config.py CHANGED Viewed

@@ -273,7 +273,7 @@ class Settings(BaseSettings):
     # face forgery frames, so it is the dominant signal for video analysis.
     VIDEO_FFPP_WEIGHT: float = 0.70
     VIDEO_EFFNET_WEIGHT: float = 0.30
-    VIDEO_SAMPLE_FRAMES: int = 16  # frames to sample per video for inference
     EXIFTOOL_PATH: str = ""  # full path to ExifTool binary; empty = metadata write disabled
     # Auth

     # face forgery frames, so it is the dominant signal for video analysis.
     VIDEO_FFPP_WEIGHT: float = 0.70
     VIDEO_EFFNET_WEIGHT: float = 0.30
+    VIDEO_SAMPLE_FRAMES: int = 32  # frames to sample per video for inference
     EXIFTOOL_PATH: str = ""  # full path to ExifTool binary; empty = metadata write disabled
     # Auth

services/audio_service.py CHANGED Viewed

@@ -50,7 +50,11 @@ def _extract_audio_wav(video_path: str, out_path: str) -> bool:
             capture_output=True,
             timeout=60,
         )
-        return result.returncode == 0 and os.path.getsize(out_path) > 0
     except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as exc:
         logger.warning(f"ffmpeg audio extraction failed: {exc}")
         return False

             capture_output=True,
             timeout=60,
         )
+        if result.returncode != 0 or not os.path.exists(out_path) or os.path.getsize(out_path) == 0:
+            stderr_tail = result.stderr.decode(errors="replace")[-400:].strip()
+            logger.warning(f"ffmpeg exited {result.returncode} — {stderr_tail or '(no stderr)'}")
+            return False
+        return True
     except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as exc:
         logger.warning(f"ffmpeg audio extraction failed: {exc}")
         return False

services/video_service.py CHANGED Viewed

@@ -188,7 +188,7 @@ def _analyze_with_efficientnet(
                 label=label,
                 confidence=fake_prob,
                 suspicious_prob=fake_prob,
-                is_suspicious=(fake_prob >= 0.5) and has_face,
                 has_face=has_face,
                 scored=bool(has_face and faces),
             )
@@ -212,7 +212,7 @@ def _analyze_with_vit(
                 label=vit_label,
                 confidence=vit_fake_prob,
                 suspicious_prob=vit_fake_prob,
-                is_suspicious=(vit_fake_prob >= 0.5) and face,
                 has_face=face,
                 scored=face,
             )

                 label=label,
                 confidence=fake_prob,
                 suspicious_prob=fake_prob,
+                is_suspicious=(fake_prob >= 0.40) and has_face,
                 has_face=has_face,
                 scored=bool(has_face and faces),
             )
                 label=vit_label,
                 confidence=vit_fake_prob,
                 suspicious_prob=vit_fake_prob,
+                is_suspicious=(vit_fake_prob >= 0.40) and face,
                 has_face=face,
                 scored=face,
             )

utils/scoring.py CHANGED Viewed

@@ -70,6 +70,8 @@ def apply_unverified_news_gate(
 def compute_video_authenticity_score(
     *,
     mean_suspicious_prob: float,
     insufficient_faces: bool,
     temporal_score: float | None = None,
     audio_authenticity_score: float | None = None,
@@ -80,6 +82,14 @@ def compute_video_authenticity_score(
     Face-model evidence is authoritative only when enough face frames were
     scored. If face content is insufficient, use temporal/audio evidence when
     available instead of forcing a neutral result.
     """
     if insufficient_faces:
         evidence: list[tuple[float, float]] = []
@@ -97,7 +107,12 @@ def compute_video_authenticity_score(
         label, severity = get_verdict_label(score)
         return score, label, severity
-    visual_score = (1.0 - float(mean_suspicious_prob)) * 100.0
     temporal_sc = float(temporal_score) if temporal_score is not None else visual_score
     if has_audio and audio_authenticity_score is not None:
         _validate_weight_total([0.50, 0.30, 0.20], "video audio+temporal fusion")
@@ -106,6 +121,16 @@ def compute_video_authenticity_score(
         _validate_weight_total([0.70, 0.30], "video visual+temporal fusion")
         combined = 0.70 * visual_score + 0.30 * temporal_sc
     score = int(round(max(0.0, min(100.0, combined))))
     label, severity = get_verdict_label(score)
     return score, label, severity

 def compute_video_authenticity_score(
     *,
     mean_suspicious_prob: float,
+    max_suspicious_prob: float = 0.0,
+    suspicious_ratio: float = 0.0,
     insufficient_faces: bool,
     temporal_score: float | None = None,
     audio_authenticity_score: float | None = None,
     Face-model evidence is authoritative only when enough face frames were
     scored. If face content is insufficient, use temporal/audio evidence when
     available instead of forcing a neutral result.
+    The effective visual fake probability blends the per-frame mean with the
+    per-frame maximum (65/35 split). This prevents a deepfake from hiding
+    behind many clean frames: even a cluster of highly-suspicious frames
+    raises the combined score meaningfully.
+    A suspicious_ratio cap prevents a misleadingly high authenticity score when
+    a significant fraction of frames are flagged regardless of the mean.
     """
     if insufficient_faces:
         evidence: list[tuple[float, float]] = []
         label, severity = get_verdict_label(score)
         return score, label, severity
+    # Blend mean and max: mean alone is easily diluted by clean frames.
+    # 65% mean keeps the overall distribution; 35% max ensures a cluster of
+    # highly-suspicious frames cannot be hidden by majority-clean frames.
+    effective_prob = 0.65 * float(mean_suspicious_prob) + 0.35 * float(max_suspicious_prob)
+    visual_score = (1.0 - effective_prob) * 100.0
     temporal_sc = float(temporal_score) if temporal_score is not None else visual_score
     if has_audio and audio_authenticity_score is not None:
         _validate_weight_total([0.50, 0.30, 0.20], "video audio+temporal fusion")
         _validate_weight_total([0.70, 0.30], "video visual+temporal fusion")
         combined = 0.70 * visual_score + 0.30 * temporal_sc
     score = int(round(max(0.0, min(100.0, combined))))
+    # Suspicious-ratio caps: when a meaningful fraction of frames are flagged,
+    # prevent the score from landing in a confident "Likely Real" band.
+    # ≥40% suspicious → cap at 35 (Likely Fake zone).
+    # ≥20% suspicious → cap at 50 (Uncertain/Suspicious zone).
+    if suspicious_ratio >= 0.40:
+        score = min(score, 35)
+    elif suspicious_ratio >= 0.20:
+        score = min(score, 50)
     label, severity = get_verdict_label(score)
     return score, label, severity