ar07xd committed on
Commit
36529c1
·
verified ·
1 Parent(s): 72ccda8

Sync from GitHub via hub-sync

Browse files
api/v1/analyze.py CHANGED
@@ -97,7 +97,6 @@ def _resize_for_vis(pil) -> "Image.Image":
97
  scale = _VIS_MAX_PX / max(w, h)
98
  return pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
99
  VIDEO_MAX_MB = 100
100
- VIDEO_NUM_FRAMES = 16
101
 
102
  _IMAGE_EXCLUDE = {"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
103
 
@@ -575,7 +574,7 @@ async def analyze_video_endpoint(
575
  return VideoAnalysisResponse.model_validate(payload)
576
 
577
  try:
578
- agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
579
  stages.append("frame_extraction")
580
  stages.append("frame_classification")
581
  stages.append("aggregation")
@@ -600,6 +599,8 @@ async def analyze_video_endpoint(
600
  # Phase 17.3 — combined verdict formula
601
  score, label, severity = compute_video_authenticity_score(
602
  mean_suspicious_prob=agg.mean_suspicious_prob,
 
 
603
  insufficient_faces=agg.insufficient_faces,
604
  temporal_score=agg.temporal.temporal_score if agg.temporal else None,
605
  audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
@@ -1139,7 +1140,7 @@ async def analyze_video_async(
1139
  local_db = SessionLocal()
1140
  try:
1141
  progress("frame_extraction", 15)
1142
- agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
1143
  progress("aggregation", 60)
1144
 
1145
  audio_result = None
@@ -1151,6 +1152,8 @@ async def analyze_video_async(
1151
 
1152
  score_val, label_val, sev = compute_video_authenticity_score(
1153
  mean_suspicious_prob=agg.mean_suspicious_prob,
 
 
1154
  insufficient_faces=agg.insufficient_faces,
1155
  temporal_score=agg.temporal.temporal_score if agg.temporal else None,
1156
  audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
 
97
  scale = _VIS_MAX_PX / max(w, h)
98
  return pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
99
  VIDEO_MAX_MB = 100
 
100
 
101
  _IMAGE_EXCLUDE = {"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
102
 
 
574
  return VideoAnalysisResponse.model_validate(payload)
575
 
576
  try:
577
+ agg = analyze_video(path, num_frames=settings.VIDEO_SAMPLE_FRAMES)
578
  stages.append("frame_extraction")
579
  stages.append("frame_classification")
580
  stages.append("aggregation")
 
599
  # Phase 17.3 — combined verdict formula
600
  score, label, severity = compute_video_authenticity_score(
601
  mean_suspicious_prob=agg.mean_suspicious_prob,
602
+ max_suspicious_prob=agg.max_suspicious_prob,
603
+ suspicious_ratio=agg.suspicious_ratio,
604
  insufficient_faces=agg.insufficient_faces,
605
  temporal_score=agg.temporal.temporal_score if agg.temporal else None,
606
  audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
 
1140
  local_db = SessionLocal()
1141
  try:
1142
  progress("frame_extraction", 15)
1143
+ agg = analyze_video(path, num_frames=settings.VIDEO_SAMPLE_FRAMES)
1144
  progress("aggregation", 60)
1145
 
1146
  audio_result = None
 
1152
 
1153
  score_val, label_val, sev = compute_video_authenticity_score(
1154
  mean_suspicious_prob=agg.mean_suspicious_prob,
1155
+ max_suspicious_prob=agg.max_suspicious_prob,
1156
+ suspicious_ratio=agg.suspicious_ratio,
1157
  insufficient_faces=agg.insufficient_faces,
1158
  temporal_score=agg.temporal.temporal_score if agg.temporal else None,
1159
  audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
config.py CHANGED
@@ -273,7 +273,7 @@ class Settings(BaseSettings):
273
  # face forgery frames, so it is the dominant signal for video analysis.
274
  VIDEO_FFPP_WEIGHT: float = 0.70
275
  VIDEO_EFFNET_WEIGHT: float = 0.30
276
- VIDEO_SAMPLE_FRAMES: int = 16 # frames to sample per video for inference
277
  EXIFTOOL_PATH: str = "" # full path to ExifTool binary; empty = metadata write disabled
278
 
279
  # Auth
 
273
  # face forgery frames, so it is the dominant signal for video analysis.
274
  VIDEO_FFPP_WEIGHT: float = 0.70
275
  VIDEO_EFFNET_WEIGHT: float = 0.30
276
+ VIDEO_SAMPLE_FRAMES: int = 32 # frames to sample per video for inference
277
  EXIFTOOL_PATH: str = "" # full path to ExifTool binary; empty = metadata write disabled
278
 
279
  # Auth
services/audio_service.py CHANGED
@@ -50,7 +50,11 @@ def _extract_audio_wav(video_path: str, out_path: str) -> bool:
50
  capture_output=True,
51
  timeout=60,
52
  )
53
- return result.returncode == 0 and os.path.getsize(out_path) > 0
 
 
 
 
54
  except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as exc:
55
  logger.warning(f"ffmpeg audio extraction failed: {exc}")
56
  return False
 
50
  capture_output=True,
51
  timeout=60,
52
  )
53
+ if result.returncode != 0 or not os.path.exists(out_path) or os.path.getsize(out_path) == 0:
54
+ stderr_tail = result.stderr.decode(errors="replace")[-400:].strip()
55
+ logger.warning(f"ffmpeg exited {result.returncode} — {stderr_tail or '(no stderr)'}")
56
+ return False
57
+ return True
58
  except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as exc:
59
  logger.warning(f"ffmpeg audio extraction failed: {exc}")
60
  return False
services/video_service.py CHANGED
@@ -188,7 +188,7 @@ def _analyze_with_efficientnet(
188
  label=label,
189
  confidence=fake_prob,
190
  suspicious_prob=fake_prob,
191
- is_suspicious=(fake_prob >= 0.5) and has_face,
192
  has_face=has_face,
193
  scored=bool(has_face and faces),
194
  )
@@ -212,7 +212,7 @@ def _analyze_with_vit(
212
  label=vit_label,
213
  confidence=vit_fake_prob,
214
  suspicious_prob=vit_fake_prob,
215
- is_suspicious=(vit_fake_prob >= 0.5) and face,
216
  has_face=face,
217
  scored=face,
218
  )
 
188
  label=label,
189
  confidence=fake_prob,
190
  suspicious_prob=fake_prob,
191
+ is_suspicious=(fake_prob >= 0.40) and has_face,
192
  has_face=has_face,
193
  scored=bool(has_face and faces),
194
  )
 
212
  label=vit_label,
213
  confidence=vit_fake_prob,
214
  suspicious_prob=vit_fake_prob,
215
+ is_suspicious=(vit_fake_prob >= 0.40) and face,
216
  has_face=face,
217
  scored=face,
218
  )
utils/scoring.py CHANGED
@@ -70,6 +70,8 @@ def apply_unverified_news_gate(
70
  def compute_video_authenticity_score(
71
  *,
72
  mean_suspicious_prob: float,
 
 
73
  insufficient_faces: bool,
74
  temporal_score: float | None = None,
75
  audio_authenticity_score: float | None = None,
@@ -80,6 +82,14 @@ def compute_video_authenticity_score(
80
  Face-model evidence is authoritative only when enough face frames were
81
  scored. If face content is insufficient, use temporal/audio evidence when
82
  available instead of forcing a neutral result.
 
 
 
 
 
 
 
 
83
  """
84
  if insufficient_faces:
85
  evidence: list[tuple[float, float]] = []
@@ -97,7 +107,12 @@ def compute_video_authenticity_score(
97
  label, severity = get_verdict_label(score)
98
  return score, label, severity
99
 
100
- visual_score = (1.0 - float(mean_suspicious_prob)) * 100.0
 
 
 
 
 
101
  temporal_sc = float(temporal_score) if temporal_score is not None else visual_score
102
  if has_audio and audio_authenticity_score is not None:
103
  _validate_weight_total([0.50, 0.30, 0.20], "video audio+temporal fusion")
@@ -106,6 +121,16 @@ def compute_video_authenticity_score(
106
  _validate_weight_total([0.70, 0.30], "video visual+temporal fusion")
107
  combined = 0.70 * visual_score + 0.30 * temporal_sc
108
  score = int(round(max(0.0, min(100.0, combined))))
 
 
 
 
 
 
 
 
 
 
109
  label, severity = get_verdict_label(score)
110
  return score, label, severity
111
 
 
70
  def compute_video_authenticity_score(
71
  *,
72
  mean_suspicious_prob: float,
73
+ max_suspicious_prob: float = 0.0,
74
+ suspicious_ratio: float = 0.0,
75
  insufficient_faces: bool,
76
  temporal_score: float | None = None,
77
  audio_authenticity_score: float | None = None,
 
82
  Face-model evidence is authoritative only when enough face frames were
83
  scored. If face content is insufficient, use temporal/audio evidence when
84
  available instead of forcing a neutral result.
85
+
86
+ The effective visual fake probability blends the per-frame mean with the
87
+ per-frame maximum (65/35 split). This prevents a deepfake from hiding
88
+ behind many clean frames: even a cluster of highly-suspicious frames
89
+ raises the combined score meaningfully.
90
+
91
+ A suspicious_ratio cap prevents a misleadingly high authenticity score when
92
+ a significant fraction of frames are flagged regardless of the mean.
93
  """
94
  if insufficient_faces:
95
  evidence: list[tuple[float, float]] = []
 
107
  label, severity = get_verdict_label(score)
108
  return score, label, severity
109
 
110
+ # Blend mean and max: mean alone is easily diluted by clean frames.
111
+ # 65% mean keeps the overall distribution; 35% max ensures a cluster of
112
+ # highly-suspicious frames cannot be hidden by majority-clean frames.
113
+ effective_prob = 0.65 * float(mean_suspicious_prob) + 0.35 * float(max_suspicious_prob)
114
+ visual_score = (1.0 - effective_prob) * 100.0
115
+
116
  temporal_sc = float(temporal_score) if temporal_score is not None else visual_score
117
  if has_audio and audio_authenticity_score is not None:
118
  _validate_weight_total([0.50, 0.30, 0.20], "video audio+temporal fusion")
 
121
  _validate_weight_total([0.70, 0.30], "video visual+temporal fusion")
122
  combined = 0.70 * visual_score + 0.30 * temporal_sc
123
  score = int(round(max(0.0, min(100.0, combined))))
124
+
125
+ # Suspicious-ratio caps: when a meaningful fraction of frames are flagged,
126
+ # prevent the score from landing in a confident "Likely Real" band.
127
+ # ≥40% suspicious → cap at 35 (Likely Fake zone).
128
+ # ≥20% suspicious → cap at 50 (Uncertain/Suspicious zone).
129
+ if suspicious_ratio >= 0.40:
130
+ score = min(score, 35)
131
+ elif suspicious_ratio >= 0.20:
132
+ score = min(score, 50)
133
+
134
  label, severity = get_verdict_label(score)
135
  return score, label, severity
136