Deepfake Authenticator commited on
Commit
1bfb897
Β·
1 Parent(s): 5797106

feat: C2PA metadata detection + temporal consistency analysis (catches Veo3/Sora/Runway)

Browse files
Files changed (1) hide show
  1. backend/detector.py +384 -39
backend/detector.py CHANGED
@@ -11,10 +11,285 @@ from pathlib import Path
11
  from typing import Optional
12
  import time
13
  import concurrent.futures
 
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # ─────────────────────────────────────────────
19
  # Agent 1: Frame Analyzer Agent
20
  # ─────────────────────────────────────────────
@@ -382,11 +657,48 @@ class DecisionAgent:
382
  class ReportGeneratorAgent:
383
  BASE_THRESHOLD = 0.58 # Restored β€” 0.54 caused false positives
384
 
385
- def generate(self, analysis: dict, metadata: dict, audio: dict | None = None) -> dict:
 
386
  prob = analysis["overall_fake_probability"]
387
  consistency = analysis.get("consistency", 0.5)
388
  coverage = analysis.get("face_coverage", 0.5)
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  threshold = self.BASE_THRESHOLD
391
  if consistency >= 0.70 and coverage >= 0.50:
392
  threshold -= 0.06
@@ -397,6 +709,7 @@ class ReportGeneratorAgent:
397
 
398
  visual_fake = prob >= threshold
399
 
 
400
  audio_fake = False
401
  audio_prob = 0.0
402
  if audio and audio.get("available"):
@@ -423,11 +736,12 @@ class ReportGeneratorAgent:
423
  confidence = round(calibrated * 100, 1)
424
  result = "FAKE" if is_fake else "REAL"
425
 
426
- logger.info(
427
- f"Decision: prob={prob:.3f} threshold={threshold:.3f} β†’ {result}"
428
- )
429
 
430
- details = self._build_details(analysis, metadata, prob, is_fake, threshold)
 
 
 
431
  frame_timeline = self._build_timeline(analysis.get("frame_scores", []))
432
 
433
  return {
@@ -457,22 +771,40 @@ class ReportGeneratorAgent:
457
  conf = base + (top - base) * (distance / 0.5) ** 0.6
458
  return float(np.clip(conf, 0.88, 0.99))
459
 
460
- def _build_details(self, analysis, metadata, prob, is_fake, threshold=0.54) -> list[str]:
461
- details = []
462
- frame_scores = analysis.get("frame_scores", [])
 
463
  frames_with_faces = analysis.get("frames_with_faces", 0)
464
  frames_analyzed = analysis.get("frames_analyzed", 0)
465
  probs = [s["fake_probability"] for s in frame_scores] if frame_scores else []
466
 
467
- if is_fake:
468
- if prob > 0.85:
469
- details.append("Very high-confidence deepfake β€” manipulation detected in nearly every frame")
470
- elif prob > 0.72:
471
- details.append("Strong deepfake indicators detected across multiple facial regions")
472
- elif prob > 0.60:
473
- details.append("Significant facial manipulation artifacts identified by AI ensemble")
474
  else:
475
- details.append("Subtle deepfake patterns detected β€” borderline manipulation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
 
477
  if probs:
478
  high_frames = sum(1 for p in probs if p >= 0.60)
@@ -480,19 +812,17 @@ class ReportGeneratorAgent:
480
  details.append(f"Inconsistent manipulation across frames ({pct_high:.0f}% flagged)")
481
 
482
  details.append("Unnatural texture blending detected at facial boundary regions")
483
- details.append("High-frequency noise patterns inconsistent with authentic camera footage")
484
 
485
- if probs:
486
- peak = max(probs)
487
- if peak > 0.90:
488
- details.append(f"Peak frame confidence: {peak*100:.1f}% β€” extremely strong deepfake signal")
489
  else:
490
- if prob < 0.25:
491
- details.append("Strong indicators of authentic, unmanipulated video content")
492
- elif prob < 0.40:
493
- details.append("No significant deepfake artifacts detected by either model")
494
- else:
495
- details.append("Video appears authentic β€” deepfake probability below detection threshold")
 
496
 
497
  details.append("Natural facial texture and lighting consistency observed across frames")
498
  details.append("Compression artifacts consistent with genuine camera-captured footage")
@@ -502,8 +832,6 @@ class ReportGeneratorAgent:
502
 
503
  if frames_with_faces == 0:
504
  details.append("⚠️ No faces detected β€” result based on full-frame artifact analysis only")
505
- elif frames_with_faces < frames_analyzed * 0.25:
506
- details.append(f"⚠️ Low face coverage ({frames_with_faces}/{frames_analyzed} frames)")
507
 
508
  return details
509
 
@@ -523,6 +851,8 @@ class DeepfakeAuthenticator:
523
  self.face_agent = FaceDetectorAgent(min_detection_confidence=0.3)
524
  self.decision_agent = DecisionAgent()
525
  self.report_agent = ReportGeneratorAgent()
 
 
526
  self._audio = None
527
 
528
  def _get_audio(self):
@@ -540,10 +870,14 @@ class DeepfakeAuthenticator:
540
  start = time.time()
541
  logger.info(f"Starting analysis: {video_path} (fast_mode={fast_mode})")
542
 
543
- # Fast mode: fewer frames for extension captures (8s video)
544
  max_frames = 20 if fast_mode else 40
545
 
546
- # Step 1: Extract frames + metadata
 
 
 
 
 
547
  metadata = self.frame_agent.get_video_metadata(video_path)
548
  frames = self.frame_agent.extract_frames(video_path, max_frames=max_frames)
549
 
@@ -557,37 +891,48 @@ class DeepfakeAuthenticator:
557
  "audio": {"available": False, "result": "NO_AUDIO", "confidence": 0, "details": []},
558
  }
559
 
560
- # Step 2 & 3: Face detection + audio run in parallel
 
 
 
561
  audio_result = {"available": False, "result": "NO_AUDIO", "confidence": 0, "details": []}
562
 
563
  with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
564
- # Face detection (all frames in one MediaPipe context)
565
  face_future = executor.submit(self.face_agent.detect_all_frames, frames)
566
-
567
- # Audio analysis runs concurrently
568
  audio_agent = self._get_audio()
569
  audio_future = None
570
  if audio_agent:
571
  audio_future = executor.submit(audio_agent.analyze, video_path, 0.5)
572
 
573
  face_crops_per_frame = face_future.result()
574
-
575
  if audio_future:
576
  try:
577
  audio_result = audio_future.result(timeout=30)
578
  except Exception as e:
579
  logger.warning(f"Audio analysis failed: {e}")
580
 
581
- # Step 4: Visual decision (batched inference)
582
  analysis = self.decision_agent.analyze_frames(frames, face_crops_per_frame)
583
 
584
- # Step 5: Generate report
585
- report = self.report_agent.generate(analysis, metadata, audio_result)
 
 
 
 
586
  report["processing_time_sec"] = round(time.time() - start, 2)
587
  report["audio"] = audio_result
 
 
 
 
 
 
588
 
589
  logger.info(
590
  f"Analysis complete: {report['result']} ({report['confidence']}%) "
 
 
591
  f"in {report['processing_time_sec']}s"
592
  )
593
  return report
 
11
  from typing import Optional
12
  import time
13
  import concurrent.futures
14
+ import struct
15
+ import json
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
 
20
+ # ─────────────────────────────────────────────
21
+ # Agent 0a: C2PA / Metadata Agent
22
+ # Detects Content Credentials from AI generators
23
+ # (Veo3, Sora, Runway, Firefly, DALL-E, etc.)
24
+ # ─────────────────────────────────────────────
25
+ class MetadataAgent:
26
+ # Known AI generator signatures in file metadata
27
+ AI_GENERATOR_SIGNATURES = [
28
+ # C2PA / Content Credentials markers
29
+ b'c2pa', b'C2PA', b'jumbf', b'JUMBF',
30
+ # Google Veo / DeepMind
31
+ b'veo', b'Veo', b'google/veo',
32
+ # OpenAI Sora
33
+ b'sora', b'Sora', b'openai',
34
+ # Runway
35
+ b'runway', b'Runway',
36
+ # Stability AI
37
+ b'stability', b'StableDiffusion', b'stable-diffusion',
38
+ # Meta
39
+ b'emu_video', b'EmuVideo',
40
+ # Adobe Firefly
41
+ b'firefly', b'adobe:firefly',
42
+ # Pika
43
+ b'pika', b'PikaLabs',
44
+ # Kling
45
+ b'kling', b'KlingAI',
46
+ # General AI markers
47
+ b'ai_generated', b'AI_GENERATED', b'synthetic_media',
48
+ b'generative_ai', b'text_to_video', b'diffusion_model',
49
+ # XMP metadata markers
50
+ b'<dc:creator>AI</dc:creator>',
51
+ b'xmp:CreatorTool>AI',
52
+ b'Kling', b'HailuoAI', b'MiniMax',
53
+ ]
54
+
55
+ # Known AI tool names in metadata strings
56
+ AI_TOOL_NAMES = [
57
+ 'veo', 'sora', 'runway', 'pika', 'kling', 'hailuo', 'minimax',
58
+ 'stable diffusion', 'stablediffusion', 'midjourney', 'dall-e',
59
+ 'firefly', 'emu video', 'lumiere', 'imagen video', 'phenaki',
60
+ 'make-a-video', 'cogvideo', 'text2video', 'gen-2', 'gen-3',
61
+ 'ai generated', 'synthetic', 'generative',
62
+ ]
63
+
64
+ def analyze(self, video_path: str) -> dict:
65
+ """
66
+ Scan file bytes and metadata for AI generator signatures.
67
+ Returns result dict with found signals.
68
+ """
69
+ result = {
70
+ "ai_signatures_found": [],
71
+ "c2pa_detected": False,
72
+ "ai_tool_detected": None,
73
+ "is_ai_generated": False,
74
+ "confidence": 0.0,
75
+ }
76
+
77
+ try:
78
+ path = Path(video_path)
79
+ if not path.exists():
80
+ return result
81
+
82
+ # Read first 512KB and last 64KB (metadata is usually at start/end)
83
+ file_size = path.stat().st_size
84
+ with open(video_path, 'rb') as f:
85
+ header = f.read(min(524288, file_size))
86
+ if file_size > 524288:
87
+ f.seek(max(0, file_size - 65536))
88
+ footer = f.read(65536)
89
+ else:
90
+ footer = b''
91
+
92
+ scan_data = header + footer
93
+ scan_lower = scan_data.lower()
94
+
95
+ # Check binary signatures
96
+ for sig in self.AI_GENERATOR_SIGNATURES:
97
+ if sig.lower() in scan_lower:
98
+ result["ai_signatures_found"].append(sig.decode(errors='ignore').strip())
99
+ if b'c2pa' in sig.lower() or b'jumbf' in sig.lower():
100
+ result["c2pa_detected"] = True
101
+
102
+ # Check readable text sections for tool names
103
+ try:
104
+ text_content = scan_data.decode('utf-8', errors='ignore').lower()
105
+ for tool in self.AI_TOOL_NAMES:
106
+ if tool in text_content:
107
+ result["ai_tool_detected"] = tool
108
+ result["ai_signatures_found"].append(f"tool:{tool}")
109
+ break
110
+ except Exception:
111
+ pass
112
+
113
+ # Check MP4/MOV metadata boxes (udta, Β©too, Β©swr, XMP)
114
+ try:
115
+ mp4_meta = self._parse_mp4_metadata(video_path)
116
+ for key, val in mp4_meta.items():
117
+ val_lower = str(val).lower()
118
+ for tool in self.AI_TOOL_NAMES:
119
+ if tool in val_lower:
120
+ result["ai_tool_detected"] = f"{key}:{tool}"
121
+ result["ai_signatures_found"].append(f"mp4:{key}={val[:60]}")
122
+ break
123
+ except Exception:
124
+ pass
125
+
126
+ # Determine final verdict
127
+ n_signals = len(set(result["ai_signatures_found"]))
128
+ if result["c2pa_detected"]:
129
+ result["is_ai_generated"] = True
130
+ result["confidence"] = 0.98
131
+ elif n_signals >= 2:
132
+ result["is_ai_generated"] = True
133
+ result["confidence"] = 0.92
134
+ elif n_signals == 1:
135
+ result["is_ai_generated"] = True
136
+ result["confidence"] = 0.82
137
+
138
+ if result["is_ai_generated"]:
139
+ logger.info(
140
+ f"AI metadata detected: c2pa={result['c2pa_detected']} "
141
+ f"tool={result['ai_tool_detected']} "
142
+ f"signals={result['ai_signatures_found'][:3]}"
143
+ )
144
+
145
+ except Exception as e:
146
+ logger.warning(f"Metadata analysis failed: {e}")
147
+
148
+ return result
149
+
150
+ def _parse_mp4_metadata(self, video_path: str) -> dict:
151
+ """Parse MP4 metadata boxes for software/creator tags."""
152
+ meta = {}
153
+ try:
154
+ with open(video_path, 'rb') as f:
155
+ data = f.read(min(2097152, Path(video_path).stat().st_size)) # first 2MB
156
+
157
+ i = 0
158
+ while i < len(data) - 8:
159
+ try:
160
+ size = struct.unpack('>I', data[i:i+4])[0]
161
+ box = data[i+4:i+8].decode('ascii', errors='ignore')
162
+ if size < 8 or size > len(data):
163
+ i += 1
164
+ continue
165
+ content = data[i+8:i+size]
166
+ # Look for known metadata boxes
167
+ if box in ('Β©too', 'Β©swr', 'Β©cmt', 'Β©nam', 'XMP_', 'uuid'):
168
+ text = content.decode('utf-8', errors='ignore').strip('\x00').strip()
169
+ if text:
170
+ meta[box] = text
171
+ i += size
172
+ except Exception:
173
+ i += 1
174
+ except Exception:
175
+ pass
176
+ return meta
177
+
178
+
179
+ # ─────────────────────────────────────────────
180
+ # Agent 0b: Temporal Consistency Agent
181
+ # Detects frame-to-frame flickering in AI video
182
+ # ─────────────────────────────────────────────
183
+ class TemporalConsistencyAgent:
184
+ """
185
+ Modern AI video generators (Veo3, Sora, Runway) produce subtle
186
+ temporal inconsistencies invisible to the eye but measurable:
187
+ - Texture flickering in hair/background
188
+ - Unnatural motion smoothness (too perfect)
189
+ - Boundary artifacts between face and background
190
+ - Color channel inconsistency across frames
191
+ """
192
+
193
+ def analyze(self, frames: list[np.ndarray]) -> dict:
194
+ if len(frames) < 4:
195
+ return {"score": 0.5, "available": False, "signals": []}
196
+
197
+ signals = []
198
+ scores = []
199
+
200
+ try:
201
+ # ── 1. Pixel-level temporal variance ─────────────────────────
202
+ # AI video: unnaturally low variance in static regions
203
+ # Real video: natural noise/grain causes higher variance
204
+ gray_frames = [cv2.cvtColor(f, cv2.COLOR_BGR2GRAY).astype(np.float32)
205
+ for f in frames]
206
+ stack = np.stack(gray_frames, axis=0) # [N, H, W]
207
+ pixel_var = np.mean(np.var(stack, axis=0)) # mean variance per pixel
208
+
209
+ # Real video: pixel_var typically 50-300
210
+ # AI video: often < 30 (too smooth) or > 500 (flickering)
211
+ if pixel_var < 25:
212
+ scores.append(0.72)
213
+ signals.append(f"Unnaturally smooth temporal texture (var={pixel_var:.1f})")
214
+ elif pixel_var > 600:
215
+ scores.append(0.68)
216
+ signals.append(f"Excessive temporal flickering (var={pixel_var:.1f})")
217
+ else:
218
+ scores.append(0.30)
219
+
220
+ # ── 2. Frame difference consistency ──────────────────────────
221
+ # AI video: frame diffs are too uniform (generated at fixed rate)
222
+ # Real video: natural motion causes variable frame differences
223
+ diffs = []
224
+ for i in range(1, len(gray_frames)):
225
+ diff = np.mean(np.abs(gray_frames[i] - gray_frames[i-1]))
226
+ diffs.append(diff)
227
+
228
+ diff_std = float(np.std(diffs))
229
+ diff_mean = float(np.mean(diffs))
230
+ diff_cv = diff_std / (diff_mean + 1e-8) # coefficient of variation
231
+
232
+ # Real video: CV typically 0.3-0.8 (variable motion)
233
+ # AI video: CV often < 0.15 (too uniform) or > 1.2 (unstable)
234
+ if diff_cv < 0.12:
235
+ scores.append(0.70)
236
+ signals.append(f"Unnaturally uniform motion pattern (CV={diff_cv:.3f})")
237
+ elif diff_cv > 1.3:
238
+ scores.append(0.65)
239
+ signals.append(f"Unstable frame transitions (CV={diff_cv:.3f})")
240
+ else:
241
+ scores.append(0.28)
242
+
243
+ # ── 3. High-frequency temporal noise ─────────────────────────
244
+ # Real cameras have consistent sensor noise patterns
245
+ # AI generators produce different noise each frame
246
+ if len(frames) >= 6:
247
+ noise_vars = []
248
+ for frame in frames:
249
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32)
250
+ blur = cv2.GaussianBlur(gray, (5, 5), 0)
251
+ noise = gray - blur
252
+ noise_vars.append(float(np.var(noise)))
253
+
254
+ noise_consistency = float(np.std(noise_vars) / (np.mean(noise_vars) + 1e-8))
255
+ if noise_consistency > 0.5:
256
+ scores.append(0.66)
257
+ signals.append(f"Inconsistent noise pattern across frames ({noise_consistency:.2f})")
258
+ else:
259
+ scores.append(0.30)
260
+
261
+ # ── 4. Color channel temporal stability ───────────────────────
262
+ # AI video often has subtle color shifts between frames
263
+ channel_drifts = []
264
+ for i in range(1, min(len(frames), 15)):
265
+ b1, g1, r1 = cv2.split(frames[i-1].astype(np.float32))
266
+ b2, g2, r2 = cv2.split(frames[i].astype(np.float32))
267
+ drift = abs(np.mean(r1) - np.mean(r2)) + \
268
+ abs(np.mean(g1) - np.mean(g2)) + \
269
+ abs(np.mean(b1) - np.mean(b2))
270
+ channel_drifts.append(drift)
271
+
272
+ mean_drift = float(np.mean(channel_drifts))
273
+ if mean_drift > 8.0:
274
+ scores.append(0.68)
275
+ signals.append(f"Color channel drift between frames ({mean_drift:.1f})")
276
+ else:
277
+ scores.append(0.28)
278
+
279
+ except Exception as e:
280
+ logger.warning(f"Temporal analysis error: {e}")
281
+ return {"score": 0.5, "available": False, "signals": []}
282
+
283
+ final_score = float(np.mean(scores)) if scores else 0.5
284
+ logger.info(f"Temporal score: {final_score:.3f} signals={signals}")
285
+
286
+ return {
287
+ "score": round(final_score, 4),
288
+ "available": True,
289
+ "signals": signals,
290
+ }
291
+
292
+
293
  # ─────────────────────────────────────────────
294
  # Agent 1: Frame Analyzer Agent
295
  # ─────────────────────────────────────────────
 
657
  class ReportGeneratorAgent:
658
  BASE_THRESHOLD = 0.58 # Restored β€” 0.54 caused false positives
659
 
660
+ def generate(self, analysis: dict, metadata: dict, audio: dict | None = None,
661
+ metadata_result: dict | None = None, temporal_result: dict | None = None) -> dict:
662
  prob = analysis["overall_fake_probability"]
663
  consistency = analysis.get("consistency", 0.5)
664
  coverage = analysis.get("face_coverage", 0.5)
665
 
666
+ # ── Metadata hard override (C2PA / AI tool signature) ─────────────
667
+ meta_ai = metadata_result and metadata_result.get("is_ai_generated", False)
668
+ if meta_ai:
669
+ # Hard signal β€” override visual result
670
+ is_fake = True
671
+ calibrated = self._calibrate(max(prob, 0.80))
672
+ confidence = round(calibrated * 100, 1)
673
+ details = self._build_details(
674
+ analysis, metadata, prob, True, self.BASE_THRESHOLD,
675
+ metadata_result=metadata_result, temporal_result=temporal_result
676
+ )
677
+ return {
678
+ "result": "FAKE",
679
+ "confidence": confidence,
680
+ "details": details,
681
+ "frame_timeline": self._build_timeline(analysis.get("frame_scores", [])),
682
+ "metadata": {
683
+ "frames_analyzed": analysis.get("frames_analyzed", 0),
684
+ "frames_with_faces": analysis.get("frames_with_faces", 0),
685
+ "video_duration_sec": metadata.get("duration_sec", 0),
686
+ "video_fps": metadata.get("fps", 0),
687
+ "resolution": f"{metadata.get('width', 0)}x{metadata.get('height', 0)}",
688
+ },
689
+ }
690
+
691
+ # ── Temporal signal boost ─────────────────────────────────────────
692
+ temporal_score = 0.5
693
+ if temporal_result and temporal_result.get("available"):
694
+ temporal_score = temporal_result["score"]
695
+ # Blend temporal into visual probability (20% weight)
696
+ if temporal_score > 0.60:
697
+ prob = prob * 0.80 + temporal_score * 0.20
698
+ prob = round(float(np.clip(prob, 0.0, 1.0)), 4)
699
+ logger.info(f"Temporal boost applied: new prob={prob:.3f}")
700
+
701
+ # ── Adaptive visual threshold ─────────────────────────────────────
702
  threshold = self.BASE_THRESHOLD
703
  if consistency >= 0.70 and coverage >= 0.50:
704
  threshold -= 0.06
 
709
 
710
  visual_fake = prob >= threshold
711
 
712
+ # ── Audio signal ──────────────────────────────────────────────────
713
  audio_fake = False
714
  audio_prob = 0.0
715
  if audio and audio.get("available"):
 
736
  confidence = round(calibrated * 100, 1)
737
  result = "FAKE" if is_fake else "REAL"
738
 
739
+ logger.info(f"Decision: prob={prob:.3f} threshold={threshold:.3f} β†’ {result}")
 
 
740
 
741
+ details = self._build_details(
742
+ analysis, metadata, prob, is_fake, threshold,
743
+ metadata_result=metadata_result, temporal_result=temporal_result
744
+ )
745
  frame_timeline = self._build_timeline(analysis.get("frame_scores", []))
746
 
747
  return {
 
771
  conf = base + (top - base) * (distance / 0.5) ** 0.6
772
  return float(np.clip(conf, 0.88, 0.99))
773
 
774
+ def _build_details(self, analysis, metadata, prob, is_fake, threshold=0.58,
775
+ metadata_result=None, temporal_result=None) -> list[str]:
776
+ details = []
777
+ frame_scores = analysis.get("frame_scores", [])
778
  frames_with_faces = analysis.get("frames_with_faces", 0)
779
  frames_analyzed = analysis.get("frames_analyzed", 0)
780
  probs = [s["fake_probability"] for s in frame_scores] if frame_scores else []
781
 
782
+ # ── Metadata signals (highest priority) ───────────────────────────
783
+ if metadata_result and metadata_result.get("is_ai_generated"):
784
+ tool = metadata_result.get("ai_tool_detected")
785
+ if metadata_result.get("c2pa_detected"):
786
+ details.append("⚠️ C2PA Content Credentials detected β€” video is cryptographically signed as AI-generated")
787
+ if tool:
788
+ details.append(f"AI generation tool identified in metadata: {tool.upper()}")
789
  else:
790
+ details.append("AI generator signature found in file metadata")
791
+
792
+ # ── Temporal signals ──────────────────────────────────────────────
793
+ if temporal_result and temporal_result.get("available") and temporal_result.get("signals"):
794
+ for sig in temporal_result["signals"][:2]:
795
+ details.append(f"Temporal: {sig}")
796
+
797
+ # ── Visual signals ────────────────────────────────────────────────
798
+ if is_fake:
799
+ if not details: # only add if no stronger signal already shown
800
+ if prob > 0.85:
801
+ details.append("Very high-confidence deepfake β€” manipulation detected in nearly every frame")
802
+ elif prob > 0.72:
803
+ details.append("Strong deepfake indicators detected across multiple facial regions")
804
+ elif prob > 0.60:
805
+ details.append("Significant facial manipulation artifacts identified by AI ensemble")
806
+ else:
807
+ details.append("Subtle deepfake patterns detected β€” borderline manipulation")
808
 
809
  if probs:
810
  high_frames = sum(1 for p in probs if p >= 0.60)
 
812
  details.append(f"Inconsistent manipulation across frames ({pct_high:.0f}% flagged)")
813
 
814
  details.append("Unnatural texture blending detected at facial boundary regions")
 
815
 
816
+ if probs and max(probs) > 0.90:
817
+ details.append(f"Peak frame confidence: {max(probs)*100:.1f}% β€” extremely strong signal")
 
 
818
  else:
819
+ if not details:
820
+ if prob < 0.25:
821
+ details.append("Strong indicators of authentic, unmanipulated video content")
822
+ elif prob < 0.40:
823
+ details.append("No significant deepfake artifacts detected by either model")
824
+ else:
825
+ details.append("Video appears authentic β€” deepfake probability below detection threshold")
826
 
827
  details.append("Natural facial texture and lighting consistency observed across frames")
828
  details.append("Compression artifacts consistent with genuine camera-captured footage")
 
832
 
833
  if frames_with_faces == 0:
834
  details.append("⚠️ No faces detected β€” result based on full-frame artifact analysis only")
 
 
835
 
836
  return details
837
 
 
851
  self.face_agent = FaceDetectorAgent(min_detection_confidence=0.3)
852
  self.decision_agent = DecisionAgent()
853
  self.report_agent = ReportGeneratorAgent()
854
+ self.metadata_agent = MetadataAgent()
855
+ self.temporal_agent = TemporalConsistencyAgent()
856
  self._audio = None
857
 
858
  def _get_audio(self):
 
870
  start = time.time()
871
  logger.info(f"Starting analysis: {video_path} (fast_mode={fast_mode})")
872
 
 
873
  max_frames = 20 if fast_mode else 40
874
 
875
+ # Step 1: Metadata check β€” instant, catches Veo3/Sora/Runway signatures
876
+ metadata_result = self.metadata_agent.analyze(video_path)
877
+ if metadata_result["is_ai_generated"]:
878
+ logger.info(f"AI metadata detected: {metadata_result['ai_signatures_found'][:3]}")
879
+
880
+ # Step 2: Extract frames
881
  metadata = self.frame_agent.get_video_metadata(video_path)
882
  frames = self.frame_agent.extract_frames(video_path, max_frames=max_frames)
883
 
 
891
  "audio": {"available": False, "result": "NO_AUDIO", "confidence": 0, "details": []},
892
  }
893
 
894
+ # Step 3: Temporal analysis β€” fast numpy, catches modern AI video patterns
895
+ temporal_result = self.temporal_agent.analyze(frames)
896
+
897
+ # Step 4: Face detection + audio in parallel
898
  audio_result = {"available": False, "result": "NO_AUDIO", "confidence": 0, "details": []}
899
 
900
  with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
 
901
  face_future = executor.submit(self.face_agent.detect_all_frames, frames)
 
 
902
  audio_agent = self._get_audio()
903
  audio_future = None
904
  if audio_agent:
905
  audio_future = executor.submit(audio_agent.analyze, video_path, 0.5)
906
 
907
  face_crops_per_frame = face_future.result()
 
908
  if audio_future:
909
  try:
910
  audio_result = audio_future.result(timeout=30)
911
  except Exception as e:
912
  logger.warning(f"Audio analysis failed: {e}")
913
 
914
+ # Step 5: Visual decision
915
  analysis = self.decision_agent.analyze_frames(frames, face_crops_per_frame)
916
 
917
+ # Step 6: Generate report combining all signals
918
+ report = self.report_agent.generate(
919
+ analysis, metadata, audio_result,
920
+ metadata_result=metadata_result,
921
+ temporal_result=temporal_result,
922
+ )
923
  report["processing_time_sec"] = round(time.time() - start, 2)
924
  report["audio"] = audio_result
925
+ report["metadata_check"] = {
926
+ "ai_generated": metadata_result["is_ai_generated"],
927
+ "c2pa_detected": metadata_result["c2pa_detected"],
928
+ "tool_detected": metadata_result["ai_tool_detected"],
929
+ "signals": metadata_result["ai_signatures_found"][:5],
930
+ }
931
 
932
  logger.info(
933
  f"Analysis complete: {report['result']} ({report['confidence']}%) "
934
+ f"meta_ai={metadata_result['is_ai_generated']} "
935
+ f"temporal={temporal_result['score']:.3f} "
936
  f"in {report['processing_time_sec']}s"
937
  )
938
  return report