Deepfake Authenticator committed on
Commit ·
da69892
1
Parent(s): d893104
fix: adaptive threshold to balance false positives and false negatives
Browse files
- Replace fixed threshold (0.65) with adaptive threshold (base 0.58)
- Lower threshold when consistency >= 0.70 AND coverage >= 0.50 (strong signal)
- Raise threshold when consistency < 0.35 (inconsistent = likely false positive)
- Switch aggregation from mean+p60 to mean+median (more robust)
- Add consistency and face_coverage metrics to analysis output
- Fixes: real comedian video flagged as FAKE, Morgan Freeman deepfake missed
- backend/detector.py +63 -32
backend/detector.py
CHANGED
|
@@ -334,13 +334,17 @@ class DecisionAgent:
|
|
| 334 |
face_crops_per_frame: list[list[np.ndarray]],
|
| 335 |
) -> dict:
|
| 336 |
"""
|
| 337 |
-
Aggregate predictions
|
| 338 |
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
"""
|
| 345 |
frame_scores = []
|
| 346 |
frames_with_faces = 0
|
|
@@ -361,7 +365,6 @@ class DecisionAgent:
|
|
| 361 |
continue
|
| 362 |
|
| 363 |
frames_with_faces += 1
|
| 364 |
-
# Mean across valid faces in this frame (not max)
|
| 365 |
frame_score = float(np.mean(valid_probs))
|
| 366 |
frame_scores.append({"frame_index": i, "fake_probability": round(frame_score, 4)})
|
| 367 |
|
|
@@ -371,37 +374,46 @@ class DecisionAgent:
|
|
| 371 |
if not frame_scores:
|
| 372 |
return {
|
| 373 |
"frame_scores": [],
|
| 374 |
-
"overall_fake_probability": 0.
|
| 375 |
"frames_analyzed": len(frames),
|
| 376 |
"frames_with_faces": 0,
|
|
|
|
|
|
|
| 377 |
}
|
| 378 |
|
| 379 |
probs = [s["fake_probability"] for s in frame_scores]
|
| 380 |
|
| 381 |
-
# Need at least 3 valid frames for a reliable result
|
| 382 |
if len(probs) < 3:
|
| 383 |
-
|
| 384 |
-
overall = float(np.mean(probs)) * 0.85 # dampen uncertain results
|
| 385 |
else:
|
| 386 |
-
mean_prob
|
| 387 |
-
|
| 388 |
-
#
|
| 389 |
-
overall
|
| 390 |
|
| 391 |
overall = round(float(np.clip(overall, 0.0, 1.0)), 4)
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
logger.info(
|
| 394 |
-
f"Scores β mean:
|
| 395 |
-
f"
|
| 396 |
-
f"final:
|
| 397 |
-
f"
|
|
|
|
| 398 |
)
|
| 399 |
|
| 400 |
return {
|
| 401 |
-
"frame_scores":
|
| 402 |
"overall_fake_probability": overall,
|
| 403 |
"frames_analyzed": len(frames),
|
| 404 |
"frames_with_faces": frames_with_faces,
|
|
|
|
|
|
|
| 405 |
}
|
| 406 |
|
| 407 |
|
|
@@ -410,16 +422,40 @@ class DecisionAgent:
|
|
| 410 |
# Builds the final human-readable report
|
| 411 |
# ─────────────────────────────────────────────
|
| 412 |
class ReportGeneratorAgent:
|
| 413 |
-
|
|
|
|
| 414 |
|
| 415 |
def generate(self, analysis: dict, metadata: dict) -> dict:
|
| 416 |
-
prob
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
calibrated = self._calibrate(prob)
|
| 418 |
confidence = round(calibrated * 100, 1)
|
| 419 |
-
is_fake = prob >= self.FAKE_THRESHOLD
|
| 420 |
result = "FAKE" if is_fake else "REAL"
|
| 421 |
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
frame_timeline = self._build_timeline(analysis.get("frame_scores", []))
|
| 424 |
|
| 425 |
return {
|
|
@@ -438,16 +474,11 @@ class ReportGeneratorAgent:
|
|
| 438 |
|
| 439 |
@staticmethod
|
| 440 |
def _calibrate(prob: float) -> float:
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
Avoids over-inflating borderline scores (0.55-0.65 range).
|
| 444 |
-
"""
|
| 445 |
-
x = (prob - 0.5) * 2.5 # gentler amplification than before
|
| 446 |
-
stretched = np.tanh(x) * 0.5 + 0.5
|
| 447 |
-
return float(np.clip(stretched, 0.01, 0.99))
|
| 448 |
|
| 449 |
def _build_details(
|
| 450 |
-
self, analysis: dict, metadata: dict, prob: float, is_fake: bool
|
| 451 |
) -> list[str]:
|
| 452 |
details = []
|
| 453 |
frame_scores = analysis.get("frame_scores", [])
|
|
|
|
| 334 |
face_crops_per_frame: list[list[np.ndarray]],
|
| 335 |
) -> dict:
|
| 336 |
"""
|
| 337 |
+
Aggregate predictions with adaptive scoring.
|
| 338 |
|
| 339 |
+
Key insight: deepfakes have CONSISTENTLY elevated scores across many
|
| 340 |
+
frames, while false positives on real videos tend to have a few
|
| 341 |
+
outlier frames with high scores but low overall consistency.
|
| 342 |
+
|
| 343 |
+
Strategy:
|
| 344 |
+
- Quality-gate blurry crops
|
| 345 |
+
- Per-frame: mean of valid face scores
|
| 346 |
+
- Final: weighted blend of mean + median (robust to outliers)
|
| 347 |
+
- Also return consistency metrics for adaptive thresholding
|
| 348 |
"""
|
| 349 |
frame_scores = []
|
| 350 |
frames_with_faces = 0
|
|
|
|
| 365 |
continue
|
| 366 |
|
| 367 |
frames_with_faces += 1
|
|
|
|
| 368 |
frame_score = float(np.mean(valid_probs))
|
| 369 |
frame_scores.append({"frame_index": i, "fake_probability": round(frame_score, 4)})
|
| 370 |
|
|
|
|
| 374 |
if not frame_scores:
|
| 375 |
return {
|
| 376 |
"frame_scores": [],
|
| 377 |
+
"overall_fake_probability": 0.40,
|
| 378 |
"frames_analyzed": len(frames),
|
| 379 |
"frames_with_faces": 0,
|
| 380 |
+
"consistency": 0.0,
|
| 381 |
+
"face_coverage": 0.0,
|
| 382 |
}
|
| 383 |
|
| 384 |
probs = [s["fake_probability"] for s in frame_scores]
|
| 385 |
|
|
|
|
| 386 |
if len(probs) < 3:
|
| 387 |
+
overall = float(np.mean(probs)) * 0.80
|
|
|
|
| 388 |
else:
|
| 389 |
+
mean_prob = float(np.mean(probs))
|
| 390 |
+
median_prob = float(np.median(probs))
|
| 391 |
+
# Mean+median blend: robust to both outliers and sparse fakes
|
| 392 |
+
overall = mean_prob * 0.65 + median_prob * 0.35
|
| 393 |
|
| 394 |
overall = round(float(np.clip(overall, 0.0, 1.0)), 4)
|
| 395 |
|
| 396 |
+
# Consistency: fraction of frames above 0.50 — high for real deepfakes
|
| 397 |
+
consistency = sum(1 for p in probs if p > 0.50) / len(probs)
|
| 398 |
+
|
| 399 |
+
# Face coverage: how much of the video had detectable faces
|
| 400 |
+
face_coverage = frames_with_faces / max(len(frames), 1)
|
| 401 |
+
|
| 402 |
logger.info(
|
| 403 |
+
f"Scores β mean:{float(np.mean(probs)):.3f} "
|
| 404 |
+
f"median:{float(np.median(probs)):.3f} "
|
| 405 |
+
f"final:{overall:.3f} "
|
| 406 |
+
f"consistency:{consistency:.2f} "
|
| 407 |
+
f"coverage:{face_coverage:.2f}"
|
| 408 |
)
|
| 409 |
|
| 410 |
return {
|
| 411 |
+
"frame_scores": frame_scores,
|
| 412 |
"overall_fake_probability": overall,
|
| 413 |
"frames_analyzed": len(frames),
|
| 414 |
"frames_with_faces": frames_with_faces,
|
| 415 |
+
"consistency": round(consistency, 3),
|
| 416 |
+
"face_coverage": round(face_coverage, 3),
|
| 417 |
}
|
| 418 |
|
| 419 |
|
|
|
|
| 422 |
# Builds the final human-readable report
|
| 423 |
# ─────────────────────────────────────────────
|
| 424 |
class ReportGeneratorAgent:
|
| 425 |
+
# Base threshold — adjusted adaptively per video
|
| 426 |
+
BASE_THRESHOLD = 0.58
|
| 427 |
|
| 428 |
def generate(self, analysis: dict, metadata: dict) -> dict:
|
| 429 |
+
prob = analysis["overall_fake_probability"]
|
| 430 |
+
consistency = analysis.get("consistency", 0.5)
|
| 431 |
+
coverage = analysis.get("face_coverage", 0.5)
|
| 432 |
+
|
| 433 |
+
# ── Adaptive threshold ────────────────────────────────────────
|
| 434 |
+
# Lower threshold when:
|
| 435 |
+
# - High consistency (many frames agree it's fake) → easier to flag
|
| 436 |
+
# - High face coverage (face visible throughout) → more reliable signal
|
| 437 |
+
# Raise threshold when:
|
| 438 |
+
# - Low consistency (only a few frames look fake) → likely false positive
|
| 439 |
+
# - Low coverage (face rarely visible) → unreliable signal
|
| 440 |
+
threshold = self.BASE_THRESHOLD
|
| 441 |
+
if consistency >= 0.70 and coverage >= 0.50:
|
| 442 |
+
threshold -= 0.06 # 0.52 — confident signal, lower bar
|
| 443 |
+
elif consistency >= 0.55:
|
| 444 |
+
threshold -= 0.03 # 0.55
|
| 445 |
+
elif consistency < 0.35:
|
| 446 |
+
threshold += 0.07 # 0.65 — inconsistent, raise bar
|
| 447 |
+
|
| 448 |
+
is_fake = prob >= threshold
|
| 449 |
calibrated = self._calibrate(prob)
|
| 450 |
confidence = round(calibrated * 100, 1)
|
|
|
|
| 451 |
result = "FAKE" if is_fake else "REAL"
|
| 452 |
|
| 453 |
+
logger.info(
|
| 454 |
+
f"Decision: prob={prob:.3f} threshold={threshold:.3f} "
|
| 455 |
+
f"consistency={consistency:.2f} coverage={coverage:.2f} β {result}"
|
| 456 |
+
)
|
| 457 |
+
|
| 458 |
+
details = self._build_details(analysis, metadata, prob, is_fake, threshold)
|
| 459 |
frame_timeline = self._build_timeline(analysis.get("frame_scores", []))
|
| 460 |
|
| 461 |
return {
|
|
|
|
| 474 |
|
| 475 |
@staticmethod
|
| 476 |
def _calibrate(prob: float) -> float:
|
| 477 |
+
x = (prob - 0.5) * 2.8
|
| 478 |
+
return float(np.clip(np.tanh(x) * 0.5 + 0.5, 0.01, 0.99))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
def _build_details(
|
| 481 |
+
self, analysis: dict, metadata: dict, prob: float, is_fake: bool, threshold: float = 0.58
|
| 482 |
) -> list[str]:
|
| 483 |
details = []
|
| 484 |
frame_scores = analysis.get("frame_scores", [])
|