# benchmark_eval.py - EVALUATION SUITE
"""
Runs the full FakeShield Video Forensic Lab on real and AI samples.
Generates an accuracy report and forensic signal breakdown.
"""
import os
import json
import time
import sys
# Ensure backend directory is in path
sys.path.append(os.path.join(os.getcwd(), 'backend'))
from app.services.video.video_detector import analyze_video
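
# NOTE: analyze_video is assumed (based on how its result is consumed below) to
# return a dict with at least "verdict", "fused_score", "signal_scores",
# "robustness" and "provenance" keys; missing keys fall back to defaults via
# report.get(...).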
EVAL_DIR = "backend/eval_samples"
REPORT_FILE = "backend/benchmark_report.json"
def run_benchmark():
    if not os.path.exists(EVAL_DIR):
        print(f"❌ Error: {EVAL_DIR} not found. Run downloads first.")
        return

    samples = [f for f in os.listdir(EVAL_DIR) if f.endswith(".mp4")]
    if not samples:
        print(f"❌ No samples found in {EVAL_DIR}.")
        return

    print(f"🎬 Starting Benchmark on {len(samples)} samples...")
    print("-" * 60)

    results = []
    for filename in samples:
        video_path = os.path.join(EVAL_DIR, filename)
        is_ai_ground_truth = filename.startswith("sora")

        print(f"🔍 Analyzing: {filename} (GT: {'AI' if is_ai_ground_truth else 'REAL'})")
        start_time = time.time()

        try:
            # Run analysis
            report = analyze_video(video_path, max_frames=32)
            elapsed = time.time() - start_time

            verdict = report.get("verdict", "UNKNOWN")
            fused_score = report.get("fused_score", 0.0)

            # Simple accuracy check
            is_correct = False
            if is_ai_ground_truth and verdict in ["AI_GENERATED", "LIKELY_AI"]:
                is_correct = True
            elif not is_ai_ground_truth and verdict in ["LIKELY_REAL", "UNCERTAIN"]:
                # Uncertain is better than a false positive for real samples
                is_correct = True

            res = {
                "filename": filename,
                "ground_truth": "AI" if is_ai_ground_truth else "REAL",
                "verdict": verdict,
                "score": fused_score,
                "is_correct": is_correct,
                "elapsed_sec": round(elapsed, 2),
                "signal_scores": report.get("signal_scores", {}),
                "robustness": report.get("robustness", {}),
                "provenance": report.get("provenance", {})
            }
            results.append(res)

            status_icon = "✅" if is_correct else "❌"
            print(f"   {status_icon} Verdict: {verdict} ({fused_score}%) | Time: {elapsed:.1f}s")

        except Exception as e:
            print(f"   ❌ Error: {e}")
            results.append({
                "filename": filename,
                "error": str(e)
            })
    # Summary
    total = len([r for r in results if "error" not in r])
    correct = len([r for r in results if r.get("is_correct")])
    accuracy = (correct / total * 100) if total > 0 else 0

    print("-" * 60)
    print("📊 BENCHMARK COMPLETE")
    print(f"✅ Accuracy: {accuracy:.1f}% ({correct}/{total})")

    # Save report
    summary = {
        "timestamp": time.ctime(),
        "total_samples": total,
        "correct": correct,
        "accuracy_pct": accuracy,
        "results": results
    }
    with open(REPORT_FILE, "w") as f:
        json.dump(summary, f, indent=4)
    print(f"📁 Report saved to {REPORT_FILE}")
if __name__ == "__main__":
    run_benchmark()
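
# Illustrative shape of backend/benchmark_report.json, derived from the summary
# and res dicts above (values are placeholders, not real results):
# {
#   "timestamp": "<time.ctime() string>",
#   "total_samples": <int>,
#   "correct": <int>,
#   "accuracy_pct": <float>,
#   "results": [
#     {"filename": "<clip>.mp4", "ground_truth": "AI" | "REAL",
#      "verdict": "<verdict>", "score": <float>, "is_correct": <bool>,
#      "elapsed_sec": <float>, "signal_scores": {...}, "robustness": {...},
#      "provenance": {...}}
#   ]
# }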