Spaces:

Akash4911
/

fakeshield-api

Running

App Files Files Community

fakeshield-api / benchmark_eval.py

Akash4911

Initial Deploy: FakeShield Backend v2.0 (Sovereign Vanguard)

89e8242 4 days ago

raw

history blame contribute delete

3.44 kB

	# benchmark_eval.py — EVALUATION SUITE
	"""
	Runs the full FakeShield Video Forensic Lab on real and AI samples.
	Generates an accuracy report and forensic signal breakdown.
	"""
	import os
	import json
	import time
	import sys

	# Ensure backend directory is in path
	sys.path.append(os.path.join(os.getcwd(), 'backend'))

	from app.services.video.video_detector import analyze_video

	EVAL_DIR = "backend/eval_samples"
	REPORT_FILE = "backend/benchmark_report.json"

	def run_benchmark():
	if not os.path.exists(EVAL_DIR):
	print(f"❌ Error: {EVAL_DIR} not found. Run downloads first.")
	return

	samples = [f for f in os.listdir(EVAL_DIR) if f.endswith(".mp4")]
	if not samples:
	print(f"❌ No samples found in {EVAL_DIR}.")
	return

	print(f"🎬 Starting Benchmark on {len(samples)} samples...")
	print("-" * 60)

	results = []

	for filename in samples:
	video_path = os.path.join(EVAL_DIR, filename)
	is_ai_ground_truth = filename.startswith("sora")

	print(f"🔍 Analyzing: {filename} (GT: {'AI' if is_ai_ground_truth else 'REAL'})")

	start_time = time.time()
	try:
	# Run analysis
	report = analyze_video(video_path, max_frames=32)
	elapsed = time.time() - start_time

	verdict = report.get("verdict", "UNKNOWN")
	fused_score = report.get("fused_score", 0.0)

	# Simple accuracy check
	is_correct = False
	if is_ai_ground_truth and verdict in ["AI_GENERATED", "LIKELY_AI"]:
	is_correct = True
	elif not is_ai_ground_truth and verdict in ["LIKELY_REAL", "UNCERTAIN"]:
	# Uncertain is better than false positive for real samples
	is_correct = True

	res = {
	"filename": filename,
	"ground_truth": "AI" if is_ai_ground_truth else "REAL",
	"verdict": verdict,
	"score": fused_score,
	"is_correct": is_correct,
	"elapsed_sec": round(elapsed, 2),
	"signal_scores": report.get("signal_scores", {}),
	"robustness": report.get("robustness", {}),
	"provenance": report.get("provenance", {})
	}
	results.append(res)

	status_icon = "✅" if is_correct else "❌"
	print(f" {status_icon} Verdict: {verdict} ({fused_score}%) \| Time: {elapsed:.1f}s")

	except Exception as e:
	print(f" ❌ Error: {e}")
	results.append({
	"filename": filename,
	"error": str(e)
	})

	# Summary
	total = len([r for r in results if "error" not in r])
	correct = len([r for r in results if r.get("is_correct")])
	accuracy = (correct / total * 100) if total > 0 else 0

	print("-" * 60)
	print(f"📊 BENCHMARK COMPLETE")
	print(f"✅ Accuracy: {accuracy:.1f}% ({correct}/{total})")

	# Save report
	summary = {
	"timestamp": time.ctime(),
	"total_samples": total,
	"correct": correct,
	"accuracy_pct": accuracy,
	"results": results
	}

	with open(REPORT_FILE, "w") as f:
	json.dump(summary, f, indent=4)

	print(f"📝 Report saved to {REPORT_FILE}")

	if __name__ == "__main__":
	run_benchmark()