"""Video deepfake detection: frame sampling, model inference and result insights."""

import logging

import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

from detector_config import (
    ALLOW_LOCAL_MODEL_FALLBACK,
    VIDEO_DETECTOR_BACKEND,
    VIDEO_FAKE_THRESHOLD,
    VIDEO_UNCERTAIN_MARGIN,
)
from model_loader import get_video_model

logger = logging.getLogger(__name__)

# Upper bound on the number of frames sent through the local model per video.
_MAX_FRAMES = 8
# Sampling stride used when the container does not report a frame count.
_FALLBACK_FRAME_STRIDE = 15


def build_video_insight(result, confidence, fake_score, real_score, probs):
    """Summarise a video verdict into human-readable and numeric insight fields.

    Args:
        result: Verdict label; "Fake", "Uncertain" and anything else (treated
            as the authentic case) select different summaries.
        confidence: Overall confidence on a 0-100 scale.
        fake_score: Overall fake score on a 0-1 scale.
        real_score: Overall real score on a 0-1 scale.
        probs: Per-frame "real" probabilities (0-1). May be an ndarray, a
            sequence, a scalar or None; when empty, ``real_score`` is used as
            a single pseudo-frame.

    Returns:
        A dict with the keys "certainty", "summary", "scores", "frames",
        "metrics" and "risk_level".
    """
    # Coerce to a flat ndarray so the vectorised comparisons below also work
    # for plain lists, scalars and None (the original required an ndarray).
    probs = np.asarray([] if probs is None else probs).reshape(-1)
    if probs.size == 0:
        probs = np.array([real_score])

    analyzed = int(probs.size)
    real_frames = int(np.count_nonzero(probs >= 0.5))
    fake_frames = analyzed - real_frames

    # Confidence of each frame in whichever class it leans toward, in percent.
    frame_confidences = np.maximum(probs, 1 - probs) * 100
    avg_frame_confidence = round(float(np.mean(frame_confidences)), 2)

    # Share of frames that agree with the majority class, in percent.
    consistency = (max(real_frames, fake_frames) / analyzed) * 100
    score_gap = abs(real_score - fake_score) * 100

    if confidence >= 85:
        certainty = "High"
    elif confidence >= 65:
        certainty = "Moderate"
    else:
        certainty = "Low"

    if result == "Uncertain":
        summary = "The detector did not find a large enough gap between fake and real video evidence."
    elif certainty == "Low":
        summary = "Frame-level predictions are close together, so the video result is uncertain."
    elif result == "Fake":
        summary = "More sampled evidence leaned toward manipulated or synthetic content."
    else:
        summary = "More sampled evidence leaned toward authentic content."

    if result != "Fake":
        risk_level = "Low"
    elif confidence >= 70:
        risk_level = "High"
    else:
        risk_level = "Medium"

    return {
        "certainty": certainty,
        "summary": summary,
        "scores": {
            "fake": round(fake_score * 100, 2),
            "real": round(real_score * 100, 2),
        },
        "frames": {
            "analyzed": analyzed,
            "fake_leaning": fake_frames,
            "real_leaning": real_frames,
            "min_confidence": round(float(np.min(frame_confidences)), 2),
            "max_confidence": round(float(np.max(frame_confidences)), 2),
            "avg_confidence": avg_frame_confidence,
        },
        "metrics": {
            "confidence": round(confidence, 2),
            "score_gap": round(score_gap, 2),
            "uncertainty": round(100 - confidence, 2),
            "consistency": round(consistency, 2),
            "avg_frame_confidence": avg_frame_confidence,
        },
        "risk_level": risk_level,
    }


# -------------------------------
# Preprocessing
# -------------------------------
# Resize every frame to 224x224, convert it to a tensor and normalise each
# channel with the mean/std values below (the standard ImageNet statistics).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])


# -------------------------------
# Video Prediction
# -------------------------------
def _sample_video_frames(video_path, max_frames=_MAX_FRAMES):
    """Read up to ``max_frames`` evenly strided frames as preprocessed tensors."""
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        if total_frames:
            frame_skip = max(1, total_frames // max_frames)
        else:
            frame_skip = _FALLBACK_FRAME_STRIDE

        frame_count = 0
        while len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            if frame_count % frame_skip != 0:
                continue
            # OpenCV decodes to BGR; PIL and the transform expect RGB.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(transform(Image.fromarray(rgb)))
    finally:
        # Release the capture even if decoding or preprocessing raises.
        cap.release()
    return frames


def predict_video(video_path):
    """Classify a video as real or fake and return scores plus insights.

    When ``VIDEO_DETECTOR_BACKEND`` is "huggingface" the Hugging Face detector
    is tried first. If it raises, the error is returned unless
    ``ALLOW_LOCAL_MODEL_FALLBACK`` is truthy, in which case the local model is
    used instead. The local path samples up to ``_MAX_FRAMES`` frames, applies
    a sigmoid to the model output and treats each value as that frame's
    probability of being real.

    Args:
        video_path: Path of the video, passed to the detector or to
            ``cv2.VideoCapture``.

    Returns:
        On success, a dict with "result", "confidence", "fake_score",
        "real_score" (all percentages), "raw_probability", "frames_analyzed",
        "performance", "frame_scores" and "insight" (local path), or the
        Hugging Face detector's own result dict with "insight" added.
        On failure, a dict with a single "error" key.
    """
    if VIDEO_DETECTOR_BACKEND == "huggingface":
        try:
            from hf_detectors import get_hf_video_detector

            result = get_hf_video_detector().predict(
                video_path,
                threshold=VIDEO_FAKE_THRESHOLD,
                uncertain_margin=VIDEO_UNCERTAIN_MARGIN,
            )
            if "error" in result:
                return result

            # The detector reports percentages; the insight builder wants 0-1.
            probs = np.array([result["real_score"] / 100], dtype=float)
            result["insight"] = build_video_insight(
                result["result"],
                result["confidence"],
                result["fake_score"] / 100,
                result["real_score"] / 100,
                probs,
            )
            return result
        except Exception as error:
            if not ALLOW_LOCAL_MODEL_FALLBACK:
                return {"error": f"Hugging Face video detector failed: {error}"}
            # Best-effort fallback is deliberate, but leave a trace of why.
            logger.warning(
                "Hugging Face video detector failed; falling back to local model: %s",
                error,
            )

    frames = _sample_video_frames(video_path)
    if not frames:
        return {"error": "No frames processed"}

    batch = torch.stack(frames)
    with torch.no_grad():
        output = get_video_model()(batch)

    probs = torch.sigmoid(output).detach().cpu().numpy().reshape(-1)

    real_score = float(np.mean(probs))
    fake_score = 1 - real_score
    if real_score >= fake_score:
        result = "Real"
        confidence = real_score
    else:
        result = "Fake"
        confidence = fake_score

    frame_scores = []
    for index, prob in enumerate(probs, start=1):
        frame_real_score = float(prob)
        frame_fake_score = 1 - frame_real_score
        frame_result = "Real" if frame_real_score >= frame_fake_score else "Fake"
        frame_scores.append({
            "frame": index,
            "result": frame_result,
            "confidence": round(max(frame_real_score, frame_fake_score) * 100, 2),
            "fake_score": round(frame_fake_score * 100, 2),
            "real_score": round(frame_real_score * 100, 2),
        })

    return {
        "result": result,
        "confidence": round(confidence * 100, 2),
        "fake_score": round(fake_score * 100, 2),
        "real_score": round(real_score * 100, 2),
        "raw_probability": round(real_score, 6),
        "frames_analyzed": len(frames),
        "performance": [round(float(max(prob, 1 - prob)) * 100, 2) for prob in probs],
        "frame_scores": frame_scores,
        "insight": build_video_insight(result, confidence * 100, fake_score, real_score, probs),
    }