Spaces:
Sleeping
Sleeping
| import torch | |
| from torchvision import transforms | |
| from PIL import Image | |
| import cv2 | |
| import numpy as np | |
| from detector_config import ( | |
| ALLOW_LOCAL_MODEL_FALLBACK, | |
| VIDEO_DETECTOR_BACKEND, | |
| VIDEO_FAKE_THRESHOLD, | |
| VIDEO_UNCERTAIN_MARGIN, | |
| ) | |
| from model_loader import get_video_model | |
def build_video_insight(result, confidence, fake_score, real_score, probs):
    """Summarise a video verdict as a display-ready insight dictionary.

    Args:
        result: Verdict label. "Fake" and "Uncertain" are handled specially;
            any other value is described as authentic content.
        confidence: Confidence in the verdict as a percentage (0-100).
        fake_score: Aggregate fake probability as a fraction (0-1).
        real_score: Aggregate real probability as a fraction (0-1).
        probs: Per-frame "real" probabilities as fractions. May be a NumPy
            array, a plain sequence, a scalar, or None. When empty or None,
            ``real_score`` stands in as a single pseudo-frame.

    Returns:
        A dict with the keys ``certainty``, ``summary``, ``scores``,
        ``frames``, ``metrics`` and ``risk_level``. All numeric values are
        percentages rounded to two decimals, except the frame counts.
    """
    # Coerce to a flat float array so lists, scalars and N-d arrays all behave
    # the same way under the vectorised comparisons and arithmetic below.
    frame_probs = np.asarray([] if probs is None else probs, dtype=float).reshape(-1)
    if frame_probs.size == 0:
        frame_probs = np.array([real_score], dtype=float)

    total_frames = int(frame_probs.size)
    real_frames = int(np.count_nonzero(frame_probs >= 0.5))
    fake_frames = total_frames - real_frames

    # Confidence of a single frame is the probability of whichever class won.
    frame_confidences = np.maximum(frame_probs, 1 - frame_probs) * 100
    avg_frame_confidence = round(float(np.mean(frame_confidences)), 2)

    # Share of frames that agree with the majority class.
    consistency = (max(real_frames, fake_frames) / total_frames) * 100
    score_gap = abs(real_score - fake_score) * 100

    if confidence >= 85:
        certainty = "High"
    elif confidence >= 65:
        certainty = "Moderate"
    else:
        certainty = "Low"

    # An explicit "Uncertain" verdict takes precedence over the low-certainty
    # wording, which in turn takes precedence over the Fake/Real wording.
    if result == "Uncertain":
        summary = "The detector did not find a large enough gap between fake and real video evidence."
    elif certainty == "Low":
        summary = "Frame-level predictions are close together, so the video result is uncertain."
    elif result == "Fake":
        summary = "More sampled evidence leaned toward manipulated or synthetic content."
    else:
        summary = "More sampled evidence leaned toward authentic content."

    # Only a "Fake" verdict raises the risk level above "Low".
    if result != "Fake":
        risk_level = "Low"
    elif confidence >= 70:
        risk_level = "High"
    else:
        risk_level = "Medium"

    return {
        "certainty": certainty,
        "summary": summary,
        "scores": {
            "fake": round(fake_score * 100, 2),
            "real": round(real_score * 100, 2),
        },
        "frames": {
            "analyzed": total_frames,
            "fake_leaning": fake_frames,
            "real_leaning": real_frames,
            "min_confidence": round(float(np.min(frame_confidences)), 2),
            "max_confidence": round(float(np.max(frame_confidences)), 2),
            "avg_confidence": avg_frame_confidence,
        },
        "metrics": {
            "confidence": round(confidence, 2),
            "score_gap": round(score_gap, 2),
            "uncertainty": round(100 - confidence, 2),
            "consistency": round(consistency, 2),
            "avg_frame_confidence": avg_frame_confidence,
        },
        "risk_level": risk_level,
    }
# -------------------------------
# Frame preprocessing
# -------------------------------
# Applied to every sampled frame before it is batched for the local model:
# resize to 224x224, convert the PIL image to a tensor, then normalise each
# channel. The mean/std values are the ones commonly used with
# ImageNet-pretrained backbones (NOTE(review): confirm they match how the
# video model was trained).
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
| # ------------------------------- | |
| # Video Prediction | |
| # ------------------------------- | |
def _preprocess_frame(frame):
    """Convert one BGR OpenCV frame into a tensor via the module ``transform``."""
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return transform(Image.fromarray(rgb_frame))


def _sample_video_frames(video_path, max_frames=8):
    """Decode up to ``max_frames`` strided frames from a video as tensors.

    The stride is derived from the reported frame count so the samples span
    the clip; when the count is unavailable every 15th frame is used. If the
    stride never lands on a decoded frame (clip shorter than the stride, or an
    over-reported frame count), the first decoded frame is used so a readable
    video is not reported as having no frames.

    Returns:
        A list of preprocessed frame tensors; empty when nothing could be
        decoded.
    """
    cap = cv2.VideoCapture(video_path)
    sampled = []
    first_frame = None
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        frame_skip = max(1, total_frames // max_frames) if total_frames else 15
        frame_count = 0
        while len(sampled) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            if first_frame is None:
                first_frame = frame
            if frame_count % frame_skip != 0:
                continue
            sampled.append(_preprocess_frame(frame))
    finally:
        # Release the capture handle even if decoding or preprocessing raises.
        cap.release()

    if not sampled and first_frame is not None:
        sampled.append(_preprocess_frame(first_frame))
    return sampled


def predict_video(video_path):
    """Classify a video as "Real" or "Fake" and attach an insight summary.

    When ``VIDEO_DETECTOR_BACKEND`` is "huggingface" the Hugging Face detector
    is tried first. If it raises, the error is returned unless
    ``ALLOW_LOCAL_MODEL_FALLBACK`` is set, in which case the local model is
    used instead. An error dict returned by the detector itself is passed
    through unchanged.

    Args:
        video_path: Path of the video, passed to the detector or to
            ``cv2.VideoCapture``.

    Returns:
        On success, a dict with ``result``, ``confidence``, ``fake_score``,
        ``real_score`` (percentages), ``insight`` and, for the local model,
        ``raw_probability``, ``frames_analyzed``, ``performance`` and
        ``frame_scores``. On failure, a dict with a single ``error`` key.
    """
    if VIDEO_DETECTOR_BACKEND == "huggingface":
        try:
            from hf_detectors import get_hf_video_detector

            result = get_hf_video_detector().predict(
                video_path,
                threshold=VIDEO_FAKE_THRESHOLD,
                uncertain_margin=VIDEO_UNCERTAIN_MARGIN,
            )
            if "error" in result:
                return result
            # The detector reports one aggregate score (as a percentage), so
            # it is treated as a single pseudo-frame for the insight.
            probs = np.array([result["real_score"] / 100], dtype=float)
            result["insight"] = build_video_insight(
                result["result"],
                result["confidence"],
                result["fake_score"] / 100,
                result["real_score"] / 100,
                probs,
            )
            return result
        except Exception as error:
            if not ALLOW_LOCAL_MODEL_FALLBACK:
                return {"error": f"Hugging Face video detector failed: {error}"}
            # Best-effort fallback: keep going, but leave a trace of the
            # failure instead of swallowing it silently.
            import logging

            logging.getLogger(__name__).warning(
                "Hugging Face video detector failed; falling back to local model: %s",
                error,
            )

    frames = _sample_video_frames(video_path)
    if not frames:
        return {"error": "No frames processed"}

    batch = torch.stack(frames)
    with torch.no_grad():
        output = get_video_model()(batch)
    # NOTE(review): assumes the model emits one logit per frame whose sigmoid
    # is the probability of "real" - confirm against the model definition.
    probs = torch.sigmoid(output).detach().cpu().numpy().reshape(-1)

    real_score = float(np.mean(probs))
    fake_score = 1 - real_score
    # NOTE(review): unlike the Hugging Face path, this path does not apply
    # VIDEO_FAKE_THRESHOLD / VIDEO_UNCERTAIN_MARGIN and never yields
    # "Uncertain" - confirm whether that is intended.
    if real_score >= fake_score:
        result = "Real"
        confidence = real_score
    else:
        result = "Fake"
        confidence = fake_score

    frame_scores = []
    for index, prob in enumerate(probs, start=1):
        frame_real_score = float(prob)
        frame_fake_score = 1 - frame_real_score
        frame_scores.append({
            "frame": index,
            "result": "Real" if frame_real_score >= frame_fake_score else "Fake",
            "confidence": round(max(frame_real_score, frame_fake_score) * 100, 2),
            "fake_score": round(frame_fake_score * 100, 2),
            "real_score": round(frame_real_score * 100, 2),
        })

    return {
        "result": result,
        "confidence": round(confidence * 100, 2),
        "fake_score": round(fake_score * 100, 2),
        "real_score": round(real_score * 100, 2),
        "raw_probability": round(real_score, 6),
        "frames_analyzed": len(frames),
        "performance": [round(float(max(prob, 1 - prob)) * 100, 2) for prob in probs],
        "frame_scores": frame_scores,
        "insight": build_video_insight(result, confidence * 100, fake_score, real_score, probs),
    }