"""Zero-shot deepfake detection for single video frames using CLIP."""

import sys

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor


class VideoDeepfakeDetector:
    """Classifies a single video frame as real footage or AI-generated.

    Uses OpenAI's CLIP (ViT-B/32) zero-shot: the frame is scored against a
    fixed set of text prompts — three describing authentic footage, five
    describing synthetic/deepfake imagery — and the pooled probability mass
    of the two groups is compared.
    """

    # Text prompts scored against each frame. ORDER MATTERS: the first
    # _NUM_REAL_LABELS entries describe authentic footage; the rest fakes.
    # These phrasings work best for vlogs, motion blur, and low-quality frames.
    _LABELS = [
        # --- REAL CATEGORY ---
        "raw authentic photo from a camera",
        "youtube vlog frame with text and emojis",
        "low quality phone camera footage",
        # --- FAKE CATEGORY ---
        "ai generated image from text prompt",
        "hyper-realistic cgi 3d render",
        "digital art style synthetic face",
        "deepfake face swap artifacts",
        "unnatural smooth skin texture",
    ]
    _NUM_REAL_LABELS = 3

    # Decision threshold on the summed "fake" probability mass. Slightly
    # above 0.5 (stricter) because video frames are messy — motion blur and
    # compression artifacts push borderline frames toward false positives.
    _FAKE_THRESHOLD = 0.51

    def __init__(self):
        """Load the CLIP model and processor from the Hugging Face hub.

        Exits the process with status 1 if loading fails (e.g. no network
        connection and no local model cache).
        """
        print("⚡ Loading Video AI Model...")
        try:
            # Both model and processor pull directly from Hugging Face.
            self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
            self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
            print("✅ Video Model Ready.")
        except Exception as e:
            print(f"❌ Error: {e}")
            # FIX: sys.exit(1) instead of bare exit(). exit() is the `site`
            # module's interactive helper (absent under -S) and, called with
            # no argument, reports SUCCESS (status 0) on a load failure.
            sys.exit(1)

    def predict(self, image_path):
        """Score one frame and classify it as real or deepfake.

        Args:
            image_path: Path to an image file readable by PIL.

        Returns:
            A ``(label, score)`` tuple: ``("DEEPFAKE DETECTED", fake_prob)``,
            ``("REAL", real_prob)``, or ``("ERROR", 0.0)`` if anything raised.
        """
        try:
            # FIX: force RGB. CLIP's processor expects 3-channel input;
            # grayscale, palette, or RGBA frames (common in video dumps)
            # would otherwise fail or be normalized incorrectly.
            image = Image.open(image_path).convert("RGB")

            inputs = self.processor(
                text=self._LABELS,
                images=image,
                return_tensors="pt",
                padding=True,
            )

            # Inference only — no gradients needed.
            with torch.no_grad():
                outputs = self.model(**inputs)

            # Softmax over all labels, then pool probability mass per group.
            probs = outputs.logits_per_image.softmax(dim=1)
            scores = probs.tolist()[0]
            real_score = sum(scores[:self._NUM_REAL_LABELS])
            fake_score = sum(scores[self._NUM_REAL_LABELS:])

            if fake_score > self._FAKE_THRESHOLD:
                return "DEEPFAKE DETECTED", fake_score
            return "REAL", real_score
        except Exception as e:
            # Broad catch is deliberate: one unreadable/corrupt frame must
            # not abort a whole video scan — caller sees the "ERROR" label.
            print(f"Error predicting {image_path}: {e}")
            return "ERROR", 0.0