"""Zero-shot deepfake detection for single video frames using CLIP."""

import sys

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor


class VideoDeepfakeDetector:
    """Classifies a single video frame as real footage or AI-generated.

    Uses OpenAI's CLIP (ViT-B/32) zero-shot: the frame is scored against a
    fixed set of text prompts — three describing authentic footage, five
    describing synthetic/deepfake imagery — and the pooled probability mass
    of the two groups is compared.
    """

    # Text prompts scored against each frame. ORDER MATTERS: the first
    # _NUM_REAL_LABELS entries describe authentic footage; the rest fakes.
    # These phrasings work best for vlogs, motion blur, and low-quality frames.
    _LABELS = [
        # --- REAL CATEGORY ---
        "raw authentic photo from a camera",
        "youtube vlog frame with text and emojis",
        "low quality phone camera footage",
        # --- FAKE CATEGORY ---
        "ai generated image from text prompt",
        "hyper-realistic cgi 3d render",
        "digital art style synthetic face",
        "deepfake face swap artifacts",
        "unnatural smooth skin texture",
    ]
    _NUM_REAL_LABELS = 3

    # Decision threshold on the summed "fake" probability mass. Slightly
    # above 0.5 (stricter) because video frames are messy — motion blur and
    # compression artifacts push borderline frames toward false positives.
    _FAKE_THRESHOLD = 0.51

    def __init__(self):
        """Load the CLIP model and processor from the Hugging Face hub.

        Exits the process with status 1 if loading fails (e.g. no network
        connection and no local model cache).
        """
        print("⚡ Loading Video AI Model...")
        try:
            # Both model and processor pull directly from Hugging Face.
            self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
            self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
            print("✅ Video Model Ready.")
        except Exception as e:
            print(f"❌ Error: {e}")
            # FIX: sys.exit(1) instead of bare exit(). exit() is the `site`
            # module's interactive helper (absent under -S) and, called with
            # no argument, reports SUCCESS (status 0) on a load failure.
            sys.exit(1)

    def predict(self, image_path):
        """Score one frame and classify it as real or deepfake.

        Args:
            image_path: Path to an image file readable by PIL.

        Returns:
            A ``(label, score)`` tuple: ``("DEEPFAKE DETECTED", fake_prob)``,
            ``("REAL", real_prob)``, or ``("ERROR", 0.0)`` if anything raised.
        """
        try:
            # FIX: force RGB. CLIP's processor expects 3-channel input;
            # grayscale, palette, or RGBA frames (common in video dumps)
            # would otherwise fail or be normalized incorrectly.
            image = Image.open(image_path).convert("RGB")

            inputs = self.processor(
                text=self._LABELS,
                images=image,
                return_tensors="pt",
                padding=True,
            )

            # Inference only — no gradients needed.
            with torch.no_grad():
                outputs = self.model(**inputs)

            # Softmax over all labels, then pool probability mass per group.
            probs = outputs.logits_per_image.softmax(dim=1)
            scores = probs.tolist()[0]
            real_score = sum(scores[:self._NUM_REAL_LABELS])
            fake_score = sum(scores[self._NUM_REAL_LABELS:])

            if fake_score > self._FAKE_THRESHOLD:
                return "DEEPFAKE DETECTED", fake_score
            return "REAL", real_score
        except Exception as e:
            # Broad catch is deliberate: one unreadable/corrupt frame must
            # not abort a whole video scan — caller sees the "ERROR" label.
            print(f"Error predicting {image_path}: {e}")
            return "ERROR", 0.0