import gradio as gr
import torch
import cv2
import numpy as np
from model import GenConViT

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model
# Everything below executes at import time: the network is built, moved to
# `device`, its weights are restored from a checkpoint path that is resolved
# relative to the current working directory, and eval() is called on it.
model = GenConViT().to(device)
# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# torch version / `weights_only` setting -- only load trusted checkpoints.
state = torch.load("genconvit_ed_inference.pth", map_location=device)
# Presumably the file holds a bare state_dict (not a wrapper dict with a
# nested "state_dict" key) -- verify against how the checkpoint was saved.
model.load_state_dict(state)
model.eval()

def preprocess(frame):
    """Convert one image array into a batched, channel-first float32 tensor.

    The image is resized to 224x224, its last (channel) axis is reversed
    (BGR -> RGB for frames decoded by OpenCV), values are divided by 255,
    and the result is rearranged from (H, W, C) to (1, C, H, W).

    Args:
        frame: Image array accepted by ``cv2.resize`` with a trailing channel
            axis; presumably a uint8 BGR frame from ``cv2.VideoCapture.read``
            -- verify against callers.

    Returns:
        A ``torch.float32`` tensor of shape (1, C, 224, 224) on the CPU.
    """
    # NOTE(review): no mean/std normalisation is applied here -- confirm the
    # model was trained on plain 0..1 inputs.
    resized = cv2.resize(frame, (224, 224))
    rgb_unit = resized[:, :, ::-1] / 255.0
    chw = torch.tensor(rgb_unit, dtype=torch.float32).permute(2, 0, 1)
    # Prepend the batch dimension expected by the model call site.
    return chw[None, ...]

def predict(video):
    """Classify a video as "Deepfake" or "Real" from its per-frame scores.

    Every frame that OpenCV can decode is passed through ``preprocess`` and
    the module-level ``model``; the softmax probability at class index 1 is
    collected per frame and averaged over the whole video.

    NOTE(review): this assumes class index 1 means "deepfake" -- confirm
    against the label order used when the checkpoint was trained.

    Args:
        video: Path of the video file to open with ``cv2.VideoCapture``.
            ``None`` (e.g. the form was submitted without an upload) is
            treated like a video with no frames instead of crashing.

    Returns:
        ``"<label> (score: <mean probability, 4 decimals>)"`` where the label
        is "Deepfake" when the mean exceeds 0.5 and "Real" otherwise, or
        ``"No frames detected."`` when nothing could be decoded.
    """
    if video is None:
        return "No frames detected."

    cap = cv2.VideoCapture(video)
    scores = []
    try:
        # One no_grad scope for the whole loop instead of re-entering it for
        # every frame; no autograd state is needed for inference.
        with torch.no_grad():
            while True:
                ret, frame = cap.read()
                if not ret:
                    # End of stream, or the file could not be opened/decoded.
                    break

                inp = preprocess(frame).to(device)
                pred = model(inp)
                scores.append(torch.softmax(pred, dim=1)[0, 1].item())
    finally:
        # Release the decoder even if preprocessing or the model raises.
        cap.release()

    if not scores:
        return "No frames detected."

    deepfake_prob = float(np.mean(scores))
    label = "Deepfake" if deepfake_prob > 0.5 else "Real"

    return f"{label} (score: {deepfake_prob:.4f})"

# UI
# Single-input Gradio app: an uploaded video goes to `predict`, and the
# returned verdict string is rendered as plain text.
# NOTE(review): launch() runs at import time; consider an
# `if __name__ == "__main__":` guard if this module is ever imported.
demo = gr.Interface(
    predict,
    gr.Video(),
    "text",
    title="GenConViT Deepfake Detector",
)
demo.launch()