File size: 2,350 Bytes
664cdae
dbe3f97
 
 
 
 
fa3b1cb
dbe3f97
 
 
 
 
fa3b1cb
 
dbe3f97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import torch
import cv2
from transformers import AutoModelForImageClassification, AutoImageProcessor
from PIL import Image
import numpy as np

# ----------------------------------------------------------
# Load Hugging Face GenConViT Model
# ----------------------------------------------------------
_MODEL_ID = "Thanuja2109/GenConViT"

# Pretrained image classifier and its matching preprocessing pipeline.
model = AutoModelForImageClassification.from_pretrained(_MODEL_ID)
processor = AutoImageProcessor.from_pretrained(_MODEL_ID)

# Inference only: switch off dropout / batch-norm training behavior.
model.eval()

# ----------------------------------------------------------
# Deepfake detection function
# ----------------------------------------------------------
def detect_deepfake(video, frame_interval=10):
    """Classify a video as real or deepfake by averaging per-frame scores.

    Samples one frame every ``frame_interval`` frames, runs each sampled
    frame through the GenConViT image classifier, and averages the
    per-frame fake probabilities into a single verdict.

    Args:
        video: Path to the video file (as supplied by ``gr.Video``), or
            None/empty when the user submitted without uploading.
        frame_interval: Sample every Nth frame (default 10).

    Returns:
        Tuple of (markdown result text, last sampled frame as a
        ``PIL.Image``). The frame is None when nothing was processed.
    """
    # Gradio passes None when the form is submitted with no file attached.
    if not video:
        return "Error: no video provided", None

    cap = cv2.VideoCapture(video)

    if not cap.isOpened():
        # Release even on failure so the capture handle is not leaked.
        cap.release()
        return "Error: cannot open video", None

    scores = []       # per-sampled-frame fake probabilities
    frame_img = None  # last sampled frame, returned for display

    try:
        i = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if i % frame_interval == 0:
                # OpenCV decodes frames as BGR; the processor expects RGB.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                pil_img = Image.fromarray(rgb)

                inputs = processor(images=pil_img, return_tensors="pt")

                with torch.no_grad():
                    logits = model(**inputs).logits
                    # NOTE(review): assumes class index 1 is "fake" —
                    # confirm against model.config.id2label.
                    prob_fake = torch.softmax(logits, dim=1)[0][1].item()

                scores.append(prob_fake)
                frame_img = pil_img  # save last sampled frame

            i += 1
    finally:
        # Always release the capture, even if inference raises mid-loop.
        cap.release()

    if not scores:
        return "No frames processed", None

    avg_score = np.mean(scores)

    label = "🔴 Deepfake" if avg_score > 0.5 else "🟢 Real"

    result_text = f"""
### Prediction: **{label}**
**Confidence (fake probability): {avg_score:.4f}**
"""

    return result_text, frame_img


# ----------------------------------------------------------
# Gradio Interface
# ----------------------------------------------------------
# Two outputs: a markdown verdict plus the last frame that was analyzed.
_outputs = [
    gr.Markdown(label="Prediction"),
    gr.Image(label="Analyzed Frame"),
]

app = gr.Interface(
    fn=detect_deepfake,
    inputs=gr.Video(label="Upload a video"),
    outputs=_outputs,
    title="GenConViT Deepfake Video Detector",
    description="Upload a video. The app samples frames and uses GenConViT to detect deepfakes.",
)

app.launch()