File size: 3,935 Bytes
94adcf4
 
 
 
48fb957
50816f6
94adcf4
 
3be3d58
94adcf4
 
 
 
48fb957
 
 
 
50816f6
e1a3b70
50816f6
e1a3b70
50816f6
 
 
e1a3b70
1d764ab
94adcf4
b5f6950
1d764ab
 
ea99508
 
 
1d764ab
a8ba371
6449f64
 
a195a24
1d764ab
 
94adcf4
 
 
1d764ab
94adcf4
1d764ab
a62e9f8
94adcf4
ea99508
1d764ab
94adcf4
 
 
a195a24
ea99508
 
 
a195a24
ea99508
1d764ab
48fb957
a195a24
 
1d764ab
 
 
50816f6
 
a195a24
ea99508
50816f6
ea99508
 
 
ba73931
 
50816f6
ba73931
50816f6
 
 
 
 
 
1d764ab
ea99508
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94adcf4
a62e9f8
94adcf4
ea99508
94adcf4
 
 
a62e9f8
ea99508
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import cv2
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import gradio as gr
from spaces import GPU

# Pick the compute device first so the model can be placed in one step.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the image processor and classifier from the Hugging Face hub,
# move the model to the chosen device, and switch it to inference mode.
model_name = "Smogy/SMOGY-Ai-images-detector"
processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForImageClassification.from_pretrained(model_name).to(device).eval()

# Get label mappings (lower-cased label text -> integer class index).
id2label = model.config.id2label
label2id = {v.lower(): k for k, v in id2label.items()}


def _first_label_index(label_map, candidates, fallback):
    """Return the class index of the first candidate label present in *label_map*.

    Uses an explicit ``is not None`` check: the previous
    ``label_map.get(a) or label_map.get(b) or fallback`` chain treated a
    valid class index of 0 as "missing" (0 is falsy) and fell through to
    the wrong fallback.
    """
    for name in candidates:
        index = label_map.get(name)
        if index is not None:
            return index
    return fallback


# Try to identify AI and Human class indexes for this checkpoint.
ai_class_index = _first_label_index(label2id, ("artificial", "ai"), 0)
human_class_index = _first_label_index(label2id, ("human", "real"), 1)

def _sample_frames(cap, total_frames, max_frames=200):
    """Read up to *max_frames* evenly spaced frames from an open capture.

    Returns a list of RGB PIL images. The caller owns *cap* and is
    responsible for releasing it.
    """
    num_frames_to_process = min(max_frames, total_frames)
    frame_indices = [int(i * total_frames / num_frames_to_process)
                     for i in range(num_frames_to_process)]

    frames = []
    current_index = 0
    frame_num = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or current_index >= len(frame_indices):
            break
        if frame_num == frame_indices[current_index]:
            # OpenCV decodes BGR; the HF image processor expects RGB.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(rgb))
            current_index += 1
        frame_num += 1
    return frames


def _verdict(ai_frame_ratio, avg_score):
    """Map per-frame statistics onto a human-readable verdict string."""
    if ai_frame_ratio < 0.25:
        return "βœ… Very likely real β€” no strong signs of AI generation."
    if ai_frame_ratio < 0.5:
        return "⚠️ Mostly real β€” a few signs could raise suspicion."
    if avg_score >= 0.85:
        return "🚨 Definitely AI-generated β€” high confidence across many frames."
    if avg_score >= 0.75:
        return "⚠️ Likely AI-generated β€” multiple signs suggest manipulation."
    return "πŸ€” Unclear β€” the video shows mixed signals."


@GPU
def analyze_video(video):
    """Sample frames from *video* and classify each as AI-generated or real.

    Parameters:
        video: Path to a video file readable by OpenCV.

    Returns:
        dict: ``{"error": msg}`` on failure; otherwise a summary with keys
        ``ai_frame_count``, ``total_frames`` (number of sampled frames),
        ``ai_frame_ratio``, ``average_ai_confidence`` and ``verdict``.
    """
    cap = cv2.VideoCapture(video)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            return {
                "error": "❌ Could not read video frames."
            }
        frames_to_process = _sample_frames(cap, total_frames)
    finally:
        # Release the capture on every path — the previous version leaked
        # it on the early "could not read" return.
        cap.release()

    if not frames_to_process:
        return {
            "error": "❌ No frames extracted."
        }

    inputs = processor(images=frames_to_process, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)

    # AI confidence is the complement of the human-class probability
    # (assumes a two-class checkpoint — TODO confirm for other models).
    human_confidences = probs[:, human_class_index].tolist()
    ai_confidences = [1 - h for h in human_confidences]

    total = len(ai_confidences)
    ai_frame_count = sum(1 for score in ai_confidences if score >= 0.5)
    ai_frame_ratio = ai_frame_count / total
    avg_score = sum(ai_confidences) / total

    return {
        "ai_frame_count": ai_frame_count,
        "total_frames": total,
        "ai_frame_ratio": round(ai_frame_ratio, 3),
        "average_ai_confidence": round(avg_score, 3),
        "verdict": _verdict(ai_frame_ratio, avg_score)
    }

# Optional: Pretty UI for humans
def analyze_video_ui(video):
    """Run analyze_video and render its summary dict as display text."""
    result = analyze_video(video)
    if "error" in result:
        return result["error"]

    flagged = result["ai_frame_count"]
    total = result["total_frames"]
    ratio_pct = result["ai_frame_ratio"] * 100
    confidence_pct = result["average_ai_confidence"] * 100

    report_lines = [
        f"Frames flagged as AI-generated: {flagged} out of {total} ({ratio_pct:.1f}%)",
        f"Average AI Confidence: {confidence_pct:.1f}%",
        f"Final Verdict: {result['verdict']}",
    ]
    return "\n".join(report_lines)

# Gradio interface: wire the formatter up to a video-in / text-out app.
demo = gr.Interface(
    fn=analyze_video_ui,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.Textbox(label="Detection Results"),
    title="AI Frame Detector",
    description="Analyzes sampled frames from a video to detect whether it's likely AI-generated or real.",
)
demo.launch()