vu0018 committed on
Commit 905a3b3 · verified · 1 Parent(s): 89d568d

Update app.py

Files changed (1)
  1. app.py +184 -121
app.py CHANGED
@@ -1,130 +1,193 @@
-import os
-import io
-import cv2
 import numpy as np
 import torch
-import gradio as gr
-from transformers import (
-    AutoFeatureExtractor,
-    AutoModelForVideoClassification,
-)
-from huggingface_hub import hf_hub_download
-
-
-MODEL_ID = "Hemgg/deepfake-video-model-100"
-NUM_FRAMES = 16
-TARGET_SIZE = 224
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-MODEL = None
-FEATURE_EXTRACTOR = None
-
-
-def load_model_and_processor():
-    global MODEL, FEATURE_EXTRACTOR
-    if MODEL is None:
-        FEATURE_EXTRACTOR = AutoFeatureExtractor.from_pretrained(MODEL_ID)
-        MODEL = AutoModelForVideoClassification.from_pretrained(MODEL_ID).to(device)
-        MODEL.eval()
-
-
-def extract_frames(video_path, num_frames=NUM_FRAMES):
-    cap = cv2.VideoCapture(video_path)
-    if not cap.isOpened():
-        raise RuntimeError("Could not open video")
-
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    if frame_count <= 0:
-        raise RuntimeError("Video contains no frames")
-
-    indices = np.linspace(0, frame_count - 1, num_frames).astype(int)
-
-    frames = []
-    idx = 0
-    for i in range(frame_count):
-        ret, frame = cap.read()
-        if not ret:
-            break
-        if i == indices[idx]:
-            frames.append(frame)
-            idx += 1
-            if idx >= len(indices):
-                break
-
-    cap.release()
-
-    # If video too short, duplicate last frame
-    while len(frames) < num_frames:
-        frames.append(frames[-1])
-
-    return frames
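Note: the removed `extract_frames` picks `num_frames` indices evenly across the clip with `np.linspace`, endpoints included. A minimal standalone sketch of that sampling (the 300-frame clip length is illustrative):

import numpy as np

# 16 evenly spaced indices over a 300-frame clip, first and last frame included
indices = np.linspace(0, 300 - 1, 16).astype(int)
print(indices)       # starts 0, 19, 39, ... and ends at 299
print(len(indices))  # 16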
 
 
 
 
-def preprocess_frames(frames):
-    output = []
-    for frame in frames:
-        # BGR → RGB
-        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-        # Resize and center crop
-        h, w, _ = img.shape
-        short = min(h, w)
-        scale = TARGET_SIZE / short
-        img = cv2.resize(img, (int(w * scale), int(h * scale)))
-
-        h2, w2, _ = img.shape
-        y = (h2 - TARGET_SIZE) // 2
-        x = (w2 - TARGET_SIZE) // 2
-        img = img[y:y+TARGET_SIZE, x:x+TARGET_SIZE]
-
-        output.append(img)
-
-    return np.stack(output)
-
-
-def predict_video(video_path):
-    load_model_and_processor()
-
-    frames = extract_frames(video_path)
-    frames_np = preprocess_frames(frames)
-
-    # Use Hugging Face feature extractor to normalize frames
-    inputs = FEATURE_EXTRACTOR(list(frames_np), return_tensors="pt")
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-
-    with torch.no_grad():
-        outputs = MODEL(**inputs)
-        probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
-
-    # Map index → label
-    id2label = MODEL.config.id2label
-    scores = {id2label[i]: float(probs[i]) for i in range(len(probs))}
-
-    top_idx = np.argmax(probs)
-    return id2label[top_idx], float(probs[top_idx]), scores
-
-
-# -----------------------------
-# Gradio UI
-# -----------------------------
-
-with gr.Blocks() as demo:
-    gr.Markdown("# Deepfake Video Detector")
-    gr.Markdown("Upload a video and the model will classify it as real or fake.")
-
-    video_input = gr.Video(label="Upload video", type="filepath")
-    btn = gr.Button("Analyze")
-
-    out_label = gr.Text(label="Prediction")
-    out_score = gr.Number(label="Confidence")
-    out_json = gr.JSON(label="All class probabilities")
-
-    btn.click(
-        fn=predict_video,
-        inputs=video_input,
-        outputs=[out_label, out_score, out_json]
-    )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", share=True)
 
 
 
 
 import numpy as np
 import torch
+from torch.utils.model_zoo import load_url
+import matplotlib.pyplot as plt
+from scipy.special import expit
+
+import os
+if not os.path.exists("deepfake-detection"):
+    os.system("git clone https://github.com/ai-cho/deepfake-detection.git")
+
+import sys
+sys.path.append('..')
+sys.path.append('deepfake-detection')
+
+from blazeface import FaceExtractor, BlazeFace, VideoReader
+from architectures import fornet, weights
+from isplutils import utils
+
+import cv2
+import time
+
+import ssl
+ssl._create_default_https_context = ssl._create_unverified_context
+
+import warnings
+warnings.filterwarnings('ignore')
+
+def fpv(video_path, device):
+    facedet = BlazeFace().to(device)
+    facedet.load_weights("deepfake-detection/blazeface/blazeface.pth")
+    facedet.load_anchors("deepfake-detection/blazeface/anchors.npy")
+    videoreader = VideoReader(verbose=False)
+    cap = cv2.VideoCapture(video_path)
+
+    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    video_duration = int(frame_count / fps)  # clip length in seconds
+
+    video_read_fn = lambda x: videoreader.read_frames(x, num_frames=video_duration)
+    face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)
+    return face_extractor
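Since `num_frames=video_duration` and the duration is in seconds, `fpv` asks the reader for roughly one frame per second of video. A quick sanity check of that arithmetic (numbers illustrative):

frame_count = 900            # e.g. a 30 s clip shot at 30 fps
fps = 30.0
video_duration = int(frame_count / fps)
print(video_duration)        # 30 -> about one sampled frame per second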
+
+def soft_voting(model_list, vid_faces, transf, device):
+    faces_left = []   # faces whose bounding box has the smaller x coordinate
+    faces_right = []
+
+    for frame in vid_faces:
+        if len(frame['faces']) == 1:
+            faces_left.append(frame['faces'][0])
+
+        elif len(frame['faces']) == 2:
+            if frame['detections'][0][0] < frame['detections'][1][0]:
+                faces_left.append(frame['faces'][0])
+                faces_right.append(frame['faces'][1])
+            else:
+                faces_left.append(frame['faces'][1])
+                faces_right.append(frame['faces'][0])
+
+    try:
+        faces_left_1 = torch.stack([transf(image=frame)['image'] for frame in faces_left if faces_left])
+    except:
+        pass
+
+    try:
+        faces_right_1 = torch.stack([transf(image=frame)['image'] for frame in faces_right if faces_right])
+    except:
+        pass
+
+    results = []
+    faces = []
+
+    with torch.no_grad():
+        try:
+            result_init = 0
+            result_total_1 = np.zeros_like(model_list[0](faces_left_1.to(device)).cpu().numpy().flatten())
+
+            for model in model_list:
+                faces_real_pred = model(faces_left_1.to(device)).cpu().numpy().flatten()
+                result_total_1 = np.add(result_total_1, faces_real_pred)
+                result = expit(faces_real_pred).mean()
+                result_init += result
+            results.append(result_init / len(model_list))
+            left_most_frame = np.where(result_total_1 == np.max(result_total_1))[0].item()
+            left_face = faces_left[left_most_frame]
+            faces.append(left_face)
+        except:
+            pass
+
+        try:
+            result_init = 0
+            result_total_2 = np.zeros_like(model_list[0](faces_right_1.to(device)).cpu().numpy().flatten())
+            for model in model_list:
+                faces_real_pred = model(faces_right_1.to(device)).cpu().numpy().flatten()
+                result_total_2 = np.add(result_total_2, faces_real_pred)
+                result = expit(faces_real_pred).mean()
+                result_init += result
+            results.append(result_init / len(model_list))
+            right_most_frame = np.where(result_total_2 == np.max(result_total_2))[0].item()
+            right_face = faces_right[right_most_frame]
+            faces.append(right_face)
+        except:
+            pass
+    return results, faces
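The voting step itself is sigmoid-then-average: `expit` turns each model's per-frame logits into fake probabilities, those are averaged over frames, and the per-model means are averaged over the ensemble. A minimal numeric sketch of that reduction (logits invented for illustration):

import numpy as np
from scipy.special import expit

per_model_logits = [np.array([2.0, 1.5, -0.5]),   # model 1: one logit per frame
                    np.array([1.0, 0.5, 0.0])]    # model 2

score = sum(expit(l).mean() for l in per_model_logits) / len(per_model_logits)
print(round(score, 2))  # 0.65 -> ensemble-averaged fake probability for this face track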
+
+def main(file_path):
+    THRESHOLD = 0.5
+    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
+    face_policy = 'scale'
+    face_size = 224
+    frames_per_video = 32
+    model_list = []
+    for net_model in ['EfficientNetB4', 'EfficientNetB4ST', 'EfficientNetAutoAttB4']:
+        for train_db in ['DFDC']:
+            model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
+            net = getattr(fornet, net_model)().eval().to(device)
+            net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))
+            transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)
+            model_list.append(net)
+
+    faces = fpv(file_path, device).process_video(file_path)
+    deepfake_results, deepfake_faces = soft_voting(model_list, faces, transf, device)
+    if len(deepfake_faces) == 1:
+        fake_prob = np.array(deepfake_results).item()
+        real_prob = 1 - fake_prob
+        return real_prob, fake_prob, deepfake_faces
+
+    elif len(deepfake_faces) == 2:
+        result1_fake_prob = np.array(deepfake_results[0]).item()
+        result2_fake_prob = np.array(deepfake_results[1]).item()
+        return result1_fake_prob, result2_fake_prob, deepfake_faces  # left, right
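`THRESHOLD` is declared in `main` but never applied, so callers receive raw probabilities. If a hard label is wanted, it could be binarized at the call site, e.g. for the single-face return shape (the video path is hypothetical):

real_prob, fake_prob, faces = main("some_video.mp4")
label = "FAKE" if fake_prob >= 0.5 else "REAL"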
+
+def predict_deepfake(file_obj):
+    result = main(file_obj)
+
+    # Check the type of result to decide the output format
+    if len(result[2]) == 1:
+        real_prob, fake_prob, faces = result
+        return {"Real Probability": real_prob, "Fake Probability": fake_prob, "Person Face": faces[0]}
+    elif len(result[2]) == 2:
+        result1_fake, result2_fake, faces = result
+        return {
+            "Left Person Fake Probability": result1_fake,
+            "Right Person Fake Probability": result2_fake,
+            "Left Person Face": faces[0],
+            "Right Person Face": faces[1]
+        }
+
+# Gradio output-formatting function
+def gradio_output(result):
+    if "Real Probability" in result:
+        return (
+            f"Real Probability: {result['Real Probability']}, "
+            f"Fake Probability: {result['Fake Probability']}",
+            result["Person Face"],
+            None,
+        )
+    elif "Left Person Fake Probability" in result:
+        return (
+            f"Left Fake Probability: {result['Left Person Fake Probability']}, "
+            f"Right Fake Probability: {result['Right Person Fake Probability']}",
+            result["Left Person Face"],
+            result["Right Person Face"],
+        )
+    else:  # handle the no-face case
+        return (
+            result["Message"],
+            None,  # Left Person Face
+            None,  # Right Person Face
+        )
+
+import gradio as gr
+
+# Gradio
+demo = gr.Interface(
+    fn=lambda video: gradio_output(predict_deepfake(video)),
+    inputs=gr.Video(label="Upload Video"),
+    outputs=[
+        gr.Label(label="Deepfake Detection Result"),
+        gr.Image(label="Left/Single Person Face"),
+        gr.Image(label="Right Person Face"),
+    ],
+    title="Deepfake Detection Demo",
+    description="Upload a video to detect if it is a deepfake or real. Supports cases with one or two faces, or no faces.",
+)
 
 if __name__ == "__main__":
+    demo.launch(share=True, debug=True)
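A minimal local smoke test of the new entry point, assuming the repository clone and the DFDC weight downloads succeed (`sample.mp4` is a hypothetical clip):

result = predict_deepfake("sample.mp4")
print({k: v for k, v in result.items() if k.endswith("Probability")})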