vu0018 committed on
Commit
e8a6045
·
verified ·
1 Parent(s): 905a3b3

Update app.py

Files changed (1)
  1. app.py +86 -187
app.py CHANGED
@@ -1,193 +1,92 @@
- import numpy as np
  import torch
- from torch.utils.model_zoo import load_url
- import matplotlib.pyplot as plt
- from scipy.special import expit
-
- import os
- if not os.path.exists("deepfake-detection"):
-     os.system("git clone https://github.com/ai-cho/deepfake-detection.git")
-
- import sys
- sys.path.append('..')
- sys.path.append('deepfake-detection')
-
- from blazeface import FaceExtractor, BlazeFace, VideoReader
- from architectures import fornet, weights
- from isplutils import utils
-
- import cv2
- import time
-
- import ssl
- ssl._create_default_https_context = ssl._create_unverified_context
-
- import warnings
- warnings.filterwarnings('ignore')
-
- def fpv(video_path, device):
-     facedet = BlazeFace().to(device)
-     facedet.load_weights("deepfake-detection/blazeface/blazeface.pth")
-     facedet.load_anchors("deepfake-detection/blazeface/anchors.npy")
-     videoreader = VideoReader(verbose=False)
-     cap = cv2.VideoCapture(video_path)
-
-     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-     fps = cap.get(cv2.CAP_PROP_FPS)
-     video_duration = int(frame_count / fps)  # in seconds
-
-     video_read_fn = lambda x: videoreader.read_frames(x, num_frames=video_duration)
-     face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)
-     return face_extractor
-
- def soft_voting(model_list, vid_faces, transf, device):
-     faces_left = []    # the face whose bounding-box x-coordinate is smaller
-     faces_right = []
-
-     for frame in vid_faces:
-         if len(frame['faces']) == 1:
-             faces_left.append(frame['faces'][0])
-         elif len(frame['faces']) == 2:
-             if frame['detections'][0][0] < frame['detections'][1][0]:
-                 faces_left.append(frame['faces'][0])
-                 faces_right.append(frame['faces'][1])
-             else:
-                 faces_left.append(frame['faces'][1])
-                 faces_right.append(frame['faces'][0])
-
-     try:
-         faces_left_1 = torch.stack([transf(image=frame)['image'] for frame in faces_left if faces_left])
-     except:
-         pass
-
-     try:
-         faces_right_1 = torch.stack([transf(image=frame)['image'] for frame in faces_right if faces_right])
-     except:
-         pass
-
-     results = []
-     faces = []
-
-     with torch.no_grad():
-         try:
-             result_init = 0
-             result_total_1 = np.zeros_like(model_list[0](faces_left_1.to(device)).cpu().numpy().flatten())
-
-             for model in model_list:
-                 faces_real_pred = model(faces_left_1.to(device)).cpu().numpy().flatten()
-                 result_total_1 = np.add(result_total_1, faces_real_pred)
-                 result = expit(faces_real_pred).mean()
-                 result_init += result
-             results.append(result_init / len(model_list))
-             left_most_frame = np.where(result_total_1 == np.max(result_total_1))[0].item()
-             left_face = faces_left[left_most_frame]
-             faces.append(left_face)
-         except:
-             pass
-
-         try:
-             result_init = 0
-             result_total_2 = np.zeros_like(model_list[0](faces_right_1.to(device)).cpu().numpy().flatten())
-             for model in model_list:
-                 faces_real_pred = model(faces_right_1.to(device)).cpu().numpy().flatten()
-                 result_total_2 = np.add(result_total_2, faces_real_pred)
-                 result = expit(faces_real_pred).mean()
-                 result_init += result
-             results.append(result_init / len(model_list))
-             right_most_frame = np.where(result_total_2 == np.max(result_total_2))[0].item()
-             right_face = faces_right[right_most_frame]
-             faces.append(right_face)
-         except:
-             pass
-     return results, faces
-
- def main(file_path):
-     THRESHOLD = 0.5
-     net_model = 'EfficientNetB4'
-     train_db = 'DFDC'
-     device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
-     face_policy = 'scale'
-     face_size = 224
-     frames_per_video = 32
-     model_list = []
-     for net_model in ['EfficientNetB4', 'EfficientNetB4ST', 'EfficientNetAutoAttB4']:
-         for train_db in ['DFDC']:
-             model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
-             net = getattr(fornet, net_model)().eval().to(device)
-             net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))
-             transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)
-             model_list.append(net)
-
-     faces = fpv(file_path, device).process_video(file_path)
-     deepfake_results, deepfake_faces = soft_voting(model_list, faces, transf, device)
-     if len(deepfake_faces) == 1:
-         deepfake_results = np.array(deepfake_results)
-         fake_prob = deepfake_results.item()
-         real_prob = 1 - fake_prob
-         return real_prob, fake_prob, deepfake_faces
-
-     elif len(deepfake_faces) == 2:
-         deepfake_result1 = np.array(deepfake_results[0])
-         deepfake_result2 = np.array(deepfake_results[1])
-
-         result1_fake_prob = deepfake_result1.item()
-         result2_fake_prob = deepfake_result2.item()
-         return result1_fake_prob, result2_fake_prob, deepfake_faces  # left, right
-
- def predict_deepfake(file_obj):
-     result = main(file_obj)
-
-     # Check the number of detected faces to decide the output format
-     if len(result[2]) == 1:
-         real_prob, fake_prob, faces = result
-         return {"Real Probability": real_prob, "Fake Probability": fake_prob, "Person Face": faces[0]}
-     elif len(result[2]) == 2:
-         result1_fake, result2_fake, faces = result
-         return {
-             "Left Person Fake Probability": result1_fake,
-             "Right Person Fake Probability": result2_fake,
-             "Left Person Face": faces[0],
-             "Right Person Face": faces[1]
-         }
-
- # Gradio formatting function
- def gradio_output(result):
-     if "Real Probability" in result:
-         return (
-             f"Real Probability: {result['Real Probability']}, "
-             f"Fake Probability: {result['Fake Probability']}",
-             result["Person Face"],
-             None,
-         )
-     elif "Left Person Fake Probability" in result:
-         return (
-             f"Left Fake Probability: {result['Left Person Fake Probability']}, "
-             f"Right Fake Probability: {result['Right Person Fake Probability']}",
-             result["Left Person Face"],
-             result["Right Person Face"],
-         )
-     else:  # handle the no-face case
-         return (
-             result["Message"],
-             None,  # Left Person Face
-             None,  # Right Person Face
-         )
-
- import gradio as gr
- # Gradio interface
- demo = gr.Interface(
-     fn=lambda video: gradio_output(predict_deepfake(video)),
-     inputs=gr.Video(label="Upload Video"),
-     outputs=[
-         gr.Label(label="Deepfake Detection Result"),
-         gr.Image(label="Left/Single Person Face"),
-         gr.Image(label="Right Person Face"),
-     ],
-     title="Deepfake Detection Demo",
-     description="Upload a video to detect if it is a deepfake or real. Supports cases with one or two faces, or no faces.",
- )
-
- if __name__ == "__main__":
-     demo.launch(share=True, debug=True)

+ """
+ Hugging Face App: Face Detection in Video
+ -----------------------------------------
+ Uploads a video → detects faces → returns processed video.
+ """
+
+ import gradio as gr
+ import cv2
  import torch
+ import numpy as np
+ import tempfile
+ from transformers import AutoProcessor, AutoModelForObjectDetection
+
+ MODEL_ID = "avaabedi/deepface-detector"
+
+ # Load model + processor (only once, at import time)
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
+ model = AutoModelForObjectDetection.from_pretrained(MODEL_ID)
+ model.eval()
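+
+ # A minimal optional sketch, assuming a CUDA-enabled torch build: moving the
+ # model (and the processed inputs below) to GPU speeds up per-frame inference.
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
+ # model.to(device)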
+
+
+ def detect_faces_in_frame(frame):
+     """Detect faces in a single frame using the HF model."""
+     # OpenCV decodes frames as BGR; HF image processors expect RGB.
+     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+     inputs = processor(images=rgb, return_tensors="pt")
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # target_sizes maps the normalized boxes back to pixel coordinates
+     # of the original (height, width) frame.
+     results = processor.post_process_object_detection(
+         outputs,
+         threshold=0.5,
+         target_sizes=[frame.shape[:2]]
+     )[0]
+
+     return results["boxes"], results["scores"], results["labels"]
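+
+ # A minimal usage sketch on a single frame (hypothetical file name):
+ #   frame = cv2.imread("sample_frame.jpg")
+ #   boxes, scores, labels = detect_faces_in_frame(frame)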
+
+
+ def process_video(video_path):
+     """Reads the video, detects faces frame by frame, draws boxes, writes the output video."""
+     cap = cv2.VideoCapture(video_path)
+     if not cap.isOpened():
+         return "Error: cannot read video."
+
+     fps = cap.get(cv2.CAP_PROP_FPS)
+     w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+     h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+     # Output video file
+     temp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+     out_path = temp_out.name
+
+     # "mp4v" is widely writable, but browsers often cannot play it;
+     # "avc1" (H.264) is safer when the local OpenCV build supports it.
+     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+     writer = cv2.VideoWriter(out_path, fourcc, fps, (w, h))
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # Detect faces
+         boxes, scores, labels = detect_faces_in_frame(frame)
+
+         # Draw detections
+         for box, score in zip(boxes, scores):
+             x1, y1, x2, y2 = map(int, box.tolist())
+             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+             cv2.putText(frame, f"{score:.2f}", (x1, y1 - 5),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+
+         writer.write(frame)
+
+     cap.release()
+     writer.release()
+
+     return out_path
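+
+ # A minimal performance sketch: per-frame detection dominates runtime on CPU.
+ # Running the model only on every Nth frame and reusing the last boxes in
+ # between is a common trade-off (frame_idx here is illustrative only):
+ #   if frame_idx % 5 == 0:
+ #       boxes, scores, labels = detect_faces_in_frame(frame)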
+
+
+ # ------------------------------------------------
+ # GRADIO UI
+ # ------------------------------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🎥 Face Detection in Video (Hugging Face)")
+
+     video_input = gr.Video(label="Upload a video")  # gr.Video takes no type= argument
+     process_btn = gr.Button("Detect Faces")
+
+     video_output = gr.Video(label="Output Video")
+
+     process_btn.click(fn=process_video,
+                       inputs=video_input,
+                       outputs=video_output)
+
+ demo.launch(server_name="0.0.0.0", share=True)
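+
+ # A minimal local run, assuming this file is saved as app.py and MODEL_ID is
+ # reachable on the Hub:
+ #   pip install gradio opencv-python torch transformers
+ #   python app.py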