vu0018 committed on
Commit
e8a6045
·
verified ·
1 Parent(s): 905a3b3

Update app.py

Files changed (1)
  1. app.py +86 -187
app.py CHANGED
@@ -1,193 +1,92 @@
- import numpy as np
  import torch
- from torch.utils.model_zoo import load_url
- import matplotlib.pyplot as plt
- from scipy.special import expit
-
- import os
- if not os.path.exists("deepfake-detection"):
-     os.system("git clone https://github.com/ai-cho/deepfake-detection.git")
-
- import sys
- sys.path.append('..')
- sys.path.append('deepfake-detection')
-
- from blazeface import FaceExtractor, BlazeFace, VideoReader
- from architectures import fornet, weights
- from isplutils import utils
-
- import cv2
- import time
-
- import ssl
- ssl._create_default_https_context = ssl._create_unverified_context
-
- import warnings
- warnings.filterwarnings('ignore')
-
- def fpv(video_path, device):
-     facedet = BlazeFace().to(device)
-     facedet.load_weights("deepfake-detection/blazeface/blazeface.pth")
-     facedet.load_anchors("deepfake-detection/blazeface/anchors.npy")
-     videoreader = VideoReader(verbose=False)
-     cap = cv2.VideoCapture(video_path)
-
-     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-     fps = cap.get(cv2.CAP_PROP_FPS)
-     video_duration = int(frame_count / fps)  # in seconds
-
-     video_read_fn = lambda x: videoreader.read_frames(x, num_frames=video_duration)
-     face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)
-     return face_extractor
-
- def soft_voting(model_list, vid_faces, transf, device):
-     faces_left = []    # the face whose bounding-box x-coordinate is smaller
-     faces_right = []
-
-     for frame in vid_faces:
-         if len(frame['faces']) == 1:
-             faces_left.append(frame['faces'][0])
-         elif len(frame['faces']) == 2:
-             if frame['detections'][0][0] < frame['detections'][1][0]:
-                 faces_left.append(frame['faces'][0])
-                 faces_right.append(frame['faces'][1])
-             else:
-                 faces_left.append(frame['faces'][1])
-                 faces_right.append(frame['faces'][0])
-
-     try:
-         faces_left_1 = torch.stack([transf(image=frame)['image'] for frame in faces_left if faces_left])
-     except:
-         pass
-
-     try:
-         faces_right_1 = torch.stack([transf(image=frame)['image'] for frame in faces_right if faces_right])
-     except:
-         pass
-
-     results = []
-     faces = []
-
-     with torch.no_grad():
-         try:
-             result_init = 0
-             result_total_1 = np.zeros_like(model_list[0](faces_left_1.to(device)).cpu().numpy().flatten())
-
-             for model in model_list:
-                 faces_real_pred = model(faces_left_1.to(device)).cpu().numpy().flatten()
-                 result_total_1 = np.add(result_total_1, faces_real_pred)
-                 result = expit(faces_real_pred).mean()
-                 result_init += result
-             results.append(result_init / len(model_list))
-             left_most_frame = np.where(result_total_1 == np.max(result_total_1))[0].item()
-             left_face = faces_left[left_most_frame]
-             faces.append(left_face)
-         except:
-             pass
-
-         try:
-             result_init = 0
-             result_total_2 = np.zeros_like(model_list[0](faces_right_1.to(device)).cpu().numpy().flatten())
-             for model in model_list:
-                 faces_real_pred = model(faces_right_1.to(device)).cpu().numpy().flatten()
-                 result_total_2 = np.add(result_total_2, faces_real_pred)
-                 result = expit(faces_real_pred).mean()
-                 result_init += result
-             results.append(result_init / len(model_list))
-             right_most_frame = np.where(result_total_2 == np.max(result_total_2))[0].item()
-             right_face = faces_right[right_most_frame]
-             faces.append(right_face)
-         except:
-             pass
-     return results, faces
-
- def main(file_path):
-     THRESHOLD = 0.5
-     net_model = 'EfficientNetB4'
-     train_db = 'DFDC'
-     device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
-     face_policy = 'scale'
-     face_size = 224
-     frames_per_video = 32
-     model_list = []
-     for net_model in ['EfficientNetB4', 'EfficientNetB4ST', 'EfficientNetAutoAttB4']:
-         for train_db in ['DFDC']:
-             model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
-             net = getattr(fornet, net_model)().eval().to(device)
-             net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))
-             transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)
-             model_list.append(net)
-
-     faces = fpv(file_path, device).process_video(file_path)
-     deepfake_results, deepfake_faces = soft_voting(model_list, faces, transf, device)
-     if len(deepfake_faces) == 1:
-         deepfake_results = np.array(deepfake_results)
-         fake_prob = deepfake_results.item()
-         real_prob = 1 - fake_prob
-         return real_prob, fake_prob, deepfake_faces
-
-     elif len(deepfake_faces) == 2:
-         deepfake_result1 = np.array(deepfake_results[0])
-         deepfake_result2 = np.array(deepfake_results[1])
-
-         result1_fake_prob = deepfake_result1.item()
-         result2_fake_prob = deepfake_result2.item()
-         return result1_fake_prob, result2_fake_prob, deepfake_faces  # left, right
-
- def predict_deepfake(file_obj):
-     result = main(file_obj)
-
-     # Check the number of detected faces to decide the output format
-     if len(result[2]) == 1:
-         real_prob, fake_prob, faces = result
-         return {"Real Probability": real_prob, "Fake Probability": fake_prob, "Person Face": faces[0]}
-     elif len(result[2]) == 2:
-         result1_fake, result2_fake, faces = result
-         return {
-             "Left Person Fake Probability": result1_fake,
-             "Right Person Fake Probability": result2_fake,
-             "Left Person Face": faces[0],
-             "Right Person Face": faces[1]
-         }
-
- # Gradio formatting function
- def gradio_output(result):
-     if "Real Probability" in result:
-         return (
-             f"Real Probability: {result['Real Probability']}, "
-             f"Fake Probability: {result['Fake Probability']}",
-             result["Person Face"],
-             None,
-         )
-     elif "Left Person Fake Probability" in result:
-         return (
-             f"Left Fake Probability: {result['Left Person Fake Probability']}, "
-             f"Right Fake Probability: {result['Right Person Fake Probability']}",
-             result["Left Person Face"],
-             result["Right Person Face"],
-         )
-     else:  # handle the no-face case
-         return (
-             result["Message"],
-             None,  # Left Person Face
-             None,  # Right Person Face
-         )
-
- import gradio as gr
- # Gradio interface
- demo = gr.Interface(
-     fn=lambda video: gradio_output(predict_deepfake(video)),
-     inputs=gr.Video(label="Upload Video"),
-     outputs=[
-         gr.Label(label="Deepfake Detection Result"),
-         gr.Image(label="Left/Single Person Face"),
-         gr.Image(label="Right Person Face"),
-     ],
-     title="Deepfake Detection Demo",
-     description="Upload a video to detect if it is a deepfake or real. Supports cases with one or two faces, or no faces.",
- )
-
- if __name__ == "__main__":
-     demo.launch(share=True, debug=True)

+ """
+ Hugging Face App: Face Detection in Video
+ -----------------------------------------
+ Uploads a video → detects faces → returns processed video.
+ """
+
+ import gradio as gr
+ import cv2
  import torch
+ import numpy as np
+ import tempfile
+ from transformers import AutoProcessor, AutoModelForObjectDetection
+
+ MODEL_ID = "avaabedi/deepface-detector"
+
+ # Load model + processor (only once, at import time)
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
+ model = AutoModelForObjectDetection.from_pretrained(MODEL_ID)
+ model.eval()
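+
+ # A minimal optional sketch, assuming a CUDA-enabled torch build: moving the
+ # model (and the processed inputs below) to GPU speeds up per-frame inference.
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
+ # model.to(device)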
+
+
+ def detect_faces_in_frame(frame):
+     """Detect faces in a single frame using the HF model."""
+     # OpenCV decodes frames as BGR; HF image processors expect RGB.
+     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+     inputs = processor(images=rgb, return_tensors="pt")
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # target_sizes maps the normalized boxes back to pixel coordinates
+     # of the original (height, width) frame.
+     results = processor.post_process_object_detection(
+         outputs,
+         threshold=0.5,
+         target_sizes=[frame.shape[:2]]
+     )[0]
+
+     return results["boxes"], results["scores"], results["labels"]
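+
+ # A minimal usage sketch on a single frame (hypothetical file name):
+ #   frame = cv2.imread("sample_frame.jpg")
+ #   boxes, scores, labels = detect_faces_in_frame(frame)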
+
+
+ def process_video(video_path):
+     """Reads the video, detects faces frame by frame, draws boxes, writes the output video."""
+     cap = cv2.VideoCapture(video_path)
+     if not cap.isOpened():
+         return "Error: cannot read video."
+
+     fps = cap.get(cv2.CAP_PROP_FPS)
+     w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+     h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+     # Output video file
+     temp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+     out_path = temp_out.name
+
+     # "mp4v" is widely writable, but browsers often cannot play it;
+     # "avc1" (H.264) is safer when the local OpenCV build supports it.
+     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+     writer = cv2.VideoWriter(out_path, fourcc, fps, (w, h))
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # Detect faces
+         boxes, scores, labels = detect_faces_in_frame(frame)
+
+         # Draw detections
+         for box, score in zip(boxes, scores):
+             x1, y1, x2, y2 = map(int, box.tolist())
+             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+             cv2.putText(frame, f"{score:.2f}", (x1, y1 - 5),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+
+         writer.write(frame)
+
+     cap.release()
+     writer.release()
+
+     return out_path
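+
+ # A minimal performance sketch: per-frame detection dominates runtime on CPU.
+ # Running the model only on every Nth frame and reusing the last boxes in
+ # between is a common trade-off (frame_idx here is illustrative only):
+ #   if frame_idx % 5 == 0:
+ #       boxes, scores, labels = detect_faces_in_frame(frame)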
+
+
+ # ------------------------------------------------
+ # GRADIO UI
+ # ------------------------------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🎥 Face Detection in Video (Hugging Face)")
+
+     video_input = gr.Video(label="Upload a video")  # gr.Video takes no type= argument
+     process_btn = gr.Button("Detect Faces")
+
+     video_output = gr.Video(label="Output Video")
+
+     process_btn.click(fn=process_video,
+                       inputs=video_input,
+                       outputs=video_output)
+
+ demo.launch(server_name="0.0.0.0", share=True)
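+
+ # A minimal local run, assuming this file is saved as app.py and MODEL_ID is
+ # reachable on the Hub:
+ #   pip install gradio opencv-python torch transformers
+ #   python app.py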