Update app.py
app.py
CHANGED
@@ -1,130 +1,193 @@
-import os
-import io
-import cv2
 import numpy as np
 import torch
-
-from transformers import (
-    AutoFeatureExtractor,
-    AutoModelForVideoClassification,
-)
-from huggingface_hub import hf_hub_download
-
-
-MODEL_ID = "Hemgg/deepfake-video-model-100"
-NUM_FRAMES = 16
-TARGET_SIZE = 224
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-MODEL = None
-FEATURE_EXTRACTOR = None
-
-
-def load_model_and_processor():
-    global MODEL, FEATURE_EXTRACTOR
-    if MODEL is None:
-        FEATURE_EXTRACTOR = AutoFeatureExtractor.from_pretrained(MODEL_ID)
-        MODEL = AutoModelForVideoClassification.from_pretrained(MODEL_ID).to(device)
-        MODEL.eval()
-
-
-def extract_frames(video_path, num_frames=NUM_FRAMES):
-    cap = cv2.VideoCapture(video_path)
-    if not cap.isOpened():
-        raise RuntimeError("Could not open video")
-
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    if frame_count <= 0:
-        raise RuntimeError("Video contains no frames")
-
-    indices = np.linspace(0, frame_count - 1, num_frames).astype(int)
-
-    frames = []
-    idx = 0
-    for i in range(frame_count):
-        ret, frame = cap.read()
-        if not ret:
-            break
-        if i == indices[idx]:
-            frames.append(frame)
-            idx += 1
-            if idx >= len(indices):
-                break
-
-    cap.release()
-
-    # If video too short, duplicate last frame
-    while len(frames) < num_frames:
-        frames.append(frames[-1])
-
-    return frames
-
-
-def preprocess_frames(frames):
-    output = []
-    for frame in frames:
-        # BGR → RGB
-        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-        # Resize and center crop
-        h, w, _ = img.shape
-        short = min(h, w)
-        scale = TARGET_SIZE / short
-        img = cv2.resize(img, (int(w * scale), int(h * scale)))
-
-        h2, w2, _ = img.shape
-        y = (h2 - TARGET_SIZE) // 2
-        x = (w2 - TARGET_SIZE) // 2
-        img = img[y:y+TARGET_SIZE, x:x+TARGET_SIZE]
-
-        output.append(img)
-
-    return np.stack(output)
-
-
-def predict_video(video_path):
-    load_model_and_processor()
-
-    frames = extract_frames(video_path)
-    frames_np = preprocess_frames(frames)
-
-    # Use Hugging Face feature extractor to normalize frames
-    inputs = FEATURE_EXTRACTOR(list(frames_np), return_tensors="pt")
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-
-    with torch.no_grad():
-        outputs = MODEL(**inputs)
-        probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
-
-    # Map index → label
-    id2label = MODEL.config.id2label
-    scores = {id2label[i]: float(probs[i]) for i in range(len(probs))}
-
-    top_idx = np.argmax(probs)
-    return id2label[top_idx], float(probs[top_idx]), scores
-
-
-# -----------------------------
-# Gradio UI
-# -----------------------------
-
-with gr.Blocks() as demo:
-    gr.Markdown("# Deepfake Video Detector")
-    gr.Markdown("Upload a video and the model will classify it as real or fake.")
 
-
-    btn = gr.Button("Analyze")
 
-
-
-
 
-
-
-
-
-
 
+from torch.utils.model_zoo import load_url
+import matplotlib.pyplot as plt
+from scipy.special import expit
 
+import os
+if not os.path.exists("deepfake-detection"):
+    os.system("git clone https://github.com/ai-cho/deepfake-detection.git")
 
+import sys
+sys.path.append('..')
 
+sys.path.append('deepfake-detection')
 
+from blazeface import FaceExtractor, BlazeFace, VideoReader
+from architectures import fornet, weights
+from isplutils import utils
 
+import cv2
+import time
+
+import ssl
+ssl._create_default_https_context = ssl._create_unverified_context
+
+import warnings
+warnings.filterwarnings('ignore')
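+# fpv: builds a FaceExtractor for one clip. BlazeFace detects faces frame by
+# frame; VideoReader samples num_frames frames evenly, here one per second.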
+def fpv(video_path, device):
+    facedet = BlazeFace().to(device)
+    facedet.load_weights("deepfake-detection/blazeface/blazeface.pth")
+    facedet.load_anchors("deepfake-detection/blazeface/anchors.npy")
+    videoreader = VideoReader(verbose=False)
+    cap = cv2.VideoCapture(video_path)
+
+    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    video_duration = int(frame_count / fps)  # in seconds
+
+    video_read_fn = lambda x: videoreader.read_frames(x, num_frames=video_duration)
+    face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)
+    return face_extractor
+
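+# soft_voting: splits detected faces into a left and a right person by
+# bounding-box x-coordinate, averages each ensemble member's mean sigmoid
+# score per person, and keeps the face crop from the frame with the highest
+# summed logit for display.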
+def soft_voting(model_list, vid_faces, transf, device):
+    faces_left = []   # face whose bounding box has the smaller x-coordinate
+    faces_right = []
+
+    for frame in vid_faces:
+        if len(frame['faces']) == 1:
+            faces_left.append(frame['faces'][0])
+
+        elif len(frame['faces']) == 2:
+            if frame['detections'][0][0] < frame['detections'][1][0]:
+                faces_left.append(frame['faces'][0])
+                faces_right.append(frame['faces'][1])
+
+            else:
+                faces_left.append(frame['faces'][1])
+                faces_right.append(frame['faces'][0])
+
+    try:
+        faces_left_1 = torch.stack([transf(image=frame)['image'] for frame in faces_left if faces_left])
+    except:
+        pass
+
+    try:
+        faces_right_1 = torch.stack([transf(image=frame)['image'] for frame in faces_right if faces_right])
+    except:
+        pass
+
+    results = []
+    faces = []
+
+    with torch.no_grad():
+        try:
+            result_init = 0
+            result_total_1 = np.zeros_like(model_list[0](faces_left_1.to(device)).cpu().numpy().flatten())
+
+            for model in model_list:
+                faces_real_pred = model(faces_left_1.to(device)).cpu().numpy().flatten()
+                result_total_1 = np.add(result_total_1, faces_real_pred)
+                result = expit(faces_real_pred).mean()
+                result_init += result
+            results.append(result_init / len(model_list))
+            left_most_frame = np.where(result_total_1 == np.max(result_total_1))[0].item()
+            left_face = faces_left[left_most_frame]
+            faces.append(left_face)
+        except:
+            pass
+
+        try:
+            result_init = 0
+            result_total_2 = np.zeros_like(model_list[0](faces_right_1.to(device)).cpu().numpy().flatten())
+            for model in model_list:
+                faces_real_pred = model(faces_right_1.to(device)).cpu().numpy().flatten()
+                result_total_2 = np.add(result_total_2, faces_real_pred)
+                result = expit(faces_real_pred).mean()
+                result_init += result
+            results.append(result_init / len(model_list))
+            right_most_frame = np.where(result_total_2 == np.max(result_total_2))[0].item()
+            right_face = faces_right[right_most_frame]
+            faces.append(right_face)
+        except:
+            pass
+    return results, faces
+
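+# main: loads the three DFDC-trained EfficientNet variants, extracts faces
+# from the uploaded video, and soft-votes a fake probability per person.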
+def main(file_path):
+    THRESHOLD = 0.5
+    net_model = 'EfficientNetB4'
+    train_db = 'DFDC'
+    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
+    face_policy = 'scale'
+    face_size = 224
+    frames_per_video = 32
+    model_list = []
+    for net_model in ['EfficientNetB4', 'EfficientNetB4ST', 'EfficientNetAutoAttB4']:
+        for train_db in ['DFDC']:
+            model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
+            net = getattr(fornet, net_model)().eval().to(device)
+            net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))
+            transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)
+            model_list.append(net)
+
+    faces = fpv(file_path, device).process_video(file_path)
+    deepfake_results, deepfake_faces = soft_voting(model_list, faces, transf, device)
+    if len(deepfake_faces) == 1:
+        deepfake_results = np.array(deepfake_results)
+        fake_prob = deepfake_results.item()
+        real_prob = 1 - fake_prob
+        return real_prob, fake_prob, deepfake_faces
+
+    elif len(deepfake_faces) == 2:
+        deepfake_result1 = np.array(deepfake_results[0])
+        deepfake_result2 = np.array(deepfake_results[1])
+
+        result1_fake_prob = deepfake_result1.item()
+        result2_fake_prob = deepfake_result2.item()
+        return result1_fake_prob, result2_fake_prob, deepfake_faces  # left, right
+
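+# Note: main() falls through and returns None when no face is detected, so
+# len(result[2]) below raises a TypeError in that case; the "Message" branch
+# in gradio_output is never actually reached.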
+def predict_deepfake(file_obj):
+    result = main(file_obj)
+
+    # Check the type of result to decide the output format
+    if len(result[2]) == 1:
+        real_prob, fake_prob, faces = result
+        return {"Real Probability": real_prob, "Fake Probability": fake_prob, "Person Face": faces[0]}
+    elif len(result[2]) == 2:
+        result1_fake, result2_fake, faces = result
+        return {
+            "Left Person Fake Probability": result1_fake,
+            "Right Person Fake Probability": result2_fake,
+            "Left Person Face": faces[0],
+            "Right Person Face": faces[1]
+        }
+
+# Gradio formatting function
+def gradio_output(result):
+    if "Real Probability" in result:
+        return (
+            f"Real Probability: {result['Real Probability']}, "
+            f"Fake Probability: {result['Fake Probability']}",
+            result["Person Face"],
+            None,
+        )
+    elif "Left Person Fake Probability" in result:
+        return (
+            f"Left Fake Probability: {result['Left Person Fake Probability']}, "
+            f"Right Fake Probability: {result['Right Person Fake Probability']}",
+            result["Left Person Face"],
+            result["Right Person Face"],
+        )
+    else:  # no-face handling
+        return (
+            result["Message"],
+            None,  # Left Person Face
+            None,  # Right Person Face
+        )
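+# Wire the pipeline into a Gradio Interface: one text label plus up to two face crops.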
+import gradio as gr
+# Gradio
+demo = gr.Interface(
+    fn=lambda video: gradio_output(predict_deepfake(video)),
+    inputs=gr.Video(label="Upload Video"),
+    outputs=[
+        gr.Label(label="Deepfake Detection Result"),
+        gr.Image(label="Left/Single Person Face"),
+        gr.Image(label="Right Person Face"),
+    ],
+    title="Deepfake Detection Demo",
+    description="Upload a video to detect if it is a deepfake or real. Supports cases with one or two faces, or no faces.",
+)
 
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(share=True, debug=True)
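
For readers tracing the new flow, a minimal smoke test of the committed entry points; the video path is a hypothetical placeholder, and it assumes the cloned deepfake-detection repo and the DFDC weight downloads are in place:

# Hypothetical smoke test; "sample.mp4" is a placeholder path.
scores = predict_deepfake("sample.mp4")           # dict of probabilities and face crops
label_text, face1, face2 = gradio_output(scores)  # same tuple the UI receives
print(label_text)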
|