import os

import cv2
import gradio as gr
import librosa
import numpy as np
from keras.models import load_model
from moviepy.editor import VideoFileClip
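
# `multimodal_model` is used by detect_deepfake() below but is never loaded in
# this script. A minimal sketch, assuming the trained multimodal network was
# saved as "multimodal_model.h5" (hypothetical path; substitute the real
# checkpoint location):
multimodal_model = load_model("multimodal_model.h5")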

# ė°ģ“ķ„° ģ „ģ²˜ė¦¬
def preprocess_video(video_path):
    # Haar cascade for face detection (path assumes a Google Drive mount in Colab)
    face_cascade = cv2.CascadeClassifier('/content/drive/Shareddrives/23 į„‹į…µį†«į„€į…©į†¼į„Œį…µį„‚į…³į†¼ į„†į…©į„ƒį…¦į†Æį„…į…µį†¼_į„ƒį…©į†Æį„‘į…µį†«/haarcascade_frontalface_default.xml')
    cnn_data = []
    rnn_data = []

    cap = cv2.VideoCapture(video_path)
    count = 0
    # Collect up to 15 face crops; stop early if the video runs out of frames
    while count < 15:
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
        for (x, y, w, h) in faces:
            face_img = gray[y:y+h, x:x+w]
            resized_img = cv2.resize(face_img, (224, 224))
            cnn_data.append(resized_img)
            count += 1
            if count >= 15:
                break
    cap.release()

    # Extract the audio track and compute a fixed-size MFCC window for the
    # RNN branch; the temporary WAV file is removed afterwards
    video_clip = VideoFileClip(video_path)
    audio_clip = video_clip.audio
    audio_clip.write_audiofile("audio.wav")
    video_clip.close()
    y, sr = librosa.load("audio.wav", sr=44100)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfcc = mfcc[:, :400]  # keep at most the first 400 MFCC frames
    rnn_data.append(mfcc)
    os.remove("audio.wav")

    cnn_data = np.array(cnn_data)
    rnn_data = np.array(rnn_data)
    return cnn_data, rnn_data

# ė”„ķŽ˜ģ“ķ¬ 영상 유묓 ķŒė³„
def detect_deepfake(video_path):
    cnn_data, rnn_data = preprocess_video(video_path)

    cnn_data_np = np.array(cnn_data)
    rnn_data_np = np.array(rnn_data)

    def augment_data(data, target_size):
        # Initialize the augmented data array
        augmented_data = np.empty((target_size,) + data.shape[1:])

        # Cycle through the RNN samples, flipping each along its first axis,
        # until target_size samples have been produced
        for i in range(target_size):
            augmented_data[i] = np.flip(data[i % data.shape[0]], axis=0)

        return augmented_data

    # RNN ė°ģ“ķ„° ģ¦ź°•
    augmented_rnn_data = augment_data(rnn_data_np, cnn_data_np.shape[0])

    y_pred = multimodal_model.predict([cnn_data_np, augmented_rnn_data])

    max_prob = np.max(y_pred)

    # Scores below 0.5 are classified as deepfake
    if max_prob < 0.5:
        result = "Deepfake"
    else:
        result = "Real"
    return result
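
# Standalone usage sketch (hypothetical; assumes a local file "sample.mp4"):
# print(detect_deepfake("sample.mp4"))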

iface = gr.Interface(
    fn=detect_deepfake,
    inputs="video",
    outputs="text",
    title="Video Deepfake Detection",
    description="Upload a video to check if it contains deepfake content.",
    allow_flagging="never",
    analytics_enabled=False,
)

if __name__ == "__main__":
    iface.launch()