# ProgramSkripsi/model/pred_func.py
# NOTE: the following lines were Hugging Face Hub page residue accidentally
# pasted into the source (author "Yuuki0", commit e0c75d6 "first commit");
# kept here as a comment so the file is valid Python.
import os
import numpy as np
import cv2
import torch
import dlib
import face_recognition
from torchvision import transforms
from tqdm import tqdm
from dataset.loader import normalize_data
from .config import load_config
from .genconvit import GenConViT
from decord import VideoReader, cpu
import glob
from PIL import Image
# Run inference on the GPU when CUDA is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
def load_genconvit(config, net, ed_weight, vae_weight, fp16):
    """Build a GenConViT model, move it to the active device, and put it in eval mode.

    Args:
        config: model configuration passed through to GenConViT.
        net: sub-network selector passed through to GenConViT.
        ed_weight: encoder-decoder checkpoint identifier/weights.
        vae_weight: VAE checkpoint identifier/weights.
        fp16: when True, convert the model to half precision.

    Returns:
        The ready-to-use GenConViT model on `device`.
    """
    model = GenConViT(config, ed=ed_weight, vae=vae_weight, net=net, fp16=fp16)
    model.to(device)
    model.eval()
    if fp16:
        model.half()
    return model
def face_rec(frames):
    """Detect faces in the given frames and return them as 224x224 RGB crops.

    Args:
        frames: sequence of RGB frames as uint8 numpy arrays (H, W, 3)
                — assumed RGB since they are converted to BGR before
                detection; TODO confirm against callers.

    Returns:
        (faces, count): `faces` is a uint8 array of shape
        (count, 224, 224, 3) with at most len(frames) crops; when no face
        is found, the pair ([], 0) is returned (empty list kept for
        caller compatibility — callers only test `count > 0`).
    """
    capacity = len(frames)
    temp_face = np.zeros((capacity, 224, 224, 3), dtype=np.uint8)
    count = 0
    # dlib's CNN detector is only worthwhile on a CUDA build; HOG otherwise.
    mod = "cnn" if dlib.DLIB_USE_CUDA else "hog"

    for frame in tqdm(frames, total=capacity):
        # Fix: the original `break` only exited the inner face loop, so the
        # expensive detector kept running on every remaining frame even after
        # the output buffer was already full. Stop the whole scan instead.
        if count >= capacity:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        face_locations = face_recognition.face_locations(
            frame, number_of_times_to_upsample=0, model=mod
        )
        for top, right, bottom, left in face_locations:
            if count >= capacity:
                break
            face_image = frame[top:bottom, left:right]
            face_image = cv2.resize(
                face_image, (224, 224), interpolation=cv2.INTER_AREA
            )
            # Back to RGB: the crop came from the BGR-converted frame.
            temp_face[count] = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
            count += 1

    return ([], 0) if count == 0 else (temp_face[:count], count)
def preprocess_frame(frame):
    """Convert face crops (N, H, W, C) uint8 to a normalized (N, C, H, W) tensor.

    Args:
        frame: numpy array of face crops with values in [0, 255].

    Returns:
        Float tensor on `device`, scaled to [0, 1] and normalized by the
        dataset's "vid" transform.
    """
    df_tensor = torch.tensor(frame, device=device).float()
    df_tensor = df_tensor.permute((0, 3, 1, 2))
    # Hoisted out of the loop: the original rebuilt the whole transform dict
    # via normalize_data() once per frame for no benefit.
    vid_transform = normalize_data()["vid"]
    for i in range(len(df_tensor)):
        df_tensor[i] = vid_transform(df_tensor[i] / 255.0)
    return df_tensor
def pred_vid(df, model, net=None):
    """Run the model on a batch of face frames and return (class_idx, confidence).

    Args:
        df: preprocessed frame tensor (see preprocess_frame).
        model: callable model accepting (df, net=net).
        net: optional sub-network selector forwarded to the model.

    Returns:
        (index, value) from max_prediction_value over softmax probabilities.
    """
    with torch.no_grad():
        logits = model(df, net=net).squeeze()
        # A single-sample batch squeezes down to 1-D; restore the batch axis
        # so softmax over dim=1 is well-defined.
        if logits.dim() == 1:
            logits = logits.unsqueeze(0)
        probabilities = torch.softmax(logits, dim=1)
        return max_prediction_value(probabilities)
def max_prediction_value(y_pred):
    """Average predictions over the batch and return (argmax index, its value).

    Args:
        y_pred: 2-D tensor of per-frame class probabilities (batch, classes).

    Returns:
        (int, float): index of the highest mean class score and that score.
    """
    top = torch.max(torch.mean(y_pred, dim=0), dim=0)
    return top.indices.item(), top.values.item()
def real_or_fake(prediction):
    """Map a class index to its label: 0 -> "FAKE", 1 -> "REAL".

    Raises KeyError for any other index (same contract as the original
    dict lookup).
    """
    labels = {0: "FAKE", 1: "REAL"}
    return labels[prediction]
def extract_frames(video_file, num_frames=15):
    """Decode frames from a video file, evenly spaced across its length.

    Args:
        video_file: path to the video.
        num_frames: number of frames to sample; -1 means every frame.
            For short videos, linspace may yield duplicate indices.

    Returns:
        numpy array of decoded frames, shape (num_frames, H, W, C).
    """
    reader = VideoReader(video_file, ctx=cpu(0))
    frame_total = len(reader)
    if num_frames == -1:
        # Take everything.
        indices = np.arange(frame_total).astype(int)
    else:
        # Evenly spaced sample over [0, frame_total - 1].
        indices = np.linspace(0, frame_total - 1, num_frames, dtype=int)
    return reader.get_batch(indices).asnumpy()
def df_face_from_folder(vid, num_frames):
    """Load images from a folder, detect faces, and return a model-ready tensor.

    Args:
        vid: path to a folder of image files.
        num_frames: maximum number of images to use.

    Returns:
        Normalized face tensor from preprocess_frame, or [] when no face
        is detected (callers test truthiness).
    """
    img = []
    for f in glob.glob(vid + "/*"):
        # Fix: the original bare `except: pass` swallowed every exception,
        # including KeyboardInterrupt and genuine bugs. Keep the best-effort
        # skip, but only for image-read failures (PIL's UnidentifiedImageError
        # is an OSError subclass), and close the file handle via `with`.
        try:
            with Image.open(f) as im:
                img.append(np.asarray(im.convert('RGB')))
        except (OSError, ValueError):
            continue
    face, count = face_rec(img[:num_frames])
    return preprocess_frame(face) if count > 0 else []
def df_face(vid, num_frames):
    """Extract frames from a video file, crop the faces, and return a model-ready tensor.

    Args:
        vid: path to the video file.
        num_frames: number of frames to sample from the video.

    Returns:
        Normalized face tensor, or [] when no face was found.
    """
    frames = extract_frames(vid, num_frames)
    faces, n_faces = face_rec(frames)
    if n_faces > 0:
        return preprocess_frame(faces)
    return []
def is_video(vid):
    """Return True when `vid` is an existing file with a known video extension."""
    video_extensions = (".avi", ".mp4", ".mpg", ".mpeg", ".mov")
    if not os.path.isfile(vid):
        return False
    return vid.endswith(video_extensions)
def is_video_folder(vid_folder):
    """Return True when the folder is non-empty and its first globbed entry
    looks like an image file (png/jpeg/jpg suffix).

    NOTE: like the original, only the first entry is inspected and glob
    ordering is filesystem-dependent.
    """
    entries = glob.glob(vid_folder + "/*")
    if not entries:
        return False
    return entries[0].endswith(("png", "jpeg", "jpg"))
def set_result():
    """Return a fresh, empty result container for per-video predictions."""
    fields = ("name", "pred", "klass", "pred_label", "correct_label")
    return {"video": {field: [] for field in fields}}
def store_result(
    result, filename, y, y_val, klass, correct_label=None, compression=None
):
    """Append one video's prediction to the shared result dict (mutates in place).

    Args:
        result: dict produced by set_result().
        filename: video file name.
        y: predicted class index (0 = FAKE, 1 = REAL).
        y_val: prediction confidence value.
        klass: dataset class name (stored lowercased).
        correct_label: optional ground-truth label; appended only when given.
        compression: optional compression tag; appended only when given.

    Returns:
        The same `result` dict.
    """
    video = result["video"]
    video["name"].append(filename)
    video["pred"].append(y_val)
    video["klass"].append(klass.lower())
    video["pred_label"].append(real_or_fake(y))
    if correct_label is not None:
        video["correct_label"].append(correct_label)
    if compression is not None:
        # Fix: set_result() never creates a "compression" key, so the original
        # raised KeyError whenever a compression value was supplied.
        video.setdefault("compression", []).append(compression)
    return result