Spaces:
Runtime error
Runtime error
| import os | |
| import numpy as np | |
| import cv2 | |
| import torch | |
| import dlib | |
| import face_recognition | |
| from torchvision import transforms | |
| from tqdm import tqdm | |
| from dataset.loader import normalize_data | |
| from .config import load_config | |
| from .genconvit import GenConViT | |
| from decord import VideoReader, cpu | |
| import glob | |
| from PIL import Image | |
# Select the compute device once at import time; every model/tensor in this
# module is placed on it ("cuda" when a GPU is visible, else CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
def load_genconvit(config, net, ed_weight, vae_weight, fp16):
    """Build a GenConViT model and prepare it for inference.

    Args:
        config: model configuration object passed through to GenConViT.
        net: network variant selector.
        ed_weight: encoder-decoder checkpoint identifier.
        vae_weight: VAE checkpoint identifier.
        fp16: when True, convert the model to half precision.

    Returns:
        The model on the module-level device, in eval mode.
    """
    model = GenConViT(config, ed=ed_weight, vae=vae_weight, net=net, fp16=fp16)
    model.to(device)
    model.eval()
    # Module.half() converts in place and returns the same module.
    return model.half() if fp16 else model
def face_rec(frames):
    """Detect and crop faces from a sequence of RGB frames.

    Args:
        frames: sequence of H x W x 3 uint8 RGB frames (as produced by
            extract_frames / decord).

    Returns:
        (faces, count): faces is a (count, 224, 224, 3) uint8 RGB array of
        cropped faces (at most len(frames) of them), or ([], 0) when no face
        was detected.
    """
    temp_face = np.zeros((len(frames), 224, 224, 3), dtype=np.uint8)
    count = 0
    # The CNN detector requires CUDA-enabled dlib; HOG is the CPU fallback.
    mod = "cnn" if dlib.DLIB_USE_CUDA else "hog"
    for frame in tqdm(frames, total=len(frames)):
        # face_recognition expects RGB input and the frames are already RGB,
        # so the original RGB->BGR->RGB round-trip (which fed BGR data to the
        # detector and hurt accuracy) is removed.
        face_locations = face_recognition.face_locations(
            frame, number_of_times_to_upsample=0, model=mod
        )
        for top, right, bottom, left in face_locations:
            if count >= len(frames):
                break
            face_image = cv2.resize(
                frame[top:bottom, left:right],
                (224, 224),
                interpolation=cv2.INTER_AREA,
            )
            temp_face[count] = face_image
            count += 1
    return ([], 0) if count == 0 else (temp_face[:count], count)
def preprocess_frame(frame):
    """Convert an (N, H, W, C) uint8 face array into a normalized NCHW tensor.

    Args:
        frame: numpy array of stacked face crops (values 0-255).

    Returns:
        Float tensor of shape (N, C, H, W) on the module-level device, scaled
        to [0, 1] and normalized with the project's "vid" transform.
    """
    df_tensor = torch.tensor(frame, device=device).float()
    df_tensor = df_tensor.permute((0, 3, 1, 2))
    # Build the normalization transform once instead of once per frame.
    normalize = normalize_data()["vid"]
    for i in range(len(df_tensor)):
        df_tensor[i] = normalize(df_tensor[i] / 255.0)
    return df_tensor
def pred_vid(df, model, net=None):
    """Run the model on a batch of face tensors and reduce to one prediction.

    Args:
        df: preprocessed face tensor batch.
        model: GenConViT model in eval mode.
        net: optional network selector forwarded to the model.

    Returns:
        (class_index, confidence) from max_prediction_value.
    """
    with torch.no_grad():
        logits = model(df, net=net).squeeze()
        # A single-frame batch squeezes down to 1-D; restore the batch dim.
        if logits.dim() == 1:
            logits = logits.unsqueeze(0)
        # Softmax converts raw logits into per-frame class probabilities.
        return max_prediction_value(torch.softmax(logits, dim=1))
def max_prediction_value(y_pred):
    """Average predictions over the batch and pick the strongest class.

    Args:
        y_pred: (N, num_classes) tensor of per-frame probabilities.

    Returns:
        (argmax_index, max_value) of the batch-mean prediction, as Python
        scalars.
    """
    averaged = y_pred.mean(dim=0)
    best_val, best_idx = averaged.max(dim=0)
    return best_idx.item(), best_val.item()
def real_or_fake(prediction):
    """Map a class index to its label (0 -> "FAKE", 1 -> "REAL").

    Raises KeyError for any other index, matching the closed label set.
    """
    labels = {0: "FAKE", 1: "REAL"}
    return labels[prediction]
def extract_frames(video_file, num_frames=15):
    """Decode evenly spaced frames from a video file.

    Args:
        video_file: path to the video.
        num_frames: number of frames to sample; -1 decodes every frame.

    Returns:
        (N, H, W, 3) uint8 numpy array of RGB frames.
    """
    reader = VideoReader(video_file, ctx=cpu(0))
    frame_total = len(reader)
    if num_frames == -1:
        picks = np.arange(frame_total).astype(int)
    else:
        # Evenly spaced indices spanning the whole clip, endpoints included.
        picks = np.linspace(0, frame_total - 1, num_frames, dtype=int)
    return reader.get_batch(picks).asnumpy()
def df_face_from_folder(vid, num_frames):
    """Load images from a folder, detect faces, and return preprocessed tensors.

    Args:
        vid: path to a folder of image files.
        num_frames: maximum number of images to run face detection on.

    Returns:
        Normalized face tensor batch from preprocess_frame, or [] when no
        face was detected.
    """
    img = []
    for f in glob.glob(vid + "/*"):
        # Skip unreadable or non-image entries. The original bare
        # `except: pass` swallowed every exception (incl. KeyboardInterrupt)
        # and never closed the file handles opened by Image.open.
        try:
            with Image.open(f) as im:
                img.append(np.asarray(im.convert("RGB")))
        except (OSError, ValueError):
            continue
    face, count = face_rec(img[:num_frames])
    return preprocess_frame(face) if count > 0 else []
def df_face(vid, num_frames):
    """Sample frames from a video file, crop the faces, and preprocess them.

    Returns the normalized face tensor batch, or [] when no face was found.
    """
    frames = extract_frames(vid, num_frames)
    face, count = face_rec(frames)
    if count > 0:
        return preprocess_frame(face)
    return []
def is_video(vid):
    """Return True when vid is an existing file with a known video extension."""
    video_exts = (".avi", ".mp4", ".mpg", ".mpeg", ".mov")
    return os.path.isfile(vid) and vid.endswith(video_exts)
def is_video_folder(vid_folder):
    """Return True when vid_folder contains at least one image file.

    The original checked only the first globbed entry (glob order is
    OS-dependent) and matched bare suffixes, so e.g. "foo_png" counted as an
    image. This checks every entry against real dotted extensions.
    """
    img_exts = (".png", ".jpeg", ".jpg")
    return any(f.endswith(img_exts) for f in glob.glob(vid_folder + "/*"))
def set_result():
    """Return an empty result container for accumulating per-video predictions."""
    fields = ("name", "pred", "klass", "pred_label", "correct_label")
    return {"video": {field: [] for field in fields}}
def store_result(
    result, filename, y, y_val, klass, correct_label=None, compression=None
):
    """Append one video's prediction to the accumulated result dict.

    Args:
        result: dict created by set_result(), mutated in place.
        filename: name of the processed video.
        y: predicted class index (0 = FAKE, 1 = REAL).
        y_val: prediction confidence value.
        klass: category name, stored lower-cased.
        correct_label: optional ground-truth label to record.
        compression: optional compression level to record.

    Returns:
        The same result dict, for chaining.
    """
    video = result["video"]
    video["name"].append(filename)
    video["pred"].append(y_val)
    video["klass"].append(klass.lower())
    video["pred_label"].append(real_or_fake(y))
    if correct_label is not None:
        video["correct_label"].append(correct_label)
    if compression is not None:
        # set_result() does not pre-create this key, so the original raised
        # KeyError whenever compression was supplied; create it on demand.
        video.setdefault("compression", []).append(compression)
    return result