import gradio as gr import torch import os import cv2 import urllib.request from model.pred_func import load_genconvit, df_face, pred_vid, real_or_fake from model.config import load_config # --- Model Download --- def download_models(): """ Downloads the pre-trained model weights if they don't exist. """ weight_dir = 'weight' ed_url = 'https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_ed_inference.pth' vae_url = 'https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_vae_inference.pth' ed_path = os.path.join(weight_dir, 'genconvit_ed_inference.pth') vae_path = os.path.join(weight_dir, 'genconvit_vae_inference.pth') if not os.path.exists(weight_dir): os.makedirs(weight_dir) if not os.path.exists(ed_path): print("Downloading ED model weights...") urllib.request.urlretrieve(ed_url, ed_path) print("Download complete.") if not os.path.exists(vae_path): print("Downloading VAE model weights...") urllib.request.urlretrieve(vae_url, vae_path) print("Download complete.") # --- Global Variables --- config = load_config() model = None def load_model_once(): """ Loads the model into memory. This function is called once at the start. """ global model if model is None: download_models() print("Loading GenConViT model...") ed_weight = 'genconvit_ed_inference' vae_weight = 'genconvit_vae_inference' # Set net='genconvit' to use both ED and VAE as per prediction.py logic for best results model = load_genconvit(config, net='genconvit', ed_weight=ed_weight, vae_weight=vae_weight, fp16=False) print("Model loaded successfully.") def get_video_duration(video_path): cap = cv2.VideoCapture(video_path) if not cap.isOpened(): return 0 fps = cap.get(cv2.CAP_PROP_FPS) frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) cap.release() if fps == 0: return 0 return frame_count / fps # --- Prediction Function --- def detect_deepfake(video_path, model_type, num_frames): if video_path is None: return "❌ Please upload a video file." # ===== VALIDASI DURASI VIDEO ===== duration = get_video_duration(video_path) if duration > 60: return "❌ Video terlalu besar. Durasi maksimal adalah 1 menit (60 detik)." try: print(f"Processing video: {video_path} with model: {model_type}") # Map model_type to internal net identifier net_mapping = { "GenConViT": "genconvit", "AE": "ed", "VAE": "vae" } net_val = net_mapping.get(model_type, "genconvit") # Extract faces from the video faces = df_face(video_path, num_frames) if len(faces) == 0: return "No faces were detected in the video. Please try another video." # Make prediction y, y_val = pred_vid(faces, model, net=net_val) # Get the label (REAL or FAKE) label = real_or_fake(y) # The confidence score y_val is a bit complex in the original code. # For simplicity, we'll show the raw score associated with the prediction. # A lower score generally means more likely to be REAL, higher means more likely to be FAKE. confidence = y_val if label == 'FAKE' else 1 - y_val return { "FAKE": confidence, "REAL": 1 - confidence } except Exception as e: print(f"An error occurred: {e}") return "An error occurred during processing. The video might be corrupted or in an unsupported format." # --- Gradio Interface --- title = "GenConViT: Deepfake Video Detection" description = """ Upload a video file to detect if it's a deepfake. This application uses the Generative Convolutional Vision Transformer (GenConViT) to analyze the video. The model achieves an average accuracy of 95.8% and an AUC of 99.3% across multiple datasets. """ # Load the model once when the app starts load_model_once() iface = gr.Interface( fn=detect_deepfake, inputs=[ gr.Video(label="Upload Video"), gr.Radio(["GenConViT", "AE", "VAE"], label="Pilih Model", value="GenConViT"), gr.Slider(1, 200, value=15, step=1, label="Number of Frames") ], outputs=gr.Label(num_top_classes=2, label="Prediction Result"), title=title, description=description, flagging_mode="never", examples=[ ["sample_prediction_data/aajsqyyjni.mp4", "GenConViT", 15], ["sample_prediction_data/anndvqgoko.mp4", "GenConViT", 15], ["sample_prediction_data/0017_fake.mp4.mp4", "GenConViT", 15], ["sample_prediction_data/0048_fake.mp4.mp4", "GenConViT", 15] ] ) if __name__ == "__main__": iface.queue().launch()