# CC — Deploy DeepFake video classifier to Hugging Face Spaces (commit 198f874)
import cv2
import numpy as np
import torch
from PIL import Image
import tempfile
import os
from pathlib import Path
import logging
# logger = logging.getLogger(__name__)
# # ImageNet normalization constants
# MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
# STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
def save_uploaded_video(upload_file, temp_dir: str) -> str:
    """Save an uploaded video into ``temp_dir`` and return the saved path.

    Args:
        upload_file: Upload object exposing ``filename`` (client-supplied
            name) and ``file`` (binary file-like object to read from).
        temp_dir: Existing directory the file is written into.

    Returns:
        Path of the written file, always directly inside ``temp_dir``.

    Raises:
        ValueError: If the upload has no usable file name.
    """
    import shutil

    raw_name = upload_file.filename or ""
    # The name comes from the client: keep only its final component so that
    # values such as "../../etc/passwd" or "C:\\x\\y.mp4" cannot escape temp_dir.
    safe_name = os.path.basename(raw_name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload filename: {raw_name!r}")

    file_path = os.path.join(temp_dir, safe_name)
    with open(file_path, "wb") as buffer:
        # Stream in chunks instead of loading the whole video into memory.
        shutil.copyfileobj(upload_file.file, buffer)
    return file_path
# def extract_frames(video_path: str, num_frames: int = 16) -> list:
# """Extract evenly spaced frames from video."""
# cap = cv2.VideoCapture(video_path)
# total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# if total_frames <= 0:
# cap.release()
# return []
# indices = np.linspace(0, total_frames - 1, num=min(num_frames, total_frames), dtype=int)
# frames = []
# for idx in indices:
# cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
# ret, frame = cap.read()
# if ret:
# frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# frames.append(frame_rgb)
# cap.release()
# return frames
# utils.py — face-cropping versions of extract_frames and preprocess_frame;
# these supersede the commented-out extract_frames above.
import cv2
import numpy as np
import torch
from PIL import Image
import os
import logging
logger = logging.getLogger(__name__)

# ImageNet per-channel mean/std, shaped (3, 1, 1) so they broadcast over a
# (3, H, W) image tensor.
MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Load OpenCV's face detector (ships with opencv-python, no extra install)
_face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
# CascadeClassifier does not raise when the XML cannot be loaded; it simply
# stays empty and detectMultiScale fails later. Surface that at import time.
if _face_cascade.empty():
    logger.warning(
        "Failed to load Haar cascade from %s; face detection will not work",
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml',
    )
def _crop_face(frame_bgr: np.ndarray, margin: float = 0.3) -> np.ndarray:
    """
    Crop a BGR frame to its largest detected face, padded by ``margin``.

    ``margin`` is the fraction of the face box width/height added on each
    side; the padded box is clipped to the frame bounds. When the detector
    finds no face, the largest centred square of the frame is returned.
    """
    detections = _face_cascade.detectMultiScale(
        cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY),
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(60, 60),
    )
    frame_h, frame_w = frame_bgr.shape[:2]

    if len(detections) > 0:
        def box_area(box):
            return box[2] * box[3]

        # Largest box by area wins.
        left, top, box_w, box_h = max(detections, key=box_area)
        pad_x = int(box_w * margin)
        pad_y = int(box_h * margin)
        row_start = max(0, top - pad_y)
        row_stop = min(frame_h, top + box_h + pad_y)
        col_start = max(0, left - pad_x)
        col_stop = min(frame_w, left + box_w + pad_x)
        return frame_bgr[row_start:row_stop, col_start:col_stop]

    # No face: centre square crop.
    side = min(frame_h, frame_w)
    top = (frame_h - side) // 2
    left = (frame_w - side) // 2
    return frame_bgr[top:top + side, left:left + side]
def extract_frames(video_path: str, num_frames: int = 16) -> list:
    """Extract evenly spaced, face-cropped RGB frames from a video.

    Args:
        video_path: Path to a video file readable by OpenCV.
        num_frames: Maximum number of frames to sample.

    Returns:
        A list of RGB ``np.ndarray`` frames, each cropped by ``_crop_face``.
        The list is empty when the video cannot be opened or reports no
        frames, and may be shorter than ``num_frames`` when the video has
        fewer frames or individual reads fail.
    """
    cap = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture handle is released even if face
    # cropping or colour conversion raises part-way through the loop.
    try:
        if not cap.isOpened():
            logger.warning("Could not open video: %s", video_path)
            return []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            logger.warning("Video reports no frames: %s", video_path)
            return []

        indices = np.linspace(
            0, total_frames - 1, num=min(num_frames, total_frames), dtype=int
        )
        frames = []
        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                # Seek/read failures are skipped; callers pad if needed.
                continue
            face = _crop_face(frame)
            frames.append(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
        return frames
    finally:
        cap.release()
def preprocess_frame(frame: np.ndarray, target_size: int = 224) -> torch.Tensor:
    """Turn one frame into a normalized channel-first tensor for the model.

    The frame is resized to ``target_size`` x ``target_size`` with bilinear
    interpolation, scaled to [0, 1], then standardized with the module-level
    ``MEAN`` and ``STD``.
    """
    resized = Image.fromarray(frame).resize(
        (target_size, target_size), Image.BILINEAR
    )
    pixels = np.array(resized)

    # (H, W, C) -> (C, H, W), values scaled into [0, 1].
    channels_first = torch.from_numpy(pixels).float().permute(2, 0, 1)
    scaled = channels_first / 255.0

    standardized = (scaled - MEAN) / STD
    # Replace any NaN / infinite values with finite ones.
    return torch.nan_to_num(standardized, nan=0.0, posinf=5.0, neginf=-5.0)
def video_to_tensor(video_path: str, num_frames: int = 16, img_size: int = 224) -> torch.Tensor:
    """Load a video as a stacked tensor of preprocessed frames.

    Frames come from ``extract_frames`` and are each passed through
    ``preprocess_frame``. If fewer than ``num_frames`` frames were obtained,
    the last one is repeated until the count is reached.

    Raises:
        ValueError: If no frames could be extracted.
    """
    sampled = extract_frames(video_path, num_frames)
    if len(sampled) == 0:
        raise ValueError("Could not extract frames from video")

    batch = [preprocess_frame(image, img_size) for image in sampled]

    # Pad short clips by cloning the final frame.
    missing = num_frames - len(batch)
    if missing > 0:
        filler = batch[-1]
        batch.extend(filler.clone() for _ in range(missing))

    return torch.stack(batch)