Spaces:

oo01
/

Deepfake-video-classifier

Running

Deepfake-video-classifier / app /utils.py

Deploy DeepFake video classifier to Hugging Face Spaces

198f874 27 days ago

4.75 kB

	import cv2
	import numpy as np
	import torch
	from PIL import Image
	import tempfile
	import os
	from pathlib import Path
	import logging

	# logger = logging.getLogger(__name__)

	# # ImageNet normalization constants
	# MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
	# STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)


	def save_uploaded_video(upload_file, temp_dir: str) -> str:
	    """Save an uploaded video inside ``temp_dir`` and return the saved path.

	    Args:
	        upload_file: Upload object exposing ``filename`` (the client-supplied
	            name) and ``file`` (a readable binary file-like object).
	        temp_dir: Existing directory the video is written into.

	    Returns:
	        Path of the written file, always located directly inside ``temp_dir``.

	    Raises:
	        ValueError: If the upload has no usable file name.
	    """
	    import shutil

	    raw_name = upload_file.filename or ""
	    # The file name is client-controlled: keep only its final component so
	    # values such as "../../x" or "/etc/x" cannot escape temp_dir.
	    # Backslashes are normalised so Windows-style paths are stripped too.
	    safe_name = os.path.basename(raw_name.replace("\\", "/"))
	    if safe_name in ("", ".", ".."):
	        raise ValueError(f"Invalid upload filename: {raw_name!r}")

	    file_path = os.path.join(temp_dir, safe_name)
	    with open(file_path, "wb") as buffer:
	        # Stream in chunks instead of loading the whole video into memory.
	        shutil.copyfileobj(upload_file.file, buffer)
	    return file_path


	# def extract_frames(video_path: str, num_frames: int = 16) -> list:
	# """Extract evenly spaced frames from video."""
	# cap = cv2.VideoCapture(video_path)
	# total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

	# if total_frames <= 0:
	# cap.release()
	# return []

	# indices = np.linspace(0, total_frames - 1, num=min(num_frames, total_frames), dtype=int)
	# frames = []

	# for idx in indices:
	# cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
	# ret, frame = cap.read()
	# if ret:
	# frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	# frames.append(frame_rgb)

	# cap.release()
	# return frames
	# NOTE: the definitions below supersede the earlier extract_frames / preprocess_frame
	# (the previous extract_frames is kept commented out above for reference).

	import cv2
	import numpy as np
	import torch
	from PIL import Image
	import os
	import logging

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)

	# ImageNet per-channel mean / std, shaped (3, 1, 1) so they broadcast
	# against channel-first (3, H, W) image tensors.
	MEAN = torch.tensor([0.485, 0.456, 0.406]).reshape(3, 1, 1)
	STD = torch.tensor([0.229, 0.224, 0.225]).reshape(3, 1, 1)

	# Haar-cascade frontal-face detector, loaded once at import time. The XML
	# model ships with opencv-python, so no extra install is required.
	_face_cascade = cv2.CascadeClassifier(
	    os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
	)

	def _crop_face(frame_bgr: np.ndarray, margin: float = 0.3) -> np.ndarray:
	    """
	    Crop a BGR frame to its largest detected face.

	    The detected box is widened on every side by ``margin`` times the box
	    width/height and clipped to the frame bounds. When no face is detected,
	    a centred square crop of the frame is returned instead.
	    """
	    detections = _face_cascade.detectMultiScale(
	        cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY),
	        scaleFactor=1.1,
	        minNeighbors=5,
	        minSize=(60, 60),
	    )
	    frame_h, frame_w = frame_bgr.shape[:2]

	    if len(detections) > 0:
	        # Largest face by bounding-box area.
	        left, top, box_w, box_h = max(detections, key=lambda box: box[2] * box[3])

	        # Pad the box by the requested margin, then clip to the frame.
	        pad_x = int(box_w * margin)
	        pad_y = int(box_h * margin)
	        row_start = max(0, top - pad_y)
	        row_stop = min(frame_h, top + box_h + pad_y)
	        col_start = max(0, left - pad_x)
	        col_stop = min(frame_w, left + box_w + pad_x)
	        return frame_bgr[row_start:row_stop, col_start:col_stop]

	    # No face found: fall back to a centred square crop.
	    side = min(frame_h, frame_w)
	    row0 = (frame_h - side) // 2
	    col0 = (frame_w - side) // 2
	    return frame_bgr[row0:row0 + side, col0:col0 + side]


	def extract_frames(video_path: str, num_frames: int = 16) -> list:
	    """Extract evenly spaced, face-cropped RGB frames from a video.

	    Args:
	        video_path: Path of the video file to read.
	        num_frames: Maximum number of frames to sample across the video.

	    Returns:
	        List of RGB ``np.ndarray`` crops produced by ``_crop_face``. The list
	        is empty when ``num_frames`` is not positive, the video cannot be
	        opened, or it reports no frames; it may hold fewer than
	        ``num_frames`` entries when the video is short or some reads fail.
	    """
	    if num_frames <= 0:
	        return []

	    cap = cv2.VideoCapture(video_path)
	    # try/finally guarantees the capture handle is released even when face
	    # detection or colour conversion raises part-way through the loop.
	    try:
	        if not cap.isOpened():
	            logger.warning("Could not open video: %s", video_path)
	            return []

	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        if total_frames <= 0:
	            return []

	        indices = np.linspace(
	            0, total_frames - 1, num=min(num_frames, total_frames), dtype=int
	        )
	        frames = []
	        for idx in indices:
	            # Cast to a plain int: the index comes from a NumPy array.
	            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
	            ret, frame = cap.read()
	            if not ret:
	                # Skip frames that cannot be decoded.
	                continue
	            face = _crop_face(frame)
	            frames.append(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
	        return frames
	    finally:
	        cap.release()

	def preprocess_frame(frame: np.ndarray, target_size: int = 224) -> torch.Tensor:
	    """Turn one image array into a normalised channel-first tensor.

	    The frame is resized to ``target_size`` x ``target_size`` with bilinear
	    interpolation, scaled to [0, 1], standardised with the module-level
	    ``MEAN`` / ``STD`` and finally cleaned of any NaN / infinite values.
	    """
	    # Resize through PIL.
	    resized = Image.fromarray(frame).resize((target_size, target_size), Image.BILINEAR)

	    # Height-width-channel pixels -> float channel-first tensor in [0, 1].
	    pixels = np.array(resized)
	    channels_first = torch.from_numpy(pixels).float().permute(2, 0, 1)
	    scaled = channels_first / 255.0

	    # Standardise with the ImageNet statistics, then clamp non-finite values.
	    standardised = (scaled - MEAN) / STD
	    return torch.nan_to_num(standardised, nan=0.0, posinf=5.0, neginf=-5.0)


	def video_to_tensor(video_path: str, num_frames: int = 16, img_size: int = 224) -> torch.Tensor:
	    """Build a (num_frames, 3, img_size, img_size) tensor from a video file.

	    Frames come from ``extract_frames`` and are each run through
	    ``preprocess_frame``. If fewer than ``num_frames`` frames were
	    extracted, the last preprocessed frame is repeated to fill the batch.

	    Raises:
	        ValueError: If no frame could be extracted from the video.
	    """
	    sampled = extract_frames(video_path, num_frames)
	    if not sampled:
	        raise ValueError("Could not extract frames from video")

	    batch = [preprocess_frame(image, img_size) for image in sampled]

	    # Repeat the final frame until the batch reaches the requested length.
	    missing = num_frames - len(batch)
	    if missing > 0:
	        filler = batch[-1]
	        batch.extend(filler.clone() for _ in range(missing))

	    return torch.stack(batch)