import os
import logging

import cv2
import numpy as np
import torch
from PIL import Image

logger = logging.getLogger(__name__)

# ImageNet normalization constants (per-channel mean/std, shaped for broadcasting)
MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
def save_uploaded_video(upload_file, temp_dir: str) -> str:
    """Save an uploaded video to a temporary file and return its path."""
    # basename() guards against path traversal via a crafted filename
    filename = os.path.basename(upload_file.filename)
    file_path = os.path.join(temp_dir, filename)
    with open(file_path, "wb") as buffer:
        buffer.write(upload_file.file.read())
    return file_path
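# Typical usage (a sketch, assuming a FastAPI-style UploadFile exposing
# .filename and .file -- the endpoint wiring itself is not part of this module):
#
#     import tempfile
#
#     with tempfile.TemporaryDirectory() as tmp:
#         path = save_uploaded_video(upload_file, tmp)
#         clip = video_to_tensor(path)  # (num_frames, 3, 224, 224)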
# Load OpenCV's Haar face detector (ships with opencv-python, no extra install)
_face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)
if _face_cascade.empty():
    logger.warning("Failed to load Haar cascade; face cropping will fall back to centre crop")
def _crop_face(frame_bgr: np.ndarray, margin: float = 0.3) -> np.ndarray:
    """
    Detect and crop the largest face in a BGR frame.

    Returns the face crop (with a margin around the detection), or a
    centre square crop if no face is found.
    """
    if _face_cascade.empty():
        faces = ()  # detector unavailable; use the centre-crop fallback below
    else:
        gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
        faces = _face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60)
        )
    if len(faces) == 0:
        # Fall back to a centre square crop (better than the full frame)
        h, w = frame_bgr.shape[:2]
        size = min(h, w)
        y0 = (h - size) // 2
        x0 = (w - size) // 2
        return frame_bgr[y0:y0 + size, x0:x0 + size]
    # Pick the largest detected face
    x, y, fw, fh = max(faces, key=lambda f: f[2] * f[3])
    # Expand the box by `margin` on each side, clamped to the frame bounds
    mx = int(fw * margin)
    my = int(fh * margin)
    H, W = frame_bgr.shape[:2]
    x1 = max(0, x - mx)
    y1 = max(0, y - my)
    x2 = min(W, x + fw + mx)
    y2 = min(H, y + fh + my)
    return frame_bgr[y1:y2, x1:x2]
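# Quick visual sanity check for _crop_face (illustrative only; "sample.jpg"
# is a hypothetical path, not shipped with this repo):
#
#     img = cv2.imread("sample.jpg")
#     cv2.imwrite("sample_face.jpg", _crop_face(img))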
def extract_frames(video_path: str, num_frames: int = 16) -> list:
    """Extract evenly spaced frames from a video, face-cropped and in RGB."""
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if not cap.isOpened() or total_frames <= 0:
        cap.release()
        return []
    indices = np.linspace(0, total_frames - 1, num=min(num_frames, total_frames), dtype=int)
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ret, frame = cap.read()
        if ret:
            face = _crop_face(frame)  # crop to the face region
            frames.append(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames
def preprocess_frame(frame: np.ndarray, target_size: int = 224) -> torch.Tensor:
    """Preprocess a single RGB frame for model input."""
    # Resize with PIL (bilinear), then convert to a CHW float tensor in [0, 1]
    pil_img = Image.fromarray(frame).resize((target_size, target_size), Image.BILINEAR)
    tensor = torch.from_numpy(np.array(pil_img)).float().permute(2, 0, 1) / 255.0
    # Normalize with ImageNet statistics
    tensor = (tensor - MEAN) / STD
    # Guard against NaN/Inf from degenerate inputs
    tensor = torch.nan_to_num(tensor, nan=0.0, posinf=5.0, neginf=-5.0)
    return tensor
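def denormalize_frame(tensor: torch.Tensor) -> np.ndarray:
    """Invert preprocess_frame for visual inspection.

    Optional debugging helper (an addition, not part of the original
    pipeline): maps a normalized (3, H, W) tensor back to a uint8 RGB
    array by undoing the ImageNet normalization.
    """
    img = tensor * STD + MEAN                  # undo (x - MEAN) / STD
    img = img.clamp(0.0, 1.0).mul(255).byte()  # back to [0, 255]
    return img.permute(1, 2, 0).cpu().numpy()  # (H, W, 3) uint8 RGB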
def video_to_tensor(video_path: str, num_frames: int = 16, img_size: int = 224) -> torch.Tensor:
    """Convert a video to a tensor of shape (num_frames, 3, img_size, img_size)."""
    frames = extract_frames(video_path, num_frames)
    if not frames:
        raise ValueError(f"Could not extract frames from video: {video_path}")
    tensors = [preprocess_frame(frame, img_size) for frame in frames]
    # Pad by repeating the last frame if the video yielded too few frames
    if len(tensors) < num_frames:
        logger.warning("Padding %d -> %d frames for %s", len(tensors), num_frames, video_path)
        last_tensor = tensors[-1]
        while len(tensors) < num_frames:
            tensors.append(last_tensor.clone())
    return torch.stack(tensors)
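

if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch; "sample.mp4" is a
    # placeholder path, not shipped with this repo).
    import sys

    logging.basicConfig(level=logging.INFO)
    video = sys.argv[1] if len(sys.argv) > 1 else "sample.mp4"
    try:
        clip = video_to_tensor(video)
        print(f"clip tensor: {tuple(clip.shape)}, dtype={clip.dtype}")
    except ValueError as exc:
        print(f"failed to process {video!r}: {exc}")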