# HMPP/core/utils/face_detector.py
# Author: Lingteng Qiu (邱陵腾)
# Commit: 434b0b0 ("rm assets & wheels")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) Xuangeng Chu (xg.chu@outlook.com)
# Modified based on code from Orest Kupyn (University of Oxford).
# @Organization : Tongyi Lab, Alibaba
# @Author : Lingteng Qiu
# @Email : 220019047@link.cuhk.edu.cn
# @Time : 2025-08-31 10:02:15
# @Function : Face detection and bbox NMS
import sys
import numpy as np
import torch
import torchvision
sys.path.append("./")
def expand_bbox(bbox, scale=1.1):
    """Expand an xyxy bounding box into a square box around its center.

    The output side length is ``sqrt(area) * scale``, so the result is
    always square regardless of the input aspect ratio.

    Args:
        bbox: tensor of shape (..., 4) holding (xmin, ymin, xmax, ymax).
        scale: multiplier applied to the square side length.

    Returns:
        Tensor of shape (..., 4): expanded (xmin, ymin, xmax, ymax).
        Coordinates are NOT clamped and may fall outside the image;
        callers are expected to clip/crop as needed.
    """
    xmin, ymin, xmax, ymax = bbox.unbind(dim=-1)
    cenx, ceny = (xmin + xmax) / 2, (ymin + ymax) / 2
    # Square side derived from the geometric mean of width and height.
    half = torch.sqrt((ymax - ymin) * (xmax - xmin)) * scale / 2
    # Fix: the original built this same tensor twice (an unused
    # `expanded_bbox` local followed by an identical `torch.stack`).
    return torch.stack(
        [cenx - half, ceny - half, cenx + half, ceny + half], dim=-1
    )
def nms(
    boxes_xyxy,
    scores,
    flame_params,
    confidence_threshold: float = 0.5,
    iou_threshold: float = 0.5,
    top_k: int = 1000,
    keep_top_k: int = 100,
):
    """Confidence-filter, top-k select, and NMS the head predictions.

    Args:
        boxes_xyxy: (B, Anchors, 4) predicted boxes in xyxy format.
        scores: (B, Anchors, 1) detection confidences.
        flame_params: (B, Anchors, P) per-anchor FLAME parameter vectors.
        confidence_threshold: drop anchors scoring below this value.
        iou_threshold: IoU threshold passed to torchvision NMS.
        top_k: cap on candidates entering NMS (highest scores kept).
        keep_top_k: cap on detections kept after NMS.

    Returns:
        Tuple (boxes [N, 4], scores [N], params [N, P]) for ONE image.

    NOTE(review): the loop rebinds its outputs on every iteration, so for
    a batch with B > 1 only the LAST image's detections are returned.
    Callers in this file always pass B == 1; the return shape is kept
    unchanged for compatibility.

    Raises:
        ValueError: if the batch is empty. (Previously this path crashed
            with a NameError on the undefined `final_bboxes`.)
    """
    if boxes_xyxy.shape[0] == 0:
        raise ValueError("nms received an empty batch")
    for pred_bboxes_xyxy, pred_bboxes_conf, pred_flame_params in zip(
        boxes_xyxy.detach().float(),
        scores.detach().float(),
        flame_params.detach().float(),
    ):
        pred_bboxes_conf = pred_bboxes_conf.squeeze(-1)  # [Anchors]
        # 1) Confidence filter.
        conf_mask = pred_bboxes_conf >= confidence_threshold
        pred_bboxes_conf = pred_bboxes_conf[conf_mask]
        pred_bboxes_xyxy = pred_bboxes_xyxy[conf_mask]
        pred_flame_params = pred_flame_params[conf_mask]
        # 2) Keep only the top_k highest-scoring candidates for NMS.
        if pred_bboxes_conf.size(0) > top_k:
            topk_candidates = torch.topk(
                pred_bboxes_conf, k=top_k, largest=True, sorted=True
            )
            pred_bboxes_conf = pred_bboxes_conf[topk_candidates.indices]
            pred_bboxes_xyxy = pred_bboxes_xyxy[topk_candidates.indices]
            pred_flame_params = pred_flame_params[topk_candidates.indices]
        # 3) NMS, then cap at keep_top_k.
        idx_to_keep = torchvision.ops.boxes.nms(
            boxes=pred_bboxes_xyxy, scores=pred_bboxes_conf, iou_threshold=iou_threshold
        )
        final_bboxes = pred_bboxes_xyxy[idx_to_keep][:keep_top_k]  # [Instances, 4]
        final_scores = pred_bboxes_conf[idx_to_keep][:keep_top_k]  # [Instances]
        final_params = pred_flame_params[idx_to_keep][
            :keep_top_k
        ]  # [Instances, Flame Params]
    return final_bboxes, final_scores, final_params
class VGGHeadDetector(torch.nn.Module):
    """TorchScript VGG-Heads detector.

    Wraps a jit-scripted checkpoint that, given a 640x640 letterboxed
    image, predicts head bounding boxes, confidences, and FLAME
    parameter vectors. Only the largest detected head is returned.
    """

    def __init__(self, model_path, device):
        super().__init__()
        self.image_size = 640  # model input resolution (square letterbox)
        self._device = device
        self.model_path = model_path
        self._init_models()

    def _init_models(self):
        # Load the TorchScript checkpoint on CPU first, then move to the
        # target device and switch to eval mode.
        self.model = torch.jit.load(self.model_path, map_location="cpu")
        self.model.to(self._device).eval()

    def forward(self, image_tensor, conf_threshold=0.5):
        """Detect the largest head in an image.

        Args:
            image_tensor: [3, H, W] image tensor, values assumed 0-255
                (scaled to [0, 1] in `_preprocess`).
            conf_threshold: confidence cutoff forwarded to NMS.

        Returns:
            (vgg_results, bbox) on success — `bbox` is xyxy in the
            ORIGINAL image coordinate frame; (None, None) if no face
            is detected.
        """
        if not hasattr(self, "model"):
            self._init_models()
        image_tensor = image_tensor.to(self._device).float()
        image, padding, scale = self._preprocess(image_tensor)
        bbox, scores, flame_params = self.model(image)
        bbox, vgg_results = self._postprocess(
            bbox, scores, flame_params, conf_threshold
        )
        if bbox is None:
            # BUGFIX: the original message formatted an undefined
            # `image_key` variable, so this "graceful" path raised
            # NameError instead of returning (None, None).
            print("VGGHeadDetector: No face detected!")
            return None, None
        vgg_results["normalize"] = {"padding": padding, "scale": scale}
        # Map the bbox from the 640x640 letterboxed frame back to the
        # original image: clip, undo padding, undo resize scale.
        bbox = bbox.clip(0, self.image_size)
        bbox[[0, 2]] -= padding[0]
        bbox[[1, 3]] -= padding[1]
        bbox /= scale
        bbox = bbox.clip(0, self.image_size / scale)
        return vgg_results, bbox

    @torch.no_grad()
    def detect_face(self, image_tensor):
        """Return an expanded square head bbox as a long xyxy tensor,
        or None when no face is detected.
        """
        # image_tensor [3, H, W]
        _, bbox = self.forward(image_tensor=image_tensor)
        if bbox is None:
            # BUGFIX: previously fell through into expand_bbox(None)
            # and crashed with AttributeError on the no-face path.
            return None
        return expand_bbox(bbox, scale=1.65).long()

    def _preprocess(self, image):
        """Letterbox `image` ([3, H, W]) to a 640x640 square.

        Returns:
            (image [1, 3, 640, 640] in [0, 1],
             padding np.array([pad_left, pad_top]),
             scale: resize factor from original to letterboxed frame).
        """
        _, h, w = image.shape
        # Resize so the longer side becomes image_size, keeping aspect.
        if h > w:
            new_h, new_w = self.image_size, int(w * self.image_size / h)
        else:
            new_h, new_w = int(h * self.image_size / w), self.image_size
        scale = self.image_size / max(h, w)
        image = torchvision.transforms.functional.resize(
            image, (new_h, new_w), antialias=True
        )
        # Center-pad the shorter side up to a square with mid-gray (127).
        pad_w = self.image_size - image.shape[2]
        pad_h = self.image_size - image.shape[1]
        image = torchvision.transforms.functional.pad(
            image,
            (pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2),
            fill=127,
        )
        image = image.unsqueeze(0).float() / 255.0
        return image, np.array([pad_w // 2, pad_h // 2]), scale

    def _postprocess(self, bbox, scores, flame_params, conf_threshold):
        """Run NMS, keep the largest-area detection, unpack FLAME params.

        Returns (bbox, vgg_results) or (None, None) when nothing survives.
        """
        # flame_params layout: {"shape": 300, "exp": 100, "rotation": 6, "jaw": 3, "translation": 3, "scale": 1}
        bbox, scores, flame_params = nms(
            bbox, scores, flame_params, confidence_threshold=conf_threshold
        )
        if bbox.shape[0] == 0:
            return None, None
        # Keep only the detection with the largest box area.
        max_idx = (
            ((bbox[:, 3] - bbox[:, 1]) * (bbox[:, 2] - bbox[:, 0])).argmax().long()
        )
        bbox, flame_params = bbox[max_idx], flame_params[max_idx]
        # Reject boxes covering (almost) the whole 640x640 letterbox —
        # treated as a degenerate / false detection.
        if bbox[0] < 5 and bbox[1] < 5 and bbox[2] > 635 and bbox[3] > 635:
            return None, None
        # Unpack the flat FLAME vector; global rotation is zeroed and the
        # jaw pose (indices 400:403) is appended.
        posecode = torch.cat([flame_params.new_zeros(3), flame_params[400:403]])
        vgg_results = {
            "rotation_6d": flame_params[403:409],
            "translation": flame_params[409:412],
            "scale": flame_params[412:],
            "shapecode": flame_params[:300],
            "expcode": flame_params[300:400],
            "posecode": posecode,
        }
        return bbox, vgg_results
class FaceDetector:
    """Convenience facade over :class:`VGGHeadDetector`.

    Construct once with a checkpoint path and device, then call the
    instance directly on a [3, H, W] image tensor to get the expanded
    square head bbox (or None when no face is found).
    """

    def __init__(self, model_path, device):
        self.model = VGGHeadDetector(model_path=model_path, device=device)

    @torch.no_grad()
    def __call__(self, image_tensor):
        # Delegate straight to the underlying detector.
        detector = self.model
        return detector.detect_face(image_tensor)

    def __repr__(self):
        return f"Model: {self.model}"
if __name__ == "__main__":
    from PIL import Image

    # Smoke test: detect a head in a sample image and save the crop.
    device = "cuda"
    model_path = "./pretrained_models/gagatracker/vgghead/vgg_heads_l.trcd"
    easy_head_detect = FaceDetector(model_path=model_path, device=device)

    rgb_path = "./man_1.png"
    # HWC uint8 -> CHW tensor.
    # NOTE(review): assumes the PNG decodes to exactly 3 channels — an
    # RGBA image would yield a 4-channel tensor here; confirm the asset.
    rgb = torch.from_numpy(np.array(Image.open(rgb_path))).permute(2, 0, 1)

    bbox = easy_head_detect(rgb)
    x0, y0, x1, y1 = (int(v) for v in bbox)
    # Crop is y-then-x on the CHW tensor; convert back to HWC for saving.
    head_rgb = rgb[:, y0:y1, x0:x1].permute(1, 2, 0).cpu().numpy()
    Image.fromarray(head_rgb).save("head_rgb.png")