Spaces:

FunAudioLLM
/

Fun-CineForge-Demo

Running on Zero

App Files Files Community

Fun-CineForge-Demo / speaker_diarization /local /vision_tools /face_detection.py

xuan3986

Upload 111 files

03022ee verified 20 days ago

raw

history blame contribute delete

7.22 kB

	import torch
	import cv2
	import os
	import numpy as np
	import onnxruntime


	def area_of(left_top, right_bottom) -> torch.Tensor:
	"""Compute the areas of rectangles given two corners.

	Args:
	left_top (N, 2): left top corner.
	right_bottom (N, 2): right bottom corner.

	Returns:
	area (N): return the area.
	"""
	hw = torch.clamp(right_bottom - left_top, min=0.0)
	return hw[..., 0] * hw[..., 1]

	def iou_of(boxes0, boxes1, eps=1e-5):
	"""Return intersection-over-union (Jaccard index) of boxes.

	Args:
	boxes0 (N, 4): ground truth boxes.
	boxes1 (N or 1, 4): predicted boxes.
	eps: a small number to avoid 0 as denominator.
	Returns:
	iou (N): IoU values.
	"""
	overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
	overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])

	overlap_area = area_of(overlap_left_top, overlap_right_bottom)
	area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
	area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
	return overlap_area / (area0 + area1 - overlap_area + eps)

	def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
	"""
	Args:
	box_scores (N, 5): boxes in corner-form and probabilities.
	iou_threshold: intersection over union threshold.
	top_k: keep top_k results. If k <= 0, keep all the results.
	candidate_size: only consider the candidates with the highest scores.
	Returns:
	picked: a list of indexes of the kept boxes
	"""
	scores = box_scores[:, -1]
	boxes = box_scores[:, :-1]
	picked = []
	_, indexes = scores.sort(descending=True)
	indexes = indexes[:candidate_size]
	while len(indexes) > 0:
	current = indexes[0]
	picked.append(current.item())
	if 0 < top_k == len(picked) or len(indexes) == 1:
	break
	current_box = boxes[current, :]
	indexes = indexes[1:]
	rest_boxes = boxes[indexes, :]
	iou = iou_of(
	rest_boxes,
	current_box.unsqueeze(0),
	)
	indexes = indexes[iou <= iou_threshold]

	return box_scores[picked, :]


	class Resize(object):
	def __init__(self, size=(300, 300)):
	self.size = size

	def __call__(self, image, boxes=None, labels=None):
	image = cv2.resize(image, (self.size[0],
	self.size[1]))
	return image, boxes, labels


	class SubtractMeans(object):
	def __init__(self, mean):
	self.mean = np.array(mean, dtype=np.float32)

	def __call__(self, image, boxes=None, labels=None):
	image = image.astype(np.float32)
	image -= self.mean
	return image.astype(np.float32), boxes, labels


	class ToTensor(object):
	def __call__(self, cvimage, boxes=None, labels=None):
	return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels


	class Compose(object):
	"""Composes several augmentations together.
	Args:
	transforms (List[Transform]): list of transforms to compose.
	Example:
	>>> augmentations.Compose([
	>>> transforms.CenterCrop(10),
	>>> transforms.ToTensor(),
	>>> ])
	"""

	def __init__(self, transforms):
	self.transforms = transforms

	def __call__(self, img, boxes=None, labels=None):
	for t in self.transforms:
	img, boxes, labels = t(img, boxes, labels)
	return img, boxes, labels


	class PredictionTransform:
	def __init__(self, size, mean=0.0, std=1.0):
	self.transform = Compose([
	Resize(size),
	SubtractMeans(mean),
	lambda img, boxes=None, labels=None: (img / std, boxes, labels),
	ToTensor()
	])

	def __call__(self, image):
	image, _, _ = self.transform(image)
	return image


	class Config:
	image_size = [320, 240]
	image_mean_test = np.array([127, 127, 127])
	image_std = 128.0


	class Predictor:
	"""
	Face detection with pretrained model.
	Reference:
	- https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
	"""
	def __init__(
	self,
	onnx_dir,
	device='cpu',
	device_id=0,
	iou_threshold=0.3,
	filter_threshold=0.01,
	candidate_size=200,
	):
	onnx_file_name = os.path.join(onnx_dir, 'version-RFB-320.onnx')
	assert os.path.exists(onnx_file_name), \
	'%s does not exist. Please check if it has been downloaded accurately.' % onnx_file_name
	self.ort_net = self.create_net(onnx_file_name, device, device_id)
	self.transform = PredictionTransform(
	Config.image_size, Config.image_mean_test, Config.image_std)
	self.iou_threshold = iou_threshold
	self.filter_threshold = filter_threshold
	self.candidate_size = candidate_size
	self.device = device

	def __call__(self, image, top_k=-1, prob_threshold=None):
	height, width, _ = image.shape
	image = self.transform(image)
	images = image.unsqueeze(0).numpy()
	# net inference
	inputs = {self.ort_net.get_inputs()[0].name:images}
	scores, boxes = self.ort_net.run(None, inputs)
	boxes = torch.from_numpy(boxes[0])
	scores = torch.from_numpy(scores[0])
	if not prob_threshold:
	prob_threshold = self.filter_threshold
	picked_box_probs = []
	picked_labels = []
	for class_index in range(1, scores.size(1)):
	probs = scores[:, class_index]
	mask = probs > prob_threshold
	probs = probs[mask]
	if probs.size(0) == 0:
	continue
	subset_boxes = boxes[mask, :]
	box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
	box_probs = hard_nms(box_probs, self.iou_threshold, top_k, self.candidate_size)
	picked_box_probs.append(box_probs)
	picked_labels.extend([class_index] * box_probs.size(0))
	if not picked_box_probs:
	return torch.tensor([]), torch.tensor([]), torch.tensor([])
	picked_box_probs = torch.cat(picked_box_probs)
	picked_box_probs[:, 0] *= width
	picked_box_probs[:, 1] *= height
	picked_box_probs[:, 2] *= width
	picked_box_probs[:, 3] *= height
	return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]

	def create_net(self, onnx_file_name, device='cpu', device_id=0):
	options = onnxruntime.SessionOptions()
	# set op_num_threads
	options.intra_op_num_threads = 8
	options.inter_op_num_threads = 8
	# set providers
	providers = ['CPUExecutionProvider']
	if device == 'cuda':
	providers.insert(0, ('CUDAExecutionProvider', {'device_id': device_id}))
	ort_session = onnxruntime.InferenceSession(onnx_file_name, options, providers=providers)
	return ort_session


	if __name__ == '__main__':
	predictor_det = Predictor('pretrained_models', 'cuda', '0')
	image_input = np.random.randn(1920, 1080, 3).astype('float32')
	bboxes, _, probs = predictor_det(image_input, top_k=10, prob_threshold=0.9)