from PIL import Image from insightface.app import FaceAnalysis import numpy as np import os from pathlib import Path import time import argparse import cv2 class FaceInference: """人脸检测推理类,封装insightface的推理功能""" def __init__(self, det_thresh=0.5, det_size=(640, 640), ctx_id=0): """ 初始化人脸检测器 Args: det_thresh: 检测阈值 det_size: 检测图像尺寸 ctx_id: GPU设备ID,如果为-1则使用CPU,否则使用GPU """ # 如果 ctx_id 为 -1,使用 CPU;否则使用 GPU if ctx_id == -1: providers = ['CPUExecutionProvider'] provider_options = [{}] ctx_id = -1 # InsightFace 使用 -1 表示 CPU else: providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] provider_options = [{"device_id": str(ctx_id)}, {}] self.face_analysis = FaceAnalysis( allowed_modules=['detection'], providers=providers, provider_options=provider_options, ) self.face_analysis.prepare(ctx_id=ctx_id, det_thresh=det_thresh, det_size=det_size) def _make_square_bbox(self, x1, y1, x2, y2, image_width, image_height): """ 将矩形bbox转换为方形bbox,保持人脸比例不变 Args: x1, y1, x2, y2: 原始bbox坐标 image_width, image_height: 图像尺寸 Returns: tuple: (new_x1, new_y1, new_x2, new_y2) 方形bbox坐标 """ # 计算原始bbox的中心点和尺寸 center_x = (x1 + x2) / 2 center_y = (y1 + y2) / 2 width = x2 - x1 height = y2 - y1 # 取较大的边作为方形的边长 square_size = max(width, height) # 计算方形bbox的坐标 half_size = square_size / 2 new_x1 = center_x - half_size new_y1 = center_y - half_size new_x2 = center_x + half_size new_y2 = center_y + half_size # 处理边界情况,确保方形bbox在图像范围内 if new_x1 < 0: new_x1 = 0 new_x2 = square_size if new_y1 < 0: new_y1 = 0 new_y2 = square_size if new_x2 > image_width: new_x2 = image_width new_x1 = image_width - square_size if new_y2 > image_height: new_y2 = image_height new_y1 = image_height - square_size # 再次确保坐标在有效范围内 new_x1 = max(0, new_x1) new_y1 = max(0, new_y1) new_x2 = min(image_width, new_x2) new_y2 = min(image_height, new_y2) return new_x1, new_y1, new_x2, new_y2 def infer_from_array(self, image_array, n=None): """ 对输入numpy数组进行人脸检测推理 Args: image_array: numpy数组,形状为[H, W, 3],值范围为0-255 n: 选择前n个最大的人脸,如果为None则选择所有人脸 Returns: dict: 包含检测结果的字典,格式为: { 'faces': 检测到的人脸列表, 'bboxes': bbox列表,每个元素为[x, y, width, height], 'masks': mask列表,每个元素为单通道mask图像, 'masked_images': masked图像列表,每个元素为应用mask后的图像, 'image_shape': 原始图像的形状 (height, width, channels) } 如果未检测到人脸,返回中心区域矩形作为默认bbox """ try: if image_array is None: print("错误:输入图像数组为空") return {} # 确保图像数组是正确的格式 if len(image_array.shape) != 3 or image_array.shape[2] != 3: print(f"错误:图像数组形状不正确,期望[H, W, 3],实际{image_array.shape}") return {} # 确保数据类型和值范围正确 if image_array.dtype != np.uint8: image_array = image_array.astype(np.uint8) faces = self.face_analysis.get(image_array) height, width = image_array.shape[:2] if not faces: return { 'faces': [], 'bboxes': [], 'masks': [], 'masked_images': [], 'image_shape': image_array.shape } # 先按人脸面积大小排序,选择前n个最大的人脸 if n is not None and n > 0: # 计算每个人脸的面积并排序 faces_with_area = [(face, (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1])) for face in faces] faces_with_area.sort(key=lambda x: x[1], reverse=True) # 按面积从大到小排序 faces = [face for face, _ in faces_with_area[:n]] # 取前n个最大的人脸 # print(f"选择了前{n}个最大的人脸,总面积分别为: {[area for _, area in faces_with_area[:n]]}") # 再按x坐标从左到右排序 faces = sorted(faces, key=lambda x: x['bbox'][0]) # 生成bbox、mask和masked图像 bboxes = [] masks = [] masked_images = [] for i, face in enumerate(faces): bbox = face['bbox'] x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3] # 将矩形bbox转换为方形bbox square_x1, square_y1, square_x2, square_y2 = self._make_square_bbox( x1, y1, x2, y2, width, height ) # 创建方形mask mask = np.zeros(image_array.shape[:2], dtype=np.uint8) mask[int(square_y1):int(square_y2), int(square_x1):int(square_x2)] = 1.0 # 创建mask与原图相乘的结果 masked_image = image_array.copy() masked_image = cv2.bitwise_and(masked_image, masked_image, mask=mask) bboxes.append([square_x1, square_y1, square_x2 - square_x1, square_y2 - square_y1]) masks.append(mask) masked_images.append(masked_image) # print(f" 人脸 {i+1}: 原始bbox=[{x1:.1f}, {y1:.1f}, {x2-x1:.1f}, {y2-y1:.1f}] -> 方形bbox=[{square_x1:.1f}, {square_y1:.1f}, {square_x2-square_x1:.1f}, {square_y2-square_y1:.1f}]") return { 'faces': faces, 'bboxes': bboxes, 'masks': masks, 'masked_images': masked_images, 'image_shape': image_array.shape } except Exception as e: print(f"处理图像数组时出错: {str(e)}") # 异常情况下也返回中心区域 if 'image_array' in locals() and image_array is not None: return { 'faces': [], 'bboxes': [], 'masks': [], 'masked_images': [], 'image_shape': image_array.shape } return {} def infer(self, image_path, n=None): """ 对输入图像进行人脸检测推理 Args: image_path: 图像文件路径或图片 n: 选择前n个最大的人脸,如果为None则选择所有人脸 Returns: dict: 包含检测结果的字典,格式为: { 'faces': 检测到的人脸列表, 'bboxes': bbox列表,每个元素为[x, y, width, height], 'masks': mask列表,每个元素为单通道mask图像, 'masked_images': masked图像列表,每个元素为应用mask后的图像, 'image_shape': 原始图像的形状 (height, width, channels) } 如果未检测到人脸,返回中心区域矩形作为默认bbox """ try: image = cv2.imread(image_path) if image is None: print(f"错误:无法读取图像 {image_path}") # 无法读取图像,返回空结果 return {} faces = self.face_analysis.get(image) height, width = image.shape[:2] if not faces: print(f"警告:图像 {os.path.basename(image_path)} 中未检测到人脸,使用中心区域作为默认方形bbox") # 计算中心区域方形(边长为原图较小边的50%) min_dim = min(width, height) square_size = min_dim // 2 center_x, center_y = width // 2, height // 2 x1 = center_x - square_size // 2 y1 = center_y - square_size // 2 x2 = x1 + square_size y2 = y1 + square_size # 确保bbox在图像范围内 x1 = max(0, x1) y1 = max(0, y1) x2 = min(width, x2) y2 = min(height, y2) # 创建中心区域的方形mask mask = np.zeros(image.shape[:2], dtype=np.uint8) mask[int(y1):int(y2), int(x1):int(x2)] = 1.0 # 创建masked图像 masked_image = image.copy() masked_image = cv2.bitwise_and(masked_image, masked_image, mask=mask) return { 'faces': [], 'bboxes': [[x1, y1, x2 - x1, y2 - y1]], 'masks': [mask], 'masked_images': [masked_image], 'image_shape': image.shape } # 先按人脸面积大小排序,选择前n个最大的人脸 if n is not None and n > 0: # 计算每个人脸的面积并排序 faces_with_area = [(face, (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1])) for face in faces] faces_with_area.sort(key=lambda x: x[1], reverse=True) # 按面积从大到小排序 faces = [face for face, _ in faces_with_area[:n]] # 取前n个最大的人脸 # 再按x坐标从左到右排序 faces = sorted(faces, key=lambda x: x['bbox'][0]) # 生成bbox、mask和masked图像 bboxes = [] masks = [] masked_images = [] for i, face in enumerate(faces): bbox = face['bbox'] x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3] # 将矩形bbox转换为方形bbox square_x1, square_y1, square_x2, square_y2 = self._make_square_bbox( x1, y1, x2, y2, width, height ) # 创建方形mask mask = np.zeros(image.shape[:2], dtype=np.uint8) mask[int(square_y1):int(square_y2), int(square_x1):int(square_x2)] = 1.0 # 创建mask与原图相乘的结果 masked_image = image.copy() masked_image = cv2.bitwise_and(masked_image, masked_image, mask=mask) bboxes.append([square_x1, square_y1, square_x2 - square_x1, square_y2 - square_y1]) masks.append(mask) masked_images.append(masked_image) return { 'faces': faces, 'bboxes': bboxes, 'masks': masks, 'masked_images': masked_images, 'image_shape': image.shape } except Exception as e: print(f"处理图像 {image_path} 时出错: {str(e)}") # 异常情况下也返回中心区域方形 if 'image' in locals() and image is not None: height, width = image.shape[:2] # 计算中心区域方形(边长为原图较小边的50%) min_dim = min(width, height) square_size = min_dim // 2 center_x, center_y = width // 2, height // 2 x1 = center_x - square_size // 2 y1 = center_y - square_size // 2 x2 = x1 + square_size y2 = y1 + square_size # 确保bbox在图像范围内 x1 = max(0, x1) y1 = max(0, y1) x2 = min(width, x2) y2 = min(height, y2) # 创建中心区域的方形mask mask = np.zeros(image.shape[:2], dtype=np.uint8) mask[int(y1):int(y2), int(x1):int(x2)] = 1.0 # 创建masked图像 masked_image = image.copy() masked_image = cv2.bitwise_and(masked_image, masked_image, mask=mask) return { 'faces': [], 'bboxes': [[x1, y1, x2 - x1, y2 - y1]], 'masks': [mask], 'masked_images': [masked_image], 'image_shape': image.shape } return {} class FaceProcessor: def __init__(self, det_thresh=0.5, det_size=(640, 640)): self.face_analysis = FaceAnalysis(allowed_modules=['detection']) self.face_analysis.prepare(ctx_id=0, det_thresh=det_thresh, det_size=det_size) def _make_square_bbox(self, x1, y1, x2, y2, image_width, image_height): """ 将矩形bbox转换为方形bbox,保持人脸比例不变 Args: x1, y1, x2, y2: 原始bbox坐标 image_width, image_height: 图像尺寸 Returns: tuple: (new_x1, new_y1, new_x2, new_y2) 方形bbox坐标 """ # 计算原始bbox的中心点和尺寸 center_x = (x1 + x2) / 2 center_y = (y1 + y2) / 2 width = x2 - x1 height = y2 - y1 # 取较大的边作为方形的边长 square_size = max(width, height) # 计算方形bbox的坐标 half_size = square_size / 2 new_x1 = center_x - half_size new_y1 = center_y - half_size new_x2 = center_x + half_size new_y2 = center_y + half_size # 处理边界情况,确保方形bbox在图像范围内 if new_x1 < 0: new_x1 = 0 new_x2 = square_size if new_y1 < 0: new_y1 = 0 new_y2 = square_size if new_x2 > image_width: new_x2 = image_width new_x1 = image_width - square_size if new_y2 > image_height: new_y2 = image_height new_y1 = image_height - square_size # 再次确保坐标在有效范围内 new_x1 = max(0, new_x1) new_y1 = max(0, new_y1) new_x2 = min(image_width, new_x2) new_y2 = min(image_height, new_y2) return new_x1, new_y1, new_x2, new_y2 def get_face_bbox_and_mask(self, image): faces = self.face_analysis.get(image) if not faces: print("警告:图像中未检测到人脸。") return None, None, None # 按x坐标从左到右排序 faces = sorted(faces, key=lambda x: x['bbox'][0]) height, width = image.shape[:2] bboxes = [] masks = [] masked_images = [] for i, face in enumerate(faces): bbox = face['bbox'] x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3] # 将矩形bbox转换为方形bbox square_x1, square_y1, square_x2, square_y2 = self._make_square_bbox( x1, y1, x2, y2, width, height ) # 创建方形mask mask = np.zeros(image.shape[:2], dtype=np.uint8) mask[int(square_y1):int(square_y2), int(square_x1):int(square_x2)] = 1.0 # 创建mask与原图相乘的结果 masked_image = image.copy() masked_image = cv2.bitwise_and(masked_image, masked_image, mask=mask) bboxes.append([square_x1, square_y1, square_x2 - square_x1, square_y2 - square_y1]) masks.append(mask) masked_images.append(masked_image) return bboxes, masks, masked_images def main(): parser = argparse.ArgumentParser(description='Process images to detect faces and save bbox, mask, and masked images.') parser.add_argument('--input_dir', type=str, default="./data/bbox_test_input", help='Directory containing input images.') parser.add_argument('--bbox_output_dir', type=str, default="./temp/bbox", help='Directory to save bbox npy files.') parser.add_argument('--mask_output_dir', type=str, default="./temp/mask", help='Directory to save mask images.') parser.add_argument('--masked_image_output_dir', type=str, default="./temp/masked_images", help='Directory to save masked images.') args = parser.parse_args() # 创建输出目录 os.makedirs(args.bbox_output_dir, exist_ok=True) os.makedirs(args.mask_output_dir, exist_ok=True) os.makedirs(args.masked_image_output_dir, exist_ok=True) # 初始化人脸检测器 face_processor = FaceProcessor() # 支持的图像格式 supported_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'} # 获取所有图像文件 image_files = [] for file in os.listdir(args.input_dir): if Path(file).suffix.lower() in supported_formats: image_files.append(file) if not image_files: print(f"警告:在目录 {args.input_dir} 中未找到支持的图像文件") return # 处理每个图像 for image_file in image_files: image_path = os.path.join(args.input_dir, image_file) # 读取图像 image = cv2.imread(image_path) if image is None: print(f" 错误:无法读取图像 {image_path}") continue # 获取人脸检测结果 bboxes, masks, masked_images = face_processor.get_face_bbox_and_mask(image) if bboxes is None: print(f" 跳过:未检测到人脸") continue # 生成基础文件名(不含扩展名) base_name = Path(image_file).stem # 保存bbox为npy文件 bbox_file = os.path.join(args.bbox_output_dir, f"{base_name}_bbox.npy") np.save(bbox_file, np.array(bboxes)) # 保存mask和masked图像 for i, (mask, masked_image) in enumerate(zip(masks, masked_images)): # 保存mask mask_file = os.path.join(args.mask_output_dir, f"{base_name}_face{i+1}_mask.png") cv2.imwrite(mask_file, mask) # 保存masked图像 masked_image_file = os.path.join(args.masked_image_output_dir, f"{base_name}_face{i+1}_masked.png") cv2.imwrite(masked_image_file, masked_image) print(f" 已保存人脸{i+1}的mask: {mask_file}") print(f" 已保存人脸{i+1}的masked图像: {masked_image_file}") print(f"\n处理完成!") print(f"bbox文件保存在: {args.bbox_output_dir}") print(f"mask文件保存在: {args.mask_output_dir}") print(f"masked图像保存在: {args.masked_image_output_dir}") if __name__ == "__main__": main()