"""Face detection and cropping nodes for ComfyUI, based on OpenCV Haar cascades.

The module exposes two node classes sharing one implementation:

* ``FaceDetectionNode``   - defined only when the ComfyUI v3 node API imports.
* ``FaceDetectionNodeV1`` - the classic ``INPUT_TYPES`` / ``FUNCTION`` node.

``NODE_CLASS_MAPPINGS`` points at whichever of the two fits the running
ComfyUI version. All heavy lifting lives in the private module-level helpers
below so that the two node classes cannot drift apart.
"""

import logging
import os
from typing import Callable, List, Optional, Tuple

import cv2
import numpy as np
import torch
from PIL import Image

try:
    from comfy_api.v0_0_3_io import (
        ComfyNode,
        Schema,
        InputBehavior,
        NumberDisplay,
        IntegerInput,
        MaskInput,
        ImageInput,
        ImageOutput,
        ComboInput,
        CustomInput,
        IntegerOutput,
        NodeOutput,
    )
    COMFY_V3_AVAILABLE = True
except ImportError:
    # Mock classes for v1/v2 compatibility. Every name imported above gets a
    # placeholder so the module namespace is the same in both branches.
    ComfyNode = object
    Schema = None
    InputBehavior = None
    NumberDisplay = None
    IntegerInput = None
    MaskInput = None
    ImageInput = None
    ImageOutput = None
    ComboInput = None
    CustomInput = None
    IntegerOutput = None
    NodeOutput = None
    COMFY_V3_AVAILABLE = False


def _resolve_log_level(name: str) -> int:
    """Map a level name such as ``"DEBUG"`` to its numeric logging level.

    Falls back to ``logging.INFO`` when *name* is unknown or resolves to a
    non-integer attribute of the ``logging`` module (e.g. ``BASIC_FORMAT``),
    which would otherwise make ``logging.basicConfig`` raise at import time.
    """
    level = getattr(logging, name, None)
    return level if isinstance(level, int) else logging.INFO


# Configure logging level from environment variable
log_level = os.getenv('COMFYUI_FACE_DETECTION_LOG_LEVEL', 'INFO').upper()
logging.basicConfig(level=_resolve_log_level(log_level))
logger = logging.getLogger(__name__)

# Upper bound (in pixels) applied to the height/width of resized face crops.
_MAX_FACE_DIM = 512
# Shape [B, H, W, C] of the all-zero image returned when nothing can be cropped.
_BLANK_OUTPUT_SHAPE = (1, 512, 512, 3)


def _blank_output() -> torch.Tensor:
    """Return a fresh all-zero image tensor used as the "no result" output."""
    return torch.zeros(_BLANK_OUTPUT_SHAPE)


def _load_cascade(filename: str, log: Callable[[str], None],
                  not_found: str, load_failed: str) -> "Optional[cv2.CascadeClassifier]":
    """Load one Haar cascade shipped with OpenCV.

    Args:
        filename: Cascade file name inside ``cv2.data.haarcascades``.
        log: Logger method used to report problems (``logger.error`` or
            ``logger.warning``).
        not_found: Message template (with ``{path}``) for a missing file.
        load_failed: Message template (with ``{path}``) for an empty classifier.

    Returns:
        The loaded classifier, or ``None`` when the file is missing or
        OpenCV produced an empty classifier.
    """
    path = cv2.data.haarcascades + filename
    if not os.path.exists(path):
        log(not_found.format(path=path))
        return None
    cascade = cv2.CascadeClassifier(path)
    if cascade.empty():
        log(load_failed.format(path=path))
        return None
    return cascade


def _load_cascades() -> tuple:
    """Load the default and the alternative frontal-face cascades.

    Returns:
        ``(default_cascade, alternative_cascade)``; either entry may be
        ``None``. If anything raises while loading, the error is logged and
        ``(None, None)`` is returned so node construction never fails.
    """
    try:
        # Default Haar cascade - most commonly used and well-tested
        default_cascade = _load_cascade(
            'haarcascade_frontalface_default.xml',
            logger.error,
            "Default cascade file not found: {path}",
            "Failed to load cascade from {path}",
        )
        # Alternative Haar cascade - different training, may detect faces
        # missed by default
        alternative_cascade = _load_cascade(
            'haarcascade_frontalface_alt.xml',
            logger.warning,
            "Alternative cascade file not found: {path}",
            "Failed to load alternative cascade from {path}",
        )
    except Exception as e:  # best-effort boundary: never break node loading
        logger.error(f"Error initializing cascade classifiers: {str(e)}")
        return None, None
    return default_cascade, alternative_cascade


def _crop_with_padding(image: np.ndarray, face_rect: Tuple[int, int, int, int],
                       padding: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
    """Crop *face_rect* grown by *padding* pixels, clamped to the image.

    Args:
        image: Array whose first two axes are height and width.
        face_rect: ``(x, y, w, h)`` of the detected face.
        padding: Pixels added on every side before clamping.

    Returns:
        ``(crop, (x1, y1, crop_w, crop_h))`` where ``crop`` is a view into
        *image* and the rectangle is the clamped region that was cut out.
    """
    x, y, w, h = face_rect
    height, width = image.shape[:2]

    # Calculate padded coordinates, clamped to the image boundaries
    x1 = max(0, x - padding)
    y1 = max(0, y - padding)
    x2 = min(width, x + w + padding)
    y2 = min(height, y + h + padding)

    return image[y1:y2, x1:x2], (x1, y1, x2 - x1, y2 - y1)


def _faces_to_batch(cropped_faces: List[np.ndarray]) -> torch.Tensor:
    """Turn face crops into one image batch with a common size.

    Every crop is resized to the same ``(width, height)``: the largest
    width and the largest height found among the crops, each capped at 512
    pixels. Because all crops share one target size, individual aspect
    ratios are NOT preserved.

    Args:
        cropped_faces: Non-empty list of ``(H, W)`` or ``(H, W, C)`` arrays
            with ``C`` in ``{1, 3, 4}``.

    Returns:
        Float tensor of shape ``[N, H, W, 3]`` with values divided by 255.

    Raises:
        ValueError: If the list is empty or the channel count is unsupported.
    """
    if not cropped_faces:
        raise ValueError("cropped_faces must contain at least one face image")

    # Use 512px as maximum to balance quality with memory usage
    target_height = min(_MAX_FACE_DIM, max(face.shape[0] for face in cropped_faces))
    target_width = min(_MAX_FACE_DIM, max(face.shape[1] for face in cropped_faces))
    target_size = (target_width, target_height)  # cv2 expects (width, height)

    resized_faces = []
    for face in cropped_faces:
        resized = cv2.resize(face, target_size)
        if resized.ndim == 2:
            # cv2.resize drops the channel axis of single-channel images;
            # restore it so stacking yields a 4D [N, H, W, C] array.
            resized = resized[:, :, np.newaxis]
        resized_faces.append(resized)

    # Stack faces as batch dimension [N, H, W, C]
    batch = np.stack(resized_faces, axis=0)

    channels = batch.shape[3]
    if channels == 1:
        batch = np.repeat(batch, 3, axis=3)  # grayscale -> RGB
    elif channels == 4:
        batch = batch[:, :, :, :3]  # drop alpha
    elif channels != 3:
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        raise ValueError(f"Output must have 3 channels, got {channels}")

    return torch.from_numpy(batch).float() / 255.0


def _faces_to_strip(cropped_faces: List[np.ndarray]) -> np.ndarray:
    """Lay face crops side by side in one horizontal strip.

    All crops are resized to a common height (the tallest crop, capped at
    512 pixels) while keeping each crop's own aspect ratio.
    """
    strip_height = min(_MAX_FACE_DIM, max(face.shape[0] for face in cropped_faces))
    resized_faces = []
    for face in cropped_faces:
        aspect_ratio = face.shape[1] / face.shape[0]
        # Guard against a zero width for extremely narrow crops.
        new_width = max(1, int(strip_height * aspect_ratio))
        resized_faces.append(cv2.resize(face, (new_width, strip_height)))
    return np.hstack(resized_faces)


def _to_rgb_uint8(image) -> np.ndarray:
    """Convert a node input into a single ``(H, W, 3)`` ``uint8`` array.

    Tensors are treated as ``[B, H, W, C]`` (or ``[H, W, C]``); only the
    first batch item is used. One-channel input is repeated to three
    channels and anything beyond the first three channels is dropped.
    Values are scaled by 255 when the maximum is ``<= 1.0`` and always
    clipped to ``[0, 255]`` before the cast. Non-tensor input is validated
    and returned unchanged.

    Raises:
        ValueError: On unsupported rank, channel count, or an empty batch.
    """
    if isinstance(image, torch.Tensor):
        logger.debug(f"Processing tensor - Shape: {image.shape}, Type: {image.dtype}")

        # Ensure 4D tensor [B, H, W, C]
        if image.dim() == 3:
            image = image.unsqueeze(0)
        elif image.dim() != 4:
            raise ValueError(f"Expected 3D or 4D tensor, got shape: {image.shape}")

        batch_size, _, _, channels = image.shape
        if batch_size == 0:
            raise ValueError("Input image batch is empty")
        if batch_size > 1:
            logger.warning(
                f"Input batch has {batch_size} images; only the first one is processed"
            )

        first = image[0]
        if channels == 1:
            first = first.repeat(1, 1, 3)  # Grayscale to RGB
        elif channels >= 4:
            if channels > 4:
                logger.warning(f"Input has {channels} channels, using first 3")
            first = first[:, :, :3]  # RGBA (or wider) to RGB
        elif channels != 3:
            raise ValueError(f"Cannot handle {channels} channels")

        # Single conversion: tensor -> numpy (uint8)
        image_np = first.detach().cpu().numpy()
        if image_np.max() <= 1.0:
            image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
        else:
            image_np = np.clip(image_np, 0, 255).astype(np.uint8)
    else:
        # Already numpy array
        image_np = image

    # Validate and ensure RGB format
    if not isinstance(image_np, np.ndarray) or image_np.ndim != 3:
        raise ValueError(
            f"Expected 3D numpy array, got {type(image_np)} with shape "
            f"{getattr(image_np, 'shape', 'unknown')}"
        )
    if image_np.shape[2] != 3:
        raise ValueError(f"Expected RGB image (3 channels), got {image_np.shape[2]} channels")
    return image_np


def _select_cascade(classifier_type: str, default_cascade, alternative_cascade):
    """Pick the cascade for *classifier_type*, or ``None`` if none is usable.

    ``"alternative"`` falls back to the default cascade when the alternative
    one is unavailable; every other value selects the default cascade.
    """
    if classifier_type == "alternative":
        if alternative_cascade is not None:
            return alternative_cascade
        logger.warning("Alternative Haar cascade not available, falling back to default")
        if default_cascade is None:
            logger.error("No cascade classifiers available")
        return default_cascade

    if default_cascade is None:
        logger.error("Default Haar cascade not available")
    return default_cascade


def _detect_and_crop(image, min_face_size: int, padding: int, output_mode: str,
                     face_output_format: str, classifier_type: str,
                     default_cascade, alternative_cascade,
                     crop_face: Callable = _crop_with_padding,
                     batch_faces: Callable = _faces_to_batch) -> torch.Tensor:
    """Run the full detect -> crop -> assemble pipeline shared by both nodes.

    Args:
        image: Input tensor or ``(H, W, 3)`` numpy array.
        min_face_size: Minimum face edge length passed to the detector.
        padding: Pixels added around each detected face.
        output_mode: ``"largest_face"`` keeps only the biggest detection;
            any other value keeps all detections.
        face_output_format: ``"individual"`` returns one batch item per face
            (only with ``output_mode="all_faces"`` and more than one face);
            otherwise multiple faces are joined into a horizontal strip.
        classifier_type: ``"default"`` or ``"alternative"``.
        default_cascade: Loaded default classifier or ``None``.
        alternative_cascade: Loaded alternative classifier or ``None``.
        crop_face: Callable ``(image, rect, padding) -> (crop, rect)``.
        batch_faces: Callable turning a list of crops into a batch tensor.

    Returns:
        Float tensor ``[B, H, W, 3]``. A zero tensor of shape
        ``(1, 512, 512, 3)`` is returned when no classifier is available,
        detection fails, or no face is found.

    Raises:
        ValueError: If the input cannot be converted to an RGB image.
    """
    image_np = _to_rgb_uint8(image)

    # Convert to grayscale for face detection
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    face_cascade = _select_cascade(classifier_type, default_cascade, alternative_cascade)
    if face_cascade is None:
        return _blank_output()

    try:
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(min_face_size, min_face_size),
        )
    except Exception as e:  # detection is best-effort: log and return blank
        logger.error(f"Face detection failed: {str(e)}")
        return _blank_output()

    # ``faces`` may be a numpy array, so test its length rather than truthiness.
    if len(faces) == 0:
        logger.warning("No faces detected in image")
        # Return empty image with correct dimensions [B, H, W, C]
        return _blank_output()

    rects = [tuple(int(v) for v in rect) for rect in faces]
    if output_mode == "largest_face":
        # Rank by the detected face area. Ranking the padded crops instead
        # would penalise faces near the border, whose padding gets clipped.
        rects = [max(rects, key=lambda rect: rect[2] * rect[3])]

    cropped_faces = [crop_face(image_np, rect, padding)[0] for rect in rects]

    # Note: face_output_format only applies when output_mode="all_faces"
    # with multiple faces
    if (output_mode == "all_faces" and len(cropped_faces) > 1
            and face_output_format == "individual"):
        return batch_faces(cropped_faces)

    if len(cropped_faces) > 1:
        result = _faces_to_strip(cropped_faces)
    else:
        result = cropped_faces[0]

    # Crops come from a validated 3-channel image; keep a cheap sanity check.
    if result.ndim != 3 or result.shape[2] != 3:
        raise ValueError(f"Output must have 3 channels, got shape {result.shape}")

    # Convert back to tensor with proper dimensions [B, H, W, C]
    return (torch.from_numpy(result).float() / 255.0).unsqueeze(0)


if COMFY_V3_AVAILABLE:
    class FaceDetectionNode(ComfyNode):
        """ComfyUI v3 node that detects faces and returns the cropped regions."""

        @classmethod
        def DEFINE_SCHEMA(cls):
            """Describe the node's inputs and outputs for the v3 API."""
            return Schema(
                node_id="FaceDetectionNode",
                display_name="Face Detection and Crop",
                description="Detect and crop faces from images using Haar cascades.",
                category="image/processing",
                inputs=[
                    ImageInput("image", display_name="Input Image"),
                    CustomInput(
                        "detection_threshold",
                        io_type="FLOAT",
                        min=0.1,
                        max=1.0,
                        default=0.8,
                        tooltip="Confidence threshold for face detection",
                        display_mode=NumberDisplay.slider,
                    ),
                    IntegerInput(
                        "min_face_size",
                        display_name="Min Face Size",
                        min=32,
                        max=512,
                        default=64,
                        tooltip="Minimum size for detected faces",
                        display_mode=NumberDisplay.slider,
                    ),
                    IntegerInput(
                        "padding",
                        display_name="Padding",
                        min=0,
                        max=256,
                        default=32,
                        tooltip="Padding around detected faces",
                        display_mode=NumberDisplay.slider,
                    ),
                    ComboInput(
                        "output_mode",
                        options=["largest_face", "all_faces"],
                        tooltip="Output mode for detected faces",
                    ),
                    ComboInput(
                        "face_output_format",
                        options=["strip", "individual"],
                        tooltip=(
                            "Format for multiple faces: strip (horizontal layout) "
                            "or individual (separate batch items). Only applies "
                            "when output_mode='all_faces'. Max size: 512px."
                        ),
                        behavior=InputBehavior.optional,
                    ),
                    ComboInput(
                        "classifier_type",
                        options=["default", "alternative"],
                        behavior=InputBehavior.optional,
                    ),
                ],
                outputs=[
                    ImageOutput(
                        "cropped_faces",
                        display_name="Cropped Faces",
                        tooltip="Detected and cropped faces",
                    ),
                ],
                is_output_node=False,
            )

        @staticmethod
        def _get_cascade_classifiers():
            """Get cascade classifiers - static method for stateless execution.

            Returns ``(default_cascade, alternative_cascade)``; either may be
            ``None`` when it could not be loaded.
            """
            return _load_cascades()

        @staticmethod
        def add_padding(image: np.ndarray, face_rect: Tuple[int, int, int, int],
                        padding: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
            """Add padding around detected face and handle boundaries."""
            return _crop_with_padding(image, face_rect, padding)

        @staticmethod
        def _process_individual_faces(cropped_faces: List[np.ndarray]) -> torch.Tensor:
            """Process multiple faces into individual batch items.

            Args:
                cropped_faces: List of face images as numpy arrays.

            Returns:
                Tensor with shape [N, H, W, C] where N is the number of faces.

            Note:
                All faces are resized to one common size (max 512px per
                side), so individual aspect ratios are not preserved.
            """
            return _faces_to_batch(cropped_faces)

        @classmethod
        async def execute(cls, image: torch.Tensor, detection_threshold: float,
                          min_face_size: int, padding: int, output_mode: str,
                          face_output_format: str = "strip",
                          classifier_type: str = "default",
                          mask: Optional[torch.Tensor] = None) -> NodeOutput:
            """Detect faces in *image* and return the cropped result.

            ``detection_threshold`` and ``mask`` are accepted for interface
            compatibility but are not used by the detection pipeline.
            Cascades are loaded on every call so the node holds no state.
            """
            default_cascade, alternative_cascade = cls._get_cascade_classifiers()
            result = _detect_and_crop(
                image,
                min_face_size,
                padding,
                output_mode,
                face_output_format,
                classifier_type,
                default_cascade,
                alternative_cascade,
                crop_face=cls.add_padding,
                batch_faces=cls._process_individual_faces,
            )
            return NodeOutput(cropped_faces=result)

        @classmethod
        def IS_CHANGED(cls, **kwargs):
            """Always report a constant value (``False``)."""
            return False


# Backward compatibility wrapper for v1/v2
class FaceDetectionNodeV1:
    """Backward compatibility wrapper for ComfyUI v1/v2"""

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's inputs in the classic dictionary format."""
        return {
            "required": {
                "image": ("IMAGE",),
                "detection_threshold": ("FLOAT", {
                    "default": 0.8,
                    "min": 0.1,
                    "max": 1.0,
                    "step": 0.1
                }),
                "min_face_size": ("INT", {
                    "default": 64,
                    "min": 32,
                    "max": 512,
                    "step": 8
                }),
                "padding": ("INT", {
                    "default": 32,
                    "min": 0,
                    "max": 256,
                    "step": 8
                }),
                "output_mode": (["largest_face", "all_faces"],),
            },
            "optional": {
                "face_output_format": (["strip", "individual"], {"default": "strip"}),
                "classifier_type": (["default", "alternative"], {"default": "default"}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("Cropped Faces",)
    FUNCTION = "detect_and_crop_faces"
    CATEGORY = "image/processing"

    def __init__(self):
        """Load both Haar cascades once; either attribute may end up ``None``."""
        self.default_cascade, self.alternative_cascade = _load_cascades()

    def add_padding(self, image: np.ndarray, face_rect: Tuple[int, int, int, int],
                    padding: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
        """Add padding around detected face and handle boundaries."""
        return _crop_with_padding(image, face_rect, padding)

    def _process_individual_faces(self, cropped_faces: List[np.ndarray]) -> torch.Tensor:
        """Process multiple faces into individual batch items.

        Uses the same implementation as the v3 node: every face is resized
        to one common size (max 512px per side) and stacked to [N, H, W, 3].
        """
        return _faces_to_batch(cropped_faces)

    def detect_and_crop_faces(self, image, detection_threshold, min_face_size,
                              padding, output_mode, face_output_format="strip",
                              classifier_type="default"):
        """Legacy method for v1/v2 compatibility.

        ``detection_threshold`` is accepted for interface compatibility but
        is not used by the detection pipeline.

        Returns:
            One-element tuple holding the result tensor ``[B, H, W, 3]``.
        """
        result = _detect_and_crop(
            image,
            min_face_size,
            padding,
            output_mode,
            face_output_format,
            classifier_type,
            self.default_cascade,
            self.alternative_cascade,
            crop_face=self.add_padding,
            batch_faces=self._process_individual_faces,
        )
        return (result,)

    @classmethod
    def IS_CHANGED(cls, **kwargs):
        """Always report a constant value (``False``)."""
        return False


# Export appropriate node class based on ComfyUI version
if COMFY_V3_AVAILABLE:
    # v3 available, use new node
    NODE_CLASS_MAPPINGS = {
        "FaceDetectionNode": FaceDetectionNode
    }
else:
    # Fall back to v1/v2 compatibility
    NODE_CLASS_MAPPINGS = {
        "FaceDetectionNode": FaceDetectionNodeV1
    }

NODE_DISPLAY_NAME_MAPPINGS = {
    "FaceDetectionNode": "Face Detection and Crop"
}