# Uploaded by jaceraimi ("Upload 3411 files", commit b4760b6, verified).
import cv2
import numpy as np
import torch
import logging
import os
from PIL import Image
from typing import Tuple, List, Optional
try:
    from comfy_api.v0_0_3_io import (
        ComfyNode, Schema, InputBehavior, NumberDisplay,
        IntegerInput, MaskInput, ImageInput, ImageOutput, ComboInput, CustomInput,
        IntegerOutput, NodeOutput,
    )
    COMFY_V3_AVAILABLE = True
except ImportError:
    # The v3 node API could not be imported. Bind EVERY name from the import
    # list above to a placeholder so that the module still loads and no name
    # is left undefined in fallback mode. ComfyNode must remain a usable base
    # class, hence ``object``; the remaining names are only placeholders.
    ComfyNode = object
    Schema = None
    InputBehavior = None
    NumberDisplay = None
    IntegerInput = None
    MaskInput = None
    ImageInput = None
    ImageOutput = None
    ComboInput = None
    CustomInput = None
    IntegerOutput = None
    NodeOutput = None
    COMFY_V3_AVAILABLE = False
# Configure logging level from environment variable
log_level = os.getenv('COMFYUI_FACE_DETECTION_LOG_LEVEL', 'INFO').upper()
# getattr() on the logging module also resolves names that are not levels
# (for example BASIC_FORMAT, which is a string). Only accept integer level
# constants and fall back to INFO for anything else, so that a bad
# environment value cannot make the import fail.
_resolved_level = getattr(logging, log_level, None)
if not isinstance(_resolved_level, int):
    _resolved_level = logging.INFO
logging.basicConfig(level=_resolved_level)
logger = logging.getLogger(__name__)
if COMFY_V3_AVAILABLE:
    class FaceDetectionNode(ComfyNode):
        """ComfyUI v3 node that detects faces with OpenCV Haar cascades and crops them.

        Every method is a class or static method; the cascade classifiers are
        loaded inside each ``execute`` call and nothing is stored on the class.
        """

        @classmethod
        def DEFINE_SCHEMA(cls):
            """Return the v3 schema describing the node's inputs and its single image output."""
            return Schema(
                node_id="FaceDetectionNode",
                display_name="Face Detection and Crop",
                description="Detect and crop faces from images using Haar cascades.",
                category="image/processing",
                inputs=[
                    ImageInput("image", display_name="Input Image"),
                    CustomInput("detection_threshold", io_type="FLOAT",
                                min=0.1, max=1.0, default=0.8,
                                tooltip="Confidence threshold for face detection",
                                display_mode=NumberDisplay.slider),
                    IntegerInput("min_face_size", display_name="Min Face Size",
                                 min=32, max=512, default=64,
                                 tooltip="Minimum size for detected faces",
                                 display_mode=NumberDisplay.slider),
                    IntegerInput("padding", display_name="Padding",
                                 min=0, max=256, default=32,
                                 tooltip="Padding around detected faces",
                                 display_mode=NumberDisplay.slider),
                    ComboInput("output_mode", options=["largest_face", "all_faces"],
                               tooltip="Output mode for detected faces"),
                    ComboInput("face_output_format", options=["strip", "individual"],
                               tooltip="Format for multiple faces: strip (horizontal layout) or individual (separate batch items). Only applies when output_mode='all_faces'. Max size: 512px.",
                               behavior=InputBehavior.optional),
                    ComboInput("classifier_type", options=["default", "alternative"],
                               behavior=InputBehavior.optional),
                ],
                outputs=[
                    ImageOutput("cropped_faces", display_name="Cropped Faces",
                                tooltip="Detected and cropped faces"),
                ],
                is_output_node=False,
            )

        @staticmethod
        def _get_cascade_classifiers():
            """Load the two frontal-face Haar cascades shipped with OpenCV.

            Returns:
                ``(default_cascade, alternative_cascade)``. An entry is ``None``
                when its XML file is missing or fails to load; both are ``None``
                when any exception is raised while loading.
            """
            default_cascade = None
            alternative_cascade = None
            try:
                # Default Haar cascade - most commonly used and well-tested
                default_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
                if os.path.exists(default_path):
                    default_cascade = cv2.CascadeClassifier(default_path)
                    if default_cascade.empty():
                        logger.error(f"Failed to load cascade from {default_path}")
                        default_cascade = None
                else:
                    logger.error(f"Default cascade file not found: {default_path}")

                # Alternative Haar cascade - different training, may detect faces missed by default
                alt_path = cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml'
                if os.path.exists(alt_path):
                    alternative_cascade = cv2.CascadeClassifier(alt_path)
                    if alternative_cascade.empty():
                        logger.warning(f"Failed to load alternative cascade from {alt_path}")
                        alternative_cascade = None
                else:
                    logger.warning(f"Alternative cascade file not found: {alt_path}")
            except Exception as e:
                # Deliberate best effort: report the problem and let the caller
                # fall back to the placeholder output.
                logger.error(f"Error initializing cascade classifiers: {str(e)}")
                default_cascade = None
                alternative_cascade = None
            return default_cascade, alternative_cascade

        @staticmethod
        def add_padding(image: np.ndarray, face_rect: Tuple[int, int, int, int], padding: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
            """Crop ``face_rect`` grown by ``padding`` pixels, clipped to the image.

            Args:
                image: Array whose first two axes are (height, width).
                face_rect: ``(x, y, w, h)`` of the detection.
                padding: Pixels added on every side before clipping.

            Returns:
                ``(crop, (x, y, w, h))`` where the rectangle describes the
                clipped, padded region. The crop is a slice of ``image``.
            """
            x, y, w, h = face_rect
            height, width = image.shape[:2]
            # Clamp the padded rectangle to the image bounds.
            x1 = max(0, x - padding)
            y1 = max(0, y - padding)
            x2 = min(width, x + w + padding)
            y2 = min(height, y + h + padding)
            return image[y1:y2, x1:x2], (x1, y1, x2 - x1, y2 - y1)

        @staticmethod
        def _process_individual_faces(cropped_faces: List[np.ndarray]) -> torch.Tensor:
            """Turn face crops into one batch tensor with a shared size.

            Args:
                cropped_faces: Non-empty list of face images as numpy arrays.

            Returns:
                Float tensor of shape [N, H, W, 3] with pixel values divided by 255.

            Raises:
                ValueError: If the list is empty or the channel count cannot
                    be converted to 3.

            Note:
                The shared size is the largest crop height and width, each
                capped at 512px. Every crop is resized to exactly that size,
                so crops with a different aspect ratio are stretched.
            """
            if not cropped_faces:
                raise ValueError("cropped_faces must contain at least one face")

            target_height = min(512, max(face.shape[0] for face in cropped_faces))
            target_width = min(512, max(face.shape[1] for face in cropped_faces))
            target_size = (target_width, target_height)  # cv2.resize takes (width, height)

            resized_faces = []
            for face in cropped_faces:
                resized = cv2.resize(face, target_size)
                if resized.ndim == 2:
                    # cv2.resize returns a 2-D array for single-channel input;
                    # restore the channel axis so the batch stays 4-D.
                    resized = resized[:, :, np.newaxis]
                resized_faces.append(resized)

            # Stack faces along a new batch axis: [N, H, W, C]
            result_batch = np.stack(resized_faces, axis=0)
            channels = result_batch.shape[3]
            if channels == 1:
                result_batch = np.repeat(result_batch, 3, axis=3)
            elif channels == 4:
                result_batch = result_batch[:, :, :, :3]
            elif channels != 3:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError(f"Output must have 3 channels, got {channels}")

            return torch.from_numpy(result_batch).float() / 255.0

        @staticmethod
        def _empty_result() -> NodeOutput:
            """Return the black 512x512 placeholder used when no face can be cropped."""
            return NodeOutput(cropped_faces=torch.zeros((1, 512, 512, 3)))

        @staticmethod
        def _to_rgb_uint8(image) -> np.ndarray:
            """Convert the node input into one H x W x 3 uint8 RGB array.

            Tensors are treated as [B, H, W, C] (or [H, W, C]); only the first
            batch item is used. Non-tensor input is passed through unchanged
            and only validated.

            Raises:
                ValueError: For unsupported ranks or channel counts.
            """
            if isinstance(image, torch.Tensor):
                logger.debug(f"Processing tensor - Shape: {image.shape}, Type: {image.dtype}")
                # Ensure 4D tensor [B, H, W, C]
                if len(image.shape) == 3:
                    image = image.unsqueeze(0)
                elif len(image.shape) != 4:
                    raise ValueError(f"Expected 3D or 4D tensor, got shape: {image.shape}")

                batch_size, _, _, channels = image.shape
                if channels == 1:
                    image = image.repeat(1, 1, 1, 3)  # Grayscale to RGB
                elif channels == 4:
                    image = image[:, :, :, :3]  # RGBA to RGB
                elif channels > 4:
                    logger.warning(f"Input has {channels} channels, using first 3")
                    image = image[:, :, :, :3]
                elif channels != 3:
                    raise ValueError(f"Cannot handle {channels} channels")

                if batch_size > 1:
                    # Make the data loss visible instead of dropping images silently.
                    logger.warning(f"Input batch contains {batch_size} images, only the first one is processed")

                image_np = image[0].detach().cpu().numpy()
                if image_np.max() <= 1.0:
                    # Values look normalised to 0..1. Clip before the cast so
                    # slightly out-of-range values do not wrap around in uint8.
                    image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
                else:
                    image_np = np.clip(image_np, 0, 255).astype(np.uint8)
            else:
                # Already numpy array
                image_np = image

            if not isinstance(image_np, np.ndarray) or len(image_np.shape) != 3:
                raise ValueError(f"Expected 3D numpy array, got {type(image_np)} with shape {getattr(image_np, 'shape', 'unknown')}")
            if image_np.shape[2] != 3:
                raise ValueError(f"Expected RGB image (3 channels), got {image_np.shape[2]} channels")
            return image_np

        @staticmethod
        def _select_cascade(classifier_type, default_cascade, alternative_cascade):
            """Pick the cascade for ``classifier_type``; return ``None`` if none is usable."""
            if classifier_type == "alternative":
                if alternative_cascade is not None:
                    return alternative_cascade
                logger.warning("Alternative Haar cascade not available, falling back to default")
                if default_cascade is None:
                    logger.error("No cascade classifiers available")
                return default_cascade
            if default_cascade is None:
                logger.error("Default Haar cascade not available")
            return default_cascade

        @classmethod
        def _crop_faces(cls, image_np, faces, padding, output_mode):
            """Return the padded crops for ``faces`` (rows of x, y, w, h).

            For ``output_mode == "largest_face"`` the winner is chosen by the
            detection area ``w * h``. Comparing padded crops instead would
            penalise faces near the border, whose padding gets clipped.
            """
            if output_mode == "largest_face":
                faces = [max(faces, key=lambda rect: int(rect[2]) * int(rect[3]))]
            crops = []
            for rect in faces:
                x, y, w, h = (int(v) for v in rect)
                face_img, _ = cls.add_padding(image_np, (x, y, w, h), padding)
                crops.append(face_img)
            return crops

        @staticmethod
        def _build_strip(cropped_faces):
            """Resize crops to a common height (capped at 512px) and join them horizontally."""
            strip_height = min(512, max(face.shape[0] for face in cropped_faces))
            resized_faces = []
            for face in cropped_faces:
                aspect_ratio = face.shape[1] / face.shape[0]
                # Keep at least one pixel of width so cv2.resize never gets a zero size.
                new_width = max(1, int(strip_height * aspect_ratio))
                resized_faces.append(cv2.resize(face, (new_width, strip_height)))
            return np.hstack(resized_faces)

        @classmethod
        async def execute(cls, image: torch.Tensor, detection_threshold: float, min_face_size: int,
                          padding: int, output_mode: str, face_output_format: str = "strip",
                          classifier_type: str = "default", mask: Optional[torch.Tensor] = None) -> NodeOutput:
            """Detect faces in ``image`` and return them as an image tensor.

            Args:
                image: Input image; only the first batch item is processed.
                detection_threshold: Accepted for interface compatibility; it
                    is not used by this implementation.
                min_face_size: Minimum detection size passed to the cascade.
                padding: Pixels added around each detection before cropping.
                output_mode: "largest_face" keeps only the largest detection;
                    any other value keeps all detections.
                face_output_format: "individual" returns one batch item per
                    face when several faces are kept; otherwise several faces
                    are joined into one horizontal strip.
                classifier_type: "alternative" selects the alternative cascade
                    (falling back to the default one); anything else selects
                    the default cascade.
                mask: Accepted but not used by this implementation.

            Returns:
                ``NodeOutput`` whose ``cropped_faces`` is a float tensor
                [N, H, W, 3]. A black 1x512x512x3 tensor is returned when no
                cascade is available, detection fails, or no face is found.

            Raises:
                ValueError: If the input has an unsupported shape or channel count.
            """
            default_cascade, alternative_cascade = cls._get_cascade_classifiers()

            image_np = cls._to_rgb_uint8(image)
            # Haar cascades operate on a grayscale image.
            gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

            face_cascade = cls._select_cascade(classifier_type, default_cascade, alternative_cascade)
            if face_cascade is None:
                return cls._empty_result()

            try:
                faces = face_cascade.detectMultiScale(
                    gray,
                    scaleFactor=1.1,
                    minNeighbors=5,
                    minSize=(min_face_size, min_face_size)
                )
            except Exception as e:
                logger.error(f"Face detection failed: {str(e)}")
                return cls._empty_result()

            if len(faces) == 0:
                logger.warning("No faces detected in image")
                return cls._empty_result()

            cropped_faces = cls._crop_faces(image_np, faces, padding, output_mode)

            # face_output_format only matters for "all_faces" with several faces.
            if output_mode == "all_faces" and len(cropped_faces) > 1 and face_output_format == "individual":
                return NodeOutput(cropped_faces=cls._process_individual_faces(cropped_faces))

            if len(cropped_faces) > 1:
                result = cls._build_strip(cropped_faces)
            else:
                result = cropped_faces[0]

            # Ensure result has correct channel count
            if result.shape[2] == 1:
                result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
            elif result.shape[2] == 4:
                result = cv2.cvtColor(result, cv2.COLOR_RGBA2RGB)

            # Convert back to tensor with proper dimensions [B, H, W, C]
            result = torch.from_numpy(result).float() / 255.0
            result = result.unsqueeze(0)  # Add batch dimension
            if result.shape[3] != 3:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError(f"Output must have 3 channels, got {result.shape[3]}")
            return NodeOutput(cropped_faces=result)

        @classmethod
        def IS_CHANGED(cls, **kwargs):
            """Always return ``False``, regardless of the inputs."""
            return False
# Backward compatibility wrapper for v1/v2
class FaceDetectionNodeV1:
    """Backward compatibility wrapper for ComfyUI v1/v2"""

    @classmethod
    def INPUT_TYPES(cls):
        """Return the v1/v2 input declaration (required and optional inputs)."""
        return {
            "required": {
                "image": ("IMAGE",),
                "detection_threshold": ("FLOAT", {
                    "default": 0.8,
                    "min": 0.1,
                    "max": 1.0,
                    "step": 0.1
                }),
                "min_face_size": ("INT", {
                    "default": 64,
                    "min": 32,
                    "max": 512,
                    "step": 8
                }),
                "padding": ("INT", {
                    "default": 32,
                    "min": 0,
                    "max": 256,
                    "step": 8
                }),
                "output_mode": (["largest_face", "all_faces"],),
            },
            "optional": {
                "face_output_format": (["strip", "individual"], {"default": "strip"}),
                "classifier_type": (["default", "alternative"], {"default": "default"}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("Cropped Faces",)
    FUNCTION = "detect_and_crop_faces"
    CATEGORY = "image/processing"

    def __init__(self):
        """Load the default and alternative Haar cascades once per instance.

        An attribute stays ``None`` when its XML file is missing or fails to
        load; both are reset to ``None`` if any exception is raised.
        """
        self.default_cascade = None
        self.alternative_cascade = None
        try:
            # Default Haar cascade - most commonly used and well-tested
            default_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            if os.path.exists(default_path):
                self.default_cascade = cv2.CascadeClassifier(default_path)
                if self.default_cascade.empty():
                    logger.error(f"Failed to load cascade from {default_path}")
                    self.default_cascade = None
            else:
                logger.error(f"Default cascade file not found: {default_path}")

            # Alternative Haar cascade - different training, may detect faces missed by default
            alt_path = cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml'
            if os.path.exists(alt_path):
                self.alternative_cascade = cv2.CascadeClassifier(alt_path)
                if self.alternative_cascade.empty():
                    logger.warning(f"Failed to load alternative cascade from {alt_path}")
                    self.alternative_cascade = None
            else:
                logger.warning(f"Alternative cascade file not found: {alt_path}")
        except Exception as e:
            # Deliberate best effort: report the problem; detection later
            # falls back to the placeholder output.
            logger.error(f"Error initializing cascade classifiers: {str(e)}")
            self.default_cascade = None
            self.alternative_cascade = None

    def add_padding(self, image: np.ndarray, face_rect: Tuple[int, int, int, int], padding: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
        """Crop ``face_rect`` grown by ``padding`` pixels, clipped to the image.

        Args:
            image: Array whose first two axes are (height, width).
            face_rect: ``(x, y, w, h)`` of the detection.
            padding: Pixels added on every side before clipping.

        Returns:
            ``(crop, (x, y, w, h))`` where the rectangle describes the
            clipped, padded region. The crop is a slice of ``image``.
        """
        x, y, w, h = face_rect
        height, width = image.shape[:2]
        # Clamp the padded rectangle to the image bounds.
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(width, x + w + padding)
        y2 = min(height, y + h + padding)
        return image[y1:y2, x1:x2], (x1, y1, x2 - x1, y2 - y1)

    def _process_individual_faces(self, cropped_faces: List[np.ndarray]) -> torch.Tensor:
        """Turn face crops into one batch tensor [N, H, W, 3] with a shared size.

        Delegates to ``FaceDetectionNode._process_individual_faces`` when the
        v3 class exists; otherwise uses the local implementation below.

        The shared size is the largest crop height and width, each capped at
        512px. Every crop is resized to exactly that size, so crops with a
        different aspect ratio are stretched.

        Raises:
            ValueError: If the list is empty or the channel count cannot be
                converted to 3 (local implementation).
        """
        if COMFY_V3_AVAILABLE:
            # Use the static method from the v3 class
            return FaceDetectionNode._process_individual_faces(cropped_faces)

        # Fallback implementation for v1/v2 only environments
        if not cropped_faces:
            raise ValueError("cropped_faces must contain at least one face")

        target_height = min(512, max(face.shape[0] for face in cropped_faces))
        target_width = min(512, max(face.shape[1] for face in cropped_faces))
        target_size = (target_width, target_height)  # cv2.resize takes (width, height)

        resized_faces = []
        for face in cropped_faces:
            resized = cv2.resize(face, target_size)
            if resized.ndim == 2:
                # cv2.resize returns a 2-D array for single-channel input;
                # restore the channel axis so the batch stays 4-D.
                resized = resized[:, :, np.newaxis]
            resized_faces.append(resized)

        result_batch = np.stack(resized_faces, axis=0)
        channels = result_batch.shape[3]
        if channels == 1:
            result_batch = np.repeat(result_batch, 3, axis=3)
        elif channels == 4:
            result_batch = result_batch[:, :, :, :3]
        elif channels != 3:
            # Explicit raise instead of assert: asserts are removed under -O.
            raise ValueError(f"Output must have 3 channels, got {channels}")

        return torch.from_numpy(result_batch).float() / 255.0

    @staticmethod
    def _empty_result():
        """Return the black 512x512 placeholder used when no face can be cropped."""
        return (torch.zeros((1, 512, 512, 3)),)

    @staticmethod
    def _to_rgb_uint8(image) -> np.ndarray:
        """Convert the node input into one H x W x 3 uint8 RGB array.

        Tensors are treated as [B, H, W, C] (or [H, W, C]); only the first
        batch item is used. Non-tensor input is passed through unchanged and
        only validated.

        Raises:
            ValueError: For unsupported ranks or channel counts.
        """
        if isinstance(image, torch.Tensor):
            logger.debug(f"Processing tensor - Shape: {image.shape}, Type: {image.dtype}")
            # Ensure 4D tensor [B, H, W, C]
            if len(image.shape) == 3:
                image = image.unsqueeze(0)
            elif len(image.shape) != 4:
                raise ValueError(f"Expected 3D or 4D tensor, got shape: {image.shape}")

            batch_size, _, _, channels = image.shape
            if channels == 1:
                image = image.repeat(1, 1, 1, 3)  # Grayscale to RGB
            elif channels == 4:
                image = image[:, :, :, :3]  # RGBA to RGB
            elif channels > 4:
                logger.warning(f"Input has {channels} channels, using first 3")
                image = image[:, :, :, :3]
            elif channels != 3:
                raise ValueError(f"Cannot handle {channels} channels")

            if batch_size > 1:
                # Make the data loss visible instead of dropping images silently.
                logger.warning(f"Input batch contains {batch_size} images, only the first one is processed")

            image_np = image[0].detach().cpu().numpy()
            if image_np.max() <= 1.0:
                # Values look normalised to 0..1. Clip before the cast so
                # slightly out-of-range values do not wrap around in uint8.
                image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
            else:
                image_np = np.clip(image_np, 0, 255).astype(np.uint8)
        else:
            # Already numpy array
            image_np = image

        if not isinstance(image_np, np.ndarray) or len(image_np.shape) != 3:
            raise ValueError(f"Expected 3D numpy array, got {type(image_np)} with shape {getattr(image_np, 'shape', 'unknown')}")
        if image_np.shape[2] != 3:
            raise ValueError(f"Expected RGB image (3 channels), got {image_np.shape[2]} channels")
        return image_np

    def _select_cascade(self, classifier_type):
        """Pick the cascade for ``classifier_type``; return ``None`` if none is usable."""
        if classifier_type == "alternative":
            if self.alternative_cascade is not None:
                return self.alternative_cascade
            logger.warning("Alternative Haar cascade not available, falling back to default")
            if self.default_cascade is None:
                logger.error("No cascade classifiers available")
            return self.default_cascade
        if self.default_cascade is None:
            logger.error("Default Haar cascade not available")
        return self.default_cascade

    def _crop_faces(self, image_np, faces, padding, output_mode):
        """Return the padded crops for ``faces`` (rows of x, y, w, h).

        For ``output_mode == "largest_face"`` the winner is chosen by the
        detection area ``w * h``. Comparing padded crops instead would
        penalise faces near the border, whose padding gets clipped.
        """
        if output_mode == "largest_face":
            faces = [max(faces, key=lambda rect: int(rect[2]) * int(rect[3]))]
        crops = []
        for rect in faces:
            x, y, w, h = (int(v) for v in rect)
            face_img, _ = self.add_padding(image_np, (x, y, w, h), padding)
            crops.append(face_img)
        return crops

    @staticmethod
    def _build_strip(cropped_faces):
        """Resize crops to a common height (capped at 512px) and join them horizontally."""
        strip_height = min(512, max(face.shape[0] for face in cropped_faces))
        resized_faces = []
        for face in cropped_faces:
            aspect_ratio = face.shape[1] / face.shape[0]
            # Keep at least one pixel of width so cv2.resize never gets a zero size.
            new_width = max(1, int(strip_height * aspect_ratio))
            resized_faces.append(cv2.resize(face, (new_width, strip_height)))
        return np.hstack(resized_faces)

    def detect_and_crop_faces(self, image, detection_threshold, min_face_size, padding, output_mode, face_output_format="strip", classifier_type="default"):
        """Legacy method for v1/v2 compatibility: detect faces and return crops.

        Args:
            image: Input image; only the first batch item is processed.
            detection_threshold: Accepted for interface compatibility; it is
                not used by this implementation.
            min_face_size: Minimum detection size passed to the cascade.
            padding: Pixels added around each detection before cropping.
            output_mode: "largest_face" keeps only the largest detection; any
                other value keeps all detections.
            face_output_format: "individual" returns one batch item per face
                when several faces are kept; otherwise several faces are
                joined into one horizontal strip.
            classifier_type: "alternative" selects the alternative cascade
                (falling back to the default one); anything else selects the
                default cascade.

        Returns:
            One-element tuple holding a float tensor [N, H, W, 3]. A black
            1x512x512x3 tensor is returned when no cascade is available,
            detection fails, or no face is found.

        Raises:
            ValueError: If the input has an unsupported shape or channel count.
        """
        image_np = self._to_rgb_uint8(image)
        # Haar cascades operate on a grayscale image.
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

        face_cascade = self._select_cascade(classifier_type)
        if face_cascade is None:
            return self._empty_result()

        try:
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(min_face_size, min_face_size)
            )
        except Exception as e:
            logger.error(f"Face detection failed: {str(e)}")
            return self._empty_result()

        if len(faces) == 0:
            logger.warning("No faces detected in image")
            return self._empty_result()

        cropped_faces = self._crop_faces(image_np, faces, padding, output_mode)

        # face_output_format only matters for "all_faces" with several faces.
        if output_mode == "all_faces" and len(cropped_faces) > 1 and face_output_format == "individual":
            return (self._process_individual_faces(cropped_faces),)

        if len(cropped_faces) > 1:
            result = self._build_strip(cropped_faces)
        else:
            result = cropped_faces[0]

        # Ensure result has correct channel count
        if result.shape[2] == 1:
            result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
        elif result.shape[2] == 4:
            result = cv2.cvtColor(result, cv2.COLOR_RGBA2RGB)

        # Convert back to tensor with proper dimensions [B, H, W, C]
        result = torch.from_numpy(result).float() / 255.0
        result = result.unsqueeze(0)  # Add batch dimension
        if result.shape[3] != 3:
            # Explicit raise instead of assert: asserts are removed under -O.
            raise ValueError(f"Output must have 3 channels, got {result.shape[3]}")
        return (result,)

    @classmethod
    def IS_CHANGED(cls, **kwargs):
        """Always return ``False``, regardless of the inputs."""
        return False
# Export the node class matching the available ComfyUI API: the v3 node when
# its API was imported, otherwise the v1/v2 compatibility wrapper. The
# conditional expression only looks up the branch that is selected, so
# FaceDetectionNode is never referenced when it was not defined.
NODE_CLASS_MAPPINGS = {
    "FaceDetectionNode": FaceDetectionNode if COMFY_V3_AVAILABLE else FaceDetectionNodeV1,
}

# Display name registered for the node id above.
NODE_DISPLAY_NAME_MAPPINGS = {
    "FaceDetectionNode": "Face Detection and Crop",
}