Spaces:
Runtime error
Runtime error
| """ | |
| Utility functions for Pixagram AI Pixel Art Generator | |
| UPDATED VERSION with type safety helpers | |
| """ | |
import math
import re

import cv2
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter, ImageDraw

from config import COLOR_MATCH_CONFIG, FACE_MASK_CONFIG, AGE_BRACKETS
def ensure_int(value):
    """Coerce a numeric value (including numpy scalar types) to a plain Python int."""
    if isinstance(value, (int, float)):
        return int(value)
    # numpy scalars expose .item() for lossless conversion to a native type.
    extractor = getattr(value, 'item', None)
    return int(extractor()) if extractor is not None else int(value)
def safe_image_size(image):
    """Return (width, height) of *image* as plain Python ints (never numpy.int64)."""
    width, height = image.width, image.height
    return (ensure_int(width), ensure_int(height))
def sanitize_text(text):
    """
    Strip characters that commonly break encoding (emojis and other
    non-BMP unicode) and return the cleaned string.

    Falsy inputs (None, empty string) are returned unchanged.
    """
    if not text:
        return text
    try:
        # Round-trip through UTF-8 to drop any invalid byte sequences.
        text = text.encode('utf-8', errors='ignore').decode('utf-8')
        # Keep only Basic Multilingual Plane characters (code point < 0x10000).
        text = ''.join(ch for ch in text if ord(ch) < 65536)
    except Exception as e:
        print(f"[WARNING] Text sanitization warning: {e}")
    return text
def color_match_lab(target, source, preserve_saturation=True):
    """
    LAB color space matching for better skin tones with saturation preservation.
    GENTLE version to prevent color fading.

    Args:
        target: Target image (HxWx3 RGB array, values 0-255) to adjust
        source: Source image (HxWx3 RGB array, values 0-255) to match colors from
        preserve_saturation: If True, preserves original saturation levels

    Returns:
        uint8 RGB array; on failure, the target clipped/cast to uint8 unchanged.
    """
    try:
        target_lab = cv2.cvtColor(target.astype(np.uint8), cv2.COLOR_RGB2LAB).astype(np.float32)
        source_lab = cv2.cvtColor(source.astype(np.uint8), cv2.COLOR_RGB2LAB).astype(np.float32)
        result_lab = np.copy(target_lab)

        # Very gentle L (lightness) matching: move the target's mean/std only
        # halfway toward the source, then blend by the configured factor.
        t_mean, t_std = target_lab[:, :, 0].mean(), target_lab[:, :, 0].std()
        s_mean, s_std = source_lab[:, :, 0].mean(), source_lab[:, :, 0].std()
        if t_std > 1e-6:
            matched = (target_lab[:, :, 0] - t_mean) * (s_std / t_std) * 0.5 + s_mean
            blend = COLOR_MATCH_CONFIG['lab_lightness_blend']
            result_lab[:, :, 0] = target_lab[:, :, 0] * (1 - blend) + matched * blend

        # A/B (color) channels: identical statistics transfer; only the blend
        # factor differs between the "preserved" and "full" modes.
        blend_key = 'lab_color_blend_preserved' if preserve_saturation else 'lab_color_blend_full'
        blend = COLOR_MATCH_CONFIG[blend_key]
        for i in (1, 2):
            t_mean, t_std = target_lab[:, :, i].mean(), target_lab[:, :, i].std()
            s_mean, s_std = source_lab[:, :, i].mean(), source_lab[:, :, i].std()
            if t_std > 1e-6:
                matched = (target_lab[:, :, i] - t_mean) * (s_std / t_std) + s_mean
                result_lab[:, :, i] = target_lab[:, :, i] * (1 - blend) + matched * blend

        # BUGFIX: clip to the valid 8-bit range before casting. The statistics
        # transfer can push float values outside [0, 255]; casting those to
        # uint8 wraps around and produces severe color artifacts.
        result_lab = np.clip(result_lab, 0, 255)
        return cv2.cvtColor(result_lab.astype(np.uint8), cv2.COLOR_LAB2RGB)
    except Exception as e:
        print(f"LAB conversion error: {e}")
        # Clip here too: the float input may be slightly out of range.
        return np.clip(target, 0, 255).astype(np.uint8)
def enhance_saturation(image, boost=1.05):
    """
    Minimal saturation enhancement (disabled by default).

    Args:
        image: PIL Image
        boost: Saturation multiplier (1.0 = no change, >1.0 = more saturated)

    Returns:
        The original image when boost <= 1.0, otherwise a saturated copy.
    """
    if boost <= 1.0:
        return image
    return ImageEnhance.Color(image).enhance(boost)
def enhanced_color_match(target_img, source_img, face_bbox=None, preserve_vibrance=False):
    """
    Enhanced color matching with face-aware processing.
    Very gentle to prevent color fading.

    Args:
        target_img: Generated PIL image to adjust
        source_img: Original PIL image to match colors from
        face_bbox: Optional [x1, y1, x2, y2] for face region
        preserve_vibrance: If True, adds minimal saturation boost (disabled by default)

    Returns:
        Color-matched PIL Image; on any failure, the unmodified target_img.
    """
    try:
        target = np.array(target_img).astype(np.float32)
        source = np.array(source_img).astype(np.float32)

        if face_bbox is not None:
            # Clamp the face bbox to the image bounds.
            x1, y1, x2, y2 = [int(c) for c in face_bbox]
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(target.shape[1], x2), min(target.shape[0], y2)

            if y2 > y1 and x2 > x1:
                # Soft mask: 1.0 inside the face box, feathered outward.
                face_mask = np.zeros((target.shape[0], target.shape[1]), dtype=np.float32)
                face_mask[y1:y2, x1:x2] = 1.0
                face_mask = cv2.GaussianBlur(
                    face_mask,
                    COLOR_MATCH_CONFIG['gaussian_blur_kernel'],
                    COLOR_MATCH_CONFIG['gaussian_blur_sigma']
                )
                face_mask = face_mask[:, :, np.newaxis]

                # BUGFIX: keep the unmodified frame around. The old code
                # computed `target * mask + target * (1 - mask)` -- a no-op
                # blend of the modified image with itself -- so the feathered
                # mask never actually smoothed the face/background seam.
                original = target.copy()
                target[y1:y2, x1:x2] = color_match_lab(
                    target[y1:y2, x1:x2],
                    source[y1:y2, x1:x2],
                    preserve_saturation=True
                )
                result = target * face_mask + original * (1 - face_mask)
            else:
                # Degenerate bbox: fall back to whole-image matching.
                result = color_match_lab(target, source, preserve_saturation=True)
        else:
            # Standard LAB color matching with saturation preservation.
            result = color_match_lab(target, source, preserve_saturation=True)

        result_img = Image.fromarray(np.clip(result, 0, 255).astype(np.uint8))

        # NO saturation boost by default.
        if preserve_vibrance:
            result_img = enhance_saturation(result_img, boost=COLOR_MATCH_CONFIG['saturation_boost'])
        return result_img
    except Exception as e:
        print(f"Enhanced color matching failed: {e}, returning target image")
        return target_img
def color_match(target_img, source_img, mode='mkl'):
    """
    Legacy color matching function - kept for compatibility.
    Use enhanced_color_match for better results.

    Args:
        target_img: PIL image to adjust
        source_img: PIL image whose colors are matched
        mode: 'simple' (per-channel mean/std transfer in RGB), 'mkl'
              (LAB-space matching), anything else = 'pdf' range remapping
    """
    try:
        target = np.array(target_img).astype(np.float32)
        source = np.array(source_img).astype(np.float32)

        if mode == 'simple':
            # Per-channel mean/std statistics transfer in RGB space.
            result = np.zeros_like(target)
            for ch in range(3):
                t_chan, s_chan = target[:, :, ch], source[:, :, ch]
                scale = s_chan.std() / (t_chan.std() + 1e-6)
                shifted = (t_chan - t_chan.mean()) * scale + s_chan.mean()
                result[:, :, ch] = np.clip(shifted, 0, 255)
        elif mode == 'mkl':
            result = color_match_lab(target, source)
        else:
            # 'pdf' mode: linearly remap each channel's value range onto
            # the source channel's range.
            result = np.zeros_like(target)
            for ch in range(3):
                t_chan, s_chan = target[:, :, ch], source[:, :, ch]
                result[:, :, ch] = np.interp(
                    t_chan.flatten(),
                    np.linspace(t_chan.min(), t_chan.max(), 256),
                    np.linspace(s_chan.min(), s_chan.max(), 256)
                ).reshape(t_chan.shape)

        return Image.fromarray(result.astype(np.uint8))
    except Exception as e:
        print(f"Color matching failed: {e}, returning target image")
        return target_img
def create_face_mask(image, face_bbox, feather=None):
    """
    Create a soft elliptical mask around the detected face for better blending.

    Args:
        image: PIL Image (defines mask size)
        face_bbox: [x1, y1, x2, y2]
        feather: blur radius for soft edges (uses config default if None)

    Returns:
        Grayscale ('L') PIL Image mask: 255 inside the padded face ellipse,
        0 outside, with gaussian-blurred edges.
    """
    if feather is None:
        feather = FACE_MASK_CONFIG['feather']

    # Pad the bbox proportionally to the face width, clamped to the image.
    left, top, right, bottom = face_bbox
    padding = int((right - left) * FACE_MASK_CONFIG['padding'])
    left = max(0, left - padding)
    top = max(0, top - padding)
    right = min(image.width, right + padding)
    bottom = min(image.height, bottom + padding)

    # An ellipse follows the natural face shape better than a rectangle.
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).ellipse([left, top, right, bottom], fill=255)

    # Feather the edge for a smooth blend transition.
    return mask.filter(ImageFilter.GaussianBlur(feather))
def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
    """
    Draw facial keypoints on image for InstantID ControlNet.

    Args:
        image_pil: PIL Image whose size defines the output canvas
        kps: iterable of 5 (x, y) facial keypoints
        color_list: per-keypoint RGB colors
            (FIX: tuple default instead of a mutable list default)

    Returns:
        PIL Image with limbs drawn as dimmed ellipses and keypoints as circles.
    """
    stickwidth = 4
    # Keypoint index pairs to connect; all limbs meet at index 2.
    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for limb in limb_seq:
        color = color_list[limb[0]]
        x = kps[limb][:, 0]
        y = kps[limb][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
        )
        # cv2 draws in place on our local canvas; the per-iteration .copy()
        # in the old code allocated a full-frame copy for no benefit.
        out_img = cv2.fillConvexPoly(out_img, polygon, color)
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx, (x, y) in enumerate(kps):
        out_img = cv2.circle(out_img, (int(x), int(y)), 10, color_list[idx], -1)

    return Image.fromarray(out_img.astype(np.uint8))
def get_facial_attributes(face):
    """
    Extract comprehensive facial attributes from a face detection object.
    Returns dict with age, gender, expression, quality metrics.
    """
    attrs = {
        'age': None,
        'gender': None,
        'expression': None,
        'quality': 1.0,
        'pose_angle': 0,
        'description': [],
    }

    # Age: numeric value plus a human-readable bracket label.
    try:
        if hasattr(face, 'age'):
            detected_age = int(face.age)
            attrs['age'] = detected_age
            for lower, upper, label in AGE_BRACKETS:
                if lower <= detected_age < upper:
                    attrs['description'].append(label)
                    break
    except (ValueError, TypeError, AttributeError) as e:
        print(f"[WARNING] Age extraction failed: {e}")

    # Gender: encoded as 1 = male, 0 = female.
    try:
        if hasattr(face, 'gender'):
            code = int(face.gender)
            attrs['gender'] = code
            if code == 1:
                attrs['description'].append("male")
            elif code == 0:
                attrs['description'].append("female")
    except (ValueError, TypeError, AttributeError) as e:
        print(f"[WARNING] Gender extraction failed: {e}")

    # Expression: only some InsightFace models expose an emotion vector.
    try:
        if hasattr(face, 'emotion'):
            scores = face.emotion
            if isinstance(scores, (list, tuple)) and len(scores) > 0:
                labels = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear']
                best = int(np.argmax(scores))
                name = labels[best] if best < len(labels) else 'neutral'
                confidence = float(scores[best])
                if confidence > 0.4:  # only record confident predictions
                    if name == 'happiness':
                        attrs['expression'] = 'smiling'
                        attrs['description'].append('smiling')
                    elif name != 'neutral':
                        attrs['expression'] = name
    except (ValueError, TypeError, AttributeError, IndexError):
        # Expression not available in this model.
        pass

    # Pose: store absolute yaw so callers can detect profile shots.
    try:
        if hasattr(face, 'pose'):
            pose = face.pose
            if len(pose) > 1:
                attrs['pose_angle'] = abs(float(pose[1]))
    except (ValueError, TypeError, AttributeError, IndexError):
        pass

    # Detector confidence, if available.
    try:
        if hasattr(face, 'det_score'):
            attrs['quality'] = float(face.det_score)
    except (ValueError, TypeError, AttributeError):
        pass

    return attrs
def build_enhanced_prompt(base_prompt, facial_attributes, trigger_word):
    """
    Build enhanced prompt with facial attributes intelligently integrated.

    Args:
        base_prompt: prompt text expected to contain the trigger word
        facial_attributes: dict from get_facial_attributes()
        trigger_word: token after which demographics are inserted

    Returns:
        The prompt, with ", <demographics> person" inserted after the first
        occurrence of the trigger word unless the prompt already mentions
        the demographics.
    """
    prompt = base_prompt
    descriptions = facial_attributes['description']
    if not descriptions:
        return base_prompt

    # Check if demographics already in prompt.
    # BUGFIX: match on word boundaries. The old substring check made
    # "male" match inside "female" (and "man" inside "woman"-style words),
    # wrongly skipping the insertion.
    prompt_lower = prompt.lower()
    has_demographics = any(
        re.search(r'\b' + re.escape(desc.lower()) + r'\b', prompt_lower)
        for desc in descriptions
    )

    if not has_demographics:
        # Insert after trigger word for better integration.
        demographic_str = ", ".join(descriptions) + " person"
        prompt = prompt.replace(
            trigger_word,
            f"{trigger_word}, {demographic_str}",
            1
        )

    age = facial_attributes.get('age')
    quality = facial_attributes.get('quality')
    expression = facial_attributes.get('expression')
    print(f"[FACE] Detected: {', '.join(descriptions)}")
    # BUGFIX: guard the format spec -- a missing/None quality used to raise
    # TypeError on "{quality:.2f}".
    quality_str = f"{quality:.2f}" if quality is not None else 'N/A'
    print(f"  Age: {age if age else 'N/A'}, Quality: {quality_str}")
    if expression:
        print(f"  Expression: {expression}")
    return prompt
def get_demographic_description(age, gender_code):
    """
    Legacy function - kept for compatibility.
    Use get_facial_attributes() for new code.

    Returns a list with an age-bracket label and/or a gender string;
    unparseable inputs are silently skipped.
    """
    parts = []

    if age is not None:
        try:
            age_value = int(age)
            matched = next(
                (label for lower, upper, label in AGE_BRACKETS
                 if lower <= age_value < upper),
                None,
            )
            if matched is not None:
                parts.append(matched)
        except (ValueError, TypeError):
            pass

    if gender_code is not None:
        try:
            code = int(gender_code)
            if code == 1:
                parts.append("male")
            elif code == 0:
                parts.append("female")
        except (ValueError, TypeError):
            pass

    return parts
def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
    """
    Calculate optimal size maintaining aspect ratio with dimensions as multiples of 64.

    Supports ANY aspect ratio (not just predefined ones), while ensuring
    dimensions are multiples of 64 and keeping total pixels reasonable.

    Args:
        original_width: Original image width
        original_height: Original image height
        recommended_sizes: Optional list of (width, height) tuples (legacy support)
        max_dimension: Maximum allowed dimension (default 1536)

    Returns:
        Tuple of (optimal_width, optimal_height) as multiples of 64
    """
    # Ensure pure Python ints to avoid numpy.int64 issues downstream.
    original_width = ensure_int(original_width)
    original_height = ensure_int(original_height)
    aspect_ratio = original_width / original_height

    # Legacy mode: snap to the recommended size with the closest aspect ratio.
    # BUGFIX: an empty recommended_sizes list used to crash (best_match stayed
    # None and was unpacked); now only a non-empty list triggers legacy mode.
    if recommended_sizes:
        best_match = None
        best_diff = float('inf')
        for width, height in recommended_sizes:
            diff = abs(width / height - aspect_ratio)
            if diff < best_diff:
                best_diff = diff
                best_match = (width, height)
        # Floor to multiples of 64 as required by the generator.
        width, height = best_match
        return int((width // 64) * 64), int((height // 64) * 64)

    # General mode: keep the aspect ratio, target ~1 megapixel (a good
    # balance for SDXL), then round to multiples of 64.
    target_pixels = 1024 * 1024
    # width * height = target_pixels and width / height = aspect_ratio
    # => height = sqrt(target_pixels / aspect_ratio)
    optimal_height = math.sqrt(target_pixels / aspect_ratio)
    optimal_width = optimal_height * aspect_ratio

    # Shrink (preserving aspect ratio) if either side exceeds max_dimension.
    if optimal_width > max_dimension:
        optimal_width = max_dimension
        optimal_height = optimal_width / aspect_ratio
    if optimal_height > max_dimension:
        optimal_height = max_dimension
        optimal_width = optimal_height * aspect_ratio

    # Round to the nearest multiple of 64.
    width = int(round(optimal_width / 64) * 64)
    height = int(round(optimal_height / 64) * 64)

    # Enforce a minimum of 512 on the shorter side, re-deriving the other
    # side from the aspect ratio.
    min_dimension = 512
    if min(width, height) < min_dimension:
        if width < height:
            width = min_dimension
            height = int(round((width / aspect_ratio) / 64) * 64)
        else:
            height = min_dimension
            width = int(round((height * aspect_ratio) / 64) * 64)

    # Final safety: floor to multiples of 64, never below 64.
    width = max(64, int((width // 64) * 64))
    height = max(64, int((height // 64) * 64))

    print(f"[SIZING] Aspect ratio: {aspect_ratio:.3f}, Output: {width}x{height} ({width*height/1e6:.2f}MP)")
    return width, height
def enhance_face_crop(face_crop):
    """
    Multi-stage enhancement for better feature preservation.

    Args:
        face_crop: PIL Image of face region

    Returns:
        Enhanced PIL Image: resized to 224x224 (CLIP's input size), then
        sharpened (1.5x), contrast-boosted (1.1x), and brightened (1.05x).
    """
    # Resize to CLIP's native input resolution first.
    enhanced = face_crop.resize((224, 224), Image.LANCZOS)

    # Apply sharpness, contrast, then brightness tweaks in sequence:
    # sharpness helps facial features, contrast/brightness normalize lighting.
    for enhancer_cls, factor in (
        (ImageEnhance.Sharpness, 1.5),
        (ImageEnhance.Contrast, 1.1),
        (ImageEnhance.Brightness, 1.05),
    ):
        enhanced = enhancer_cls(enhanced).enhance(factor)

    return enhanced
| print("[OK] Utilities loaded") | |