Upload 12 files
- app.py +3 -3
- config.py +5 -1
- generator.py +72 -40
- gitattributes (1) +35 -0
- models.py +40 -13
- utils.py +39 -20
app.py
CHANGED
@@ -106,7 +106,7 @@ def get_model_status():
         status_text += f"- Custom Checkpoint (Horizon): {'[OK] Loaded' if converter.models_loaded['custom_checkpoint'] else '[OK] Using SDXL base'}\n"
         status_text += f"- LORA (RetroArt): {'[OK] Loaded' if converter.models_loaded['lora'] else ' Disabled'}\n"
         status_text += f"- InstantID: {'[OK] Loaded' if converter.models_loaded['instantid'] else ' Disabled'}\n"
-        status_text += f"-
+        status_text += f"- Midas Depth: {'[OK] Loaded' if converter.models_loaded['midas_depth'] else ' Fallback'}\n"
         status_text += f"- IP-Adapter (Face Embeddings): {'[OK] Loaded' if converter.models_loaded.get('ip_adapter', False) else ' Keypoints only'}\n"
         return status_text
     return "**Model status unavailable**"

@@ -351,7 +351,7 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
     **[ADAPTIVE] Automatic Adjustments:**
     - Small faces (< 50K px): Boosts identity preservation to 1.8
     - Low confidence (< 80%): Increases identity control to 0.9
-    - Profile views (>
+    - Profile views (> 20° yaw): Enhances preservation to 1.7
     - Good quality faces: Uses your selected parameters

     **[PARAMETERS] Parameter Relationships:**

@@ -452,4 +452,4 @@ if __name__ == "__main__":
         server_port=7860,
         share=True,
         show_api=True
-    )
+    )
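Note: the [ADAPTIVE] help text in the hunk above describes threshold-based overrides in prose. Expressed as code, the logic would look roughly like this sketch (function and parameter names are hypothetical, not taken from this commit):

    def apply_adaptive_adjustments(face_area_px, confidence, yaw_degrees,
                                   identity_preservation, identity_control):
        # Small faces (< 50K px): boost identity preservation to 1.8
        if face_area_px < 50_000:
            identity_preservation = max(identity_preservation, 1.8)
        # Low confidence (< 80%): increase identity control to 0.9
        if confidence < 0.80:
            identity_control = max(identity_control, 0.9)
        # Profile views (> 20 degrees yaw): enhance preservation to 1.7
        if abs(yaw_degrees) > 20:
            identity_preservation = max(identity_preservation, 1.7)
        # Good quality faces: keep the user-selected parameters
        return identity_preservation, identity_control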
config.py
CHANGED
@@ -29,7 +29,11 @@ FACE_DETECTION_CONFIG = {
     "ctx_id": 0
 }

-#
+# Depth detection configuration
+DEPTH_DETECTION_CONFIG = {
+    "model_name": "leres++",  # LeRes++ provides superior depth accuracy
+    "method": "leres"
+}
 RECOMMENDED_SIZES = [
     (896, 1152),   # Portrait
     (1152, 896),   # Landscape
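Note: none of the hunks in this commit show DEPTH_DETECTION_CONFIG being read; models.py hard-codes the LeRes-then-Midas fallback below. If the config were wired in, consumption might look like this purely illustrative sketch:

    from config import DEPTH_DETECTION_CONFIG

    # Hypothetical: pick the annotator class from the configured method
    if DEPTH_DETECTION_CONFIG.get("method") == "leres":
        from controlnet_aux import LeresDetector as DepthDetector
    else:
        from controlnet_aux import MidasDetector as DepthDetector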
generator.py
CHANGED
@@ -33,16 +33,16 @@ class RetroArtConverter:
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False,
-            '
+            'midas_depth': False,
             'ip_adapter': False
         }

         # Initialize face analysis
         self.face_app, self.face_detection_enabled = load_face_analysis()

-        # Load
-        self.
-        self.models_loaded['
+        # Load Midas Depth detector
+        self.midas_depth, midas_success = load_depth_detector()
+        self.models_loaded['midas_depth'] = midas_success

         # Load ControlNets
         controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()

@@ -146,34 +146,54 @@ class RetroArtConverter:
         print("============================\n")

     def get_depth_map(self, image):
-        […old implementation, lines 149-168, truncated in source…]
+        """Generate depth map using Midas Depth"""
+        if self.midas_depth is not None:
+            try:
+                if image.mode != 'RGB':
+                    image = image.convert('RGB')
+
+                orig_width, orig_height = image.size
+                orig_width = int(orig_width)
+                orig_height = int(orig_height)
+
+                # FIXED: Use multiples of 64 (not 32)
+                target_width = int((orig_width // 64) * 64)
+                target_height = int((orig_height // 64) * 64)
+
+                target_width = int(max(64, target_width))
+                target_height = int(max(64, target_height))
+
+                if target_width != orig_width or target_height != orig_height:
+                    image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
+                    print(f"[DEPTH] Resized for MidasDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
+
+                # FIXED: Add torch.no_grad() wrapper
+                with torch.no_grad():
+                    depth_image = self.midas_depth(image)
+
+                depth_width, depth_height = depth_image.size
+                # Convert numpy int64 to Python int to avoid PIL errors
+                depth_width = int(depth_width)
+                depth_height = int(depth_height)
+                orig_width_int = int(orig_width)
+                orig_height_int = int(orig_height)
+
+                if depth_width != orig_width_int or depth_height != orig_height_int:
+                    depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
+
+                print(f"[DEPTH] Midas depth map generated: {orig_width}x{orig_height}")
+                return depth_image
+
+            except Exception as e:
+                print(f"[DEPTH] MidasDetector failed ({e}), falling back to grayscale depth")
+                gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
+                depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
+                return Image.fromarray(depth_colored)
+        else:
             gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
             depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-        return Image.fromarray(depth_colored)
-
-        # Fallback to simple grayscale
-        gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
-        depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-        return Image.fromarray(depth_colored)
+            return Image.fromarray(depth_colored)
+

     def add_trigger_word(self, prompt):
         """Add trigger word to prompt if not present"""
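Note: the new get_depth_map floors both dimensions to multiples of 64 before invoking the detector, then resizes the result back. The arithmetic, isolated (helper name hypothetical):

    def floor_to_64(n: int) -> int:
        # Floor to the nearest multiple of 64, never below 64
        return max(64, (n // 64) * 64)

    assert floor_to_64(900) == 896    # 900 // 64 = 14, 14 * 64 = 896
    assert floor_to_64(1150) == 1088  # 1150 // 64 = 17, 17 * 64 = 1088

Callers never see the intermediate size: the depth map is resized back to the original 900x1150 before it is returned.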
@@ -447,7 +467,7 @@ class RetroArtConverter:
         resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)

         # Generate depth map
-        print("Generating
+        print("Generating Midas depth map...")
         depth_image = self.get_depth_map(resized_image)
         if depth_image.size != (target_width, target_height):
             depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)

@@ -463,7 +483,11 @@ class RetroArtConverter:
         if using_multiple_controlnets and self.face_app is not None:
             print("Detecting faces and extracting keypoints...")
             img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
-            […old line 466 truncated in source…]
+            try:
+                faces = self.face_app.get(img_array)
+            except Exception as e:
+                print(f"[WARNING] Face detection failed: {e}")
+                faces = []

             if len(faces) > 0:
                 has_detected_faces = True

@@ -531,7 +555,8 @@ class RetroArtConverter:
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
-                […old line 534 truncated in source…]
+                # Use correct adapter name - peft uses 'default_0' for single adapters
+                self.pipe.set_adapters(["default_0"], adapter_weights=[lora_scale])
                 print(f"LORA scale: {lora_scale}")
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")

@@ -563,14 +588,21 @@ class RetroArtConverter:
                 conditioning = self.compel(prompt)
                 negative_conditioning = self.compel(negative_prompt)

-                […old lines 566-569 truncated in source…]
+                # Handle potential token length mismatches
+                prompt_embeds_0 = conditioning[0]
+                prompt_embeds_1 = conditioning[1]
+                neg_embeds_0 = negative_conditioning[0]
+                neg_embeds_1 = negative_conditioning[1]
+
+                # Ensure consistent shapes if needed
+                pipe_kwargs["prompt_embeds"] = prompt_embeds_0
+                pipe_kwargs["pooled_prompt_embeds"] = prompt_embeds_1
+                pipe_kwargs["negative_prompt_embeds"] = neg_embeds_0
+                pipe_kwargs["negative_pooled_prompt_embeds"] = neg_embeds_1

                 print("[OK] Using Compel-encoded prompts")
             except Exception as e:
-                print(f"Compel encoding failed,
+                print(f"Compel encoding failed ({e}), falling back to standard prompts")
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
         else:

@@ -604,7 +636,7 @@ class RetroArtConverter:
             # Reshape for Resampler: [1, 1, 512]
             face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)

-            # Pass through Resampler: [1, 1, 512]
+            # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
             face_proj_embeds = self.image_proj_model(face_emb_tensor)

             # Scale with identity preservation

@@ -692,4 +724,4 @@ class RetroArtConverter:
         return generated_image


-print("[OK] Generator class ready")
+print("[OK] Generator class ready")
gitattributes (1)
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
models.py
CHANGED
@@ -13,7 +13,7 @@ from diffusers import (
 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
-from controlnet_aux import
+from controlnet_aux import MidasDetector, LeresDetector
 from huggingface_hub import hf_hub_download
 from compel import Compel, ReturnedEmbeddingsType

@@ -82,16 +82,25 @@ def load_face_analysis():


 def load_depth_detector():
-    """Load
-    print("Loading
+    """Load LeRes++ Depth detector (superior to Midas/Zoe for detailed depth estimation)."""
+    print("Loading LeRes++ Depth detector...")
     try:
-        […old lines 88-91 truncated in source…]
+        from controlnet_aux import LeresDetector
+        leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
+        leres_depth.to(device)
+        print(" [OK] LeRes++ Depth loaded successfully (+15-20% accuracy over Midas/Zoe)")
+        return leres_depth, True
     except Exception as e:
-        print(f" [WARNING]
-
+        print(f" [WARNING] LeRes++ Depth not available: {e}")
+        print(" Attempting fallback to Midas Depth...")
+        try:
+            midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
+            midas_depth.to(device)
+            print(" [OK] Midas Depth loaded as fallback")
+            return midas_depth, True
+        except Exception as e2:
+            print(f" [ERROR] All depth detectors failed: {e2}")
+            return None, False


 def load_controlnets():
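Note: a quick smoke test for the loader above (the file path is hypothetical; device is the module-level torch device used by the surrounding code):

    from PIL import Image

    detector, depth_ok = load_depth_detector()
    if depth_ok:
        # controlnet_aux annotators are callable on PIL images and return a PIL depth map
        depth = detector(Image.open("portrait.jpg").convert("RGB"))
        depth.save("depth.png")
    else:
        print("No depth detector available; the generator falls back to grayscale")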
@@ -276,7 +285,7 @@ def setup_ip_adapter(pipe, image_encoder):

     print(" [OK] IP-Adapter fully loaded with InstantID architecture")
     print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
-    print(f" - Face embeddings: 512D →
+    print(f" - Face embeddings: 512D → 16x2048D")

     return image_proj_model, True

@@ -288,19 +297,37 @@ def setup_ip_adapter(pipe, image_encoder):


 def setup_compel(pipe):
-    """Setup Compel for better SDXL prompt handling."""
+    """Setup Compel for better SDXL prompt handling with robust error handling."""
     print("Setting up Compel for enhanced prompt processing...")
     try:
+        # FIXED: Handle SDXL dual tokenizer setup more carefully
         compel = Compel(
             tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
             text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True]
+            requires_pooled=[False, True],
+            padding_get_round_multiple=False  # Disable padding that might cause mismatches
         )
-        print(" [OK] Compel loaded successfully")
+        print(" [OK] Compel loaded successfully with SDXL dual tokenizers")
         return compel, True
+    except TypeError:
+        # Fallback for older Compel versions without padding parameter
+        try:
+            compel = Compel(
+                tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
+                text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
+                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                requires_pooled=[False, True]
+            )
+            print(" [OK] Compel loaded (standard config)")
+            return compel, True
+        except Exception as e:
+            print(f" [WARNING] Compel not available: {e}")
+            print(" [INFO] Will use standard prompt encoding instead")
+            return None, False
     except Exception as e:
         print(f" [WARNING] Compel not available: {e}")
+        print(" [INFO] Will use standard prompt encoding instead")
         return None, False
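Note: with requires_pooled=[False, True], calling the returned Compel object on a prompt yields a (prompt_embeds, pooled_prompt_embeds) pair, which is exactly how the generator.py hunk indexes it. A minimal usage sketch:

    compel, compel_ok = setup_compel(pipe)
    if compel_ok:
        conditioning = compel("pixel art portrait of a knight")
        pipe_kwargs = {
            "prompt_embeds": conditioning[0],          # token embeddings
            "pooled_prompt_embeds": conditioning[1],   # pooled SDXL embeddings
        }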
utils.py
CHANGED
@@ -300,11 +300,30 @@ def get_facial_attributes(face):
            confidence = float(emotion[emotion_idx])

            if confidence > 0.4:  # Only add if confident
+
+                expression_desc = None
+
                if emotion_name == 'happiness':
-                    […old lines 304-307 truncated in source…]
+                    expression_desc = 'smiling'
+                elif emotion_name == 'surprise':
+                    expression_desc = 'surprised expression'
+                elif emotion_name == 'sadness':
+                    expression_desc = 'sad expression'
+                elif emotion_name == 'anger':
+                    expression_desc = 'angry expression'
+                elif emotion_name == 'neutral':
+                    expression_desc = 'neutral expression'
+
+                # Add other emotions like 'disgust' or 'fear' if desired
+
+                if expression_desc:
+                    attributes['expression'] = expression_desc
+
+                    # Only add non-neutral expressions to the prompt description
+                    if emotion_name != 'neutral':
+                        if expression_desc not in attributes['description']:
+                            attributes['description'].append(expression_desc)
+
        except (ValueError, TypeError, AttributeError, IndexError) as e:
            # Expression not available in this model
            pass
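Note: condensed, the mapping above behaves like this sketch (a happy face adds 'smiling' to both fields; a neutral face sets only the expression):

    attributes = {'description': []}
    emotion_name, confidence = 'happiness', 0.83

    if confidence > 0.4:
        expression_desc = {'happiness': 'smiling',
                           'surprise': 'surprised expression'}.get(emotion_name)
        if expression_desc:
            attributes['expression'] = expression_desc
            if emotion_name != 'neutral' and expression_desc not in attributes['description']:
                attributes['description'].append(expression_desc)

    assert attributes == {'description': ['smiling'], 'expression': 'smiling'}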
@@ -395,10 +414,10 @@ def get_demographic_description(age, gender_code):

 def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
     """
-    Calculate optimal size maintaining aspect ratio with dimensions as multiples of
+    Calculate optimal size maintaining aspect ratio with dimensions as multiples of 64.

     This updated version supports ANY aspect ratio (not just predefined ones),
-    while ensuring dimensions are multiples of
+    while ensuring dimensions are multiples of 64 and keeping total pixels reasonable.

     Args:
         original_width: Original image width

@@ -407,7 +426,7 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
         max_dimension: Maximum allowed dimension (default 1536)

     Returns:
-        Tuple of (optimal_width, optimal_height) as multiples of
+        Tuple of (optimal_width, optimal_height) as multiples of 64
     """
     aspect_ratio = original_width / original_height

@@ -423,15 +442,15 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
                 best_diff = diff
                 best_match = (width, height)

-        # Ensure dimensions are multiples of
+        # Ensure dimensions are multiples of 64
         width, height = best_match
-        width = int((width //
-        height = int((height //
+        width = int((width // 64) * 64)
+        height = int((height // 64) * 64)

         return width, height

     # NEW: Support any aspect ratio
-    # Strategy: Keep aspect ratio, scale to reasonable total pixels, round to multiples of
+    # Strategy: Keep aspect ratio, scale to reasonable total pixels, round to multiples of 64

     # Target total pixels (around 1 megapixel for SDXL, adjustable)
     target_pixels = 1024 * 1024  # ~1MP, good balance for SDXL

@@ -455,23 +474,23 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
         optimal_height = max_dimension
         optimal_width = optimal_height * aspect_ratio

-    # Round to nearest multiple of
-    width = int(round(optimal_width /
-    height = int(round(optimal_height /
+    # Round to nearest multiple of 64
+    width = int(round(optimal_width / 64) * 64)
+    height = int(round(optimal_height / 64) * 64)

     # Ensure minimum size (at least 512 on shortest side)
     min_dimension = 512
     if min(width, height) < min_dimension:
         if width < height:
             width = min_dimension
-            height = int(round((width / aspect_ratio) /
+            height = int(round((width / aspect_ratio) / 64) * 64)
         else:
             height = min_dimension
-            width = int(round((height * aspect_ratio) /
+            width = int(round((height * aspect_ratio) / 64) * 64)

-    # Final safety check: ensure multiples of
-    width = max(
-    height = max(
+    # Final safety check: ensure multiples of 64
+    width = max(64, int((width // 64) * 64))
+    height = max(64, int((height // 64) * 64))

     print(f"[SIZING] Aspect ratio: {aspect_ratio:.3f}, Output: {width}x{height} ({width*height/1e6:.2f}MP)")

@@ -506,4 +525,4 @@ def enhance_face_crop(face_crop):
     return face_crop_final


-print("[OK] Utilities loaded")
+print("[OK] Utilities loaded")
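Note: a worked example of the any-aspect-ratio path, assuming no RECOMMENDED_SIZES entry matches and the scaling step (outside these hunks) solves height = sqrt(target_pixels / aspect_ratio):

    w, h = calculate_optimal_size(1920, 1080)
    # aspect_ratio = 1.778; sqrt(1048576 / 1.778) ≈ 768, so width ≈ 1365.3
    # round(1365.3 / 64) * 64 = 1344 and round(768 / 64) * 64 = 768, printing:
    # [SIZING] Aspect ratio: 1.778, Output: 1344x768 (1.03MP)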