pixagram-dev

Runtime error

App Files Community

primerz committed on Oct 27, 2025

Commit

28615cb

verified ·

1 Parent(s): c6815c0

Update app.py

Browse files

Files changed (1) hide show

app.py +210 -170

app.py CHANGED Viewed

@@ -2,18 +2,17 @@ import spaces  # MUST be first, before any CUDA-related imports
 import gradio as gr
 import torch
 from diffusers import (
-    StableDiffusionXLPipeline,
     StableDiffusionXLControlNetPipeline,
     ControlNetModel,
     AutoencoderKL,
-    LCMScheduler  # CORRECT SCHEDULER FOR LCM
 )
 from diffusers.models.attention_processor import AttnProcessor2_0
 from insightface.app import FaceAnalysis
 from PIL import Image
 import numpy as np
 import cv2
-from transformers import pipeline as transformers_pipeline
 from huggingface_hub import hf_hub_download
 import os
@@ -22,12 +21,8 @@ MODEL_REPO = "primerz/pixagram"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
-# LORA trigger word
-TRIGGER_WORD = "p1x3l4rt, pixel art"
 print(f"Using device: {device}")
 print(f"Loading models from: {MODEL_REPO}")
-print(f"LORA Trigger Word: {TRIGGER_WORD}")
 class RetroArtConverter:
     def __init__(self):
@@ -35,6 +30,7 @@ class RetroArtConverter:
         self.dtype = dtype
         self.models_loaded = {
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False
         }
@@ -62,7 +58,7 @@ class RetroArtConverter:
             torch_dtype=self.dtype
         ).to(self.device)
-        # Load InstantID ControlNet (optional)
         print("Loading InstantID ControlNet...")
         try:
             self.controlnet_instantid = ControlNetModel.from_pretrained(
@@ -78,6 +74,42 @@ class RetroArtConverter:
             self.controlnet_instantid = None
             self.instantid_enabled = False
         # Load depth estimator
         print("Loading depth estimator...")
         self.depth_estimator = transformers_pipeline(
@@ -86,7 +118,7 @@ class RetroArtConverter:
             device=self.device if self.device == "cuda" else -1
         )
-        # Determine which controlnets to use
         if self.instantid_enabled and self.controlnet_instantid is not None:
             controlnets = [self.controlnet_depth, self.controlnet_instantid]
             print(f"Initializing with multiple ControlNets: Depth + InstantID")
@@ -95,8 +127,7 @@ class RetroArtConverter:
             print(f"Initializing with single ControlNet: Depth only")
         # Load SDXL checkpoint from HuggingFace Hub
-        # NOTE: VAE is bundled in the checkpoint, don't load separately!
-        print("Loading SDXL checkpoint (horizon) with bundled VAE from HuggingFace Hub...")
         try:
             model_path = hf_hub_download(
                 repo_id=MODEL_REPO,
@@ -106,17 +137,19 @@ class RetroArtConverter:
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
                 controlnet=controlnets,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
-            print("✓ Custom checkpoint loaded successfully (VAE bundled)")
             self.models_loaded['custom_checkpoint'] = True
         except Exception as e:
             print(f"⚠️ Could not load custom checkpoint: {e}")
-            print("Using default SDXL base model")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
                 controlnet=controlnets,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
@@ -131,23 +164,24 @@ class RetroArtConverter:
                 repo_type="model"
             )
             self.pipe.load_lora_weights(lora_path)
-            print(f"✓ LORA loaded successfully")
-            print(f"  Trigger word: '{TRIGGER_WORD}'")
             self.models_loaded['lora'] = True
         except Exception as e:
             print(f"⚠️ Could not load LORA: {e}")
             self.models_loaded['lora'] = False
-        # CRITICAL: Use LCM Scheduler for this model!
         print("Setting up LCM scheduler...")
         self.pipe.scheduler = LCMScheduler.from_config(
             self.pipe.scheduler.config
         )
-        # Enable attention optimizations
         self.pipe.unet.set_attn_processor(AttnProcessor2_0())
-        # Try to enable xformers
         if self.device == "cuda":
             try:
                 self.pipe.enable_xformers_memory_efficient_attention()
@@ -155,14 +189,8 @@ class RetroArtConverter:
             except Exception as e:
                 print(f"⚠️ xformers not available: {e}")
-        # Set CLIP skip to 2
-        if hasattr(self.pipe, 'text_encoder'):
-            self.clip_skip = 2
-            print(f"✓ CLIP skip set to {self.clip_skip}")
         # Track controlnet configuration
         self.using_multiple_controlnets = isinstance(controlnets, list)
-        print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
         print("\n=== MODEL STATUS ===")
         for model, loaded in self.models_loaded.items():
@@ -170,15 +198,7 @@ class RetroArtConverter:
             print(f"{model}: {status}")
         print("===================\n")
-        print("✓ Model initialization complete!")
-        print("\n=== LCM CONFIGURATION ===")
-        print("Scheduler: LCM")
-        print("Recommended Steps: 12")
-        print("Recommended CFG: 1.0-1.5")
-        print("Recommended Resolution: 896x1152 or 832x1216")
-        print("CLIP Skip: 2")
-        print(f"LORA Trigger: '{TRIGGER_WORD}'")
-        print("=========================\n")
     def get_depth_map(self, image):
         """Generate depth map from input image"""
@@ -195,73 +215,59 @@ class RetroArtConverter:
         # Slight blur to reduce noise
         depth_normalized = cv2.GaussianBlur(depth_normalized, (3, 3), 0)
-        # Convert to RGB
         depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
         return Image.fromarray(depth_colored)
-    def calculate_optimal_size(self, original_width, original_height):
-        """Calculate optimal size from recommended resolutions"""
         aspect_ratio = original_width / original_height
-        # Recommended resolutions for this model
-        recommended_sizes = [
-            (896, 1152),  # Portrait
-            (1152, 896),  # Landscape
-            (832, 1216),  # Tall portrait
-            (1216, 832),  # Wide landscape
-            (1024, 1024)  # Square
-        ]
-        # Find closest matching aspect ratio
-        best_match = None
-        best_diff = float('inf')
-        for width, height in recommended_sizes:
-            rec_aspect = width / height
-            diff = abs(rec_aspect - aspect_ratio)
-            if diff < best_diff:
-                best_diff = diff
-                best_match = (width, height)
-        # Ensure dimensions are multiples of 8
-        width, height = best_match
-        width = (width // 8) * 8
-        height = (height // 8) * 8
-        return width, height
-    def add_trigger_word(self, prompt):
-        """Add trigger word to prompt if not present"""
-        if TRIGGER_WORD.lower() not in prompt.lower():
-            return f"{TRIGGER_WORD}, {prompt}"
-        return prompt
     def generate_retro_art(
         self,
         input_image,
-        prompt="retro game character, vibrant colors, detailed",
-        negative_prompt="blurry, low quality, ugly, distorted",
-        num_inference_steps=12,  # LCM recommended: 12 steps
-        guidance_scale=1.0,       # LCM recommended: 1.0-1.5
-        controlnet_conditioning_scale=0.8,
-        lora_scale=1.0,
-        identity_preservation=0.8,
-        image_scale=0.2
     ):
-        """Generate retro art with correct LCM settings"""
-        # Add trigger word to prompt
-        prompt = self.add_trigger_word(prompt)
-        # Calculate optimal size
         original_width, original_height = input_image.size
-        target_width, target_height = self.calculate_optimal_size(original_width, original_height)
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
-        print(f"Prompt: {prompt}")
-        # Resize with high quality
         resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
         # Generate depth map
@@ -269,59 +275,81 @@ class RetroArtConverter:
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
-        # Handle face detection for InstantID
         using_multiple_controlnets = self.using_multiple_controlnets
         face_embeddings = None
         has_detected_faces = False
-        if using_multiple_controlnets:
-            print("Checking for faces...")
             img_array = np.array(resized_image)
-            faces = self.face_app.get(img_array) if self.face_app is not None else []
             if len(faces) > 0:
                 has_detected_faces = True
                 print(f"Detected {len(faces)} face(s)")
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
                 face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
                 self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
-                print(f"LORA scale: {lora_scale}")
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")
-        # Prepare generation kwargs
         pipe_kwargs = {
             "prompt": prompt,
-            "negative_prompt": negative_prompt,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             "width": target_width,
             "height": target_height,
-            "generator": torch.Generator(device=self.device).manual_seed(42)
         }
-        # Add CLIP skip
-        if hasattr(self.pipe, 'text_encoder'):
-            pipe_kwargs["clip_skip"] = 2
-        # Configure ControlNet inputs
         if using_multiple_controlnets and has_detected_faces:
-            print("Using Depth + InstantID ControlNets")
             control_images = [depth_image, resized_image]
             conditioning_scales = [controlnet_conditioning_scale, image_scale]
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
             if face_embeddings is not None:
-                pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
         elif using_multiple_controlnets and not has_detected_faces:
-            print("Multiple ControlNets available but no faces detected")
             control_images = [depth_image, depth_image]
             conditioning_scales = [controlnet_conditioning_scale, 0.0]
@@ -334,15 +362,16 @@ class RetroArtConverter:
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
         # Generate
-        print(f"Generating with LCM: Steps={num_inference_steps}, CFG={guidance_scale}")
         result = self.pipe(**pipe_kwargs)
         return result.images[0]
 # Initialize converter
-print("Initializing RetroArt Converter...")
 converter = RetroArtConverter()
 @spaces.GPU
 def process_image(
     image,
@@ -352,7 +381,7 @@ def process_image(
     guidance_scale,
     controlnet_scale,
     lora_scale,
-    identity_preservation,
     image_scale
 ):
     if image is None:
@@ -367,8 +396,9 @@ def process_image(
             guidance_scale=guidance_scale,
             controlnet_conditioning_scale=controlnet_scale,
             lora_scale=lora_scale,
-            identity_preservation=identity_preservation,
-            image_scale=image_scale
         )
         return result
     except Exception as e:
@@ -377,103 +407,100 @@ def process_image(
         traceback.print_exc()
         raise gr.Error(f"Generation failed: {str(e)}")
-# Gradio UI
 with gr.Blocks(title="RetroArt Converter - LCM", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🎮 RetroArt Converter (LCM Optimized)
-    Convert images into retro pixel art style using LCM (Latent Consistency Model) for fast, high-quality generation!
-    **✨ Features:**
-    - ⚡ Ultra-fast generation (12 steps!)
-    - 🎨 Custom pixel art LORA with trigger word: `p1x3l4rt, pixel art`
-    - 📐 Optimized resolutions: 896x1152 / 832x1216
-    - 🖼️ Bundled VAE for authentic retro look
-    - 🎯 CLIP Skip 2 for better style
     """)
     # Model status
     if converter.models_loaded:
-        status_text = "**📦 Loaded Models:**\n"
-        status_text += f"- Custom Checkpoint (Horizon): {'✓ Loaded' if converter.models_loaded['custom_checkpoint'] else '✗ Using SDXL base'}\n"
-        status_text += f"- LORA (RetroArt): {'✓ Loaded' if converter.models_loaded['lora'] else '✗ Disabled'}\n"
-        status_text += f"- InstantID: {'✓ Loaded' if converter.models_loaded['instantid'] else '✗ Disabled'}\n"
-        gr.Markdown(status_text)
-    gr.Markdown(f"""
-    **⚙️ LCM Configuration:**
-    - Scheduler: LCM (Latent Consistency Model)
-    - Recommended Steps: **12** (fast!)
-    - Recommended CFG: **1.0-1.5** (lower than normal)
-    - CLIP Skip: **2**
-    - LORA Trigger: `{TRIGGER_WORD}` (auto-added)
-    """)
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
             prompt = gr.Textbox(
-                label="Prompt (trigger word auto-added)",
-                value="retro game character, vibrant colors, highly detailed",
-                lines=3,
-                info=f"'{TRIGGER_WORD}' will be automatically added"
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
-                value="blurry, low quality, ugly, distorted, deformed, bad anatomy",
                 lines=2
             )
-            with gr.Accordion("⚡ LCM Settings (Optimized)", open=True):
                 steps = gr.Slider(
                     minimum=4,
                     maximum=20,
                     value=12,
                     step=1,
-                    label="Inference Steps (LCM works great with just 12!)"
                 )
                 guidance_scale = gr.Slider(
-                    minimum=0.5,
                     maximum=3.0,
-                    value=1.0,
                     step=0.1,
-                    label="Guidance Scale (CFG) - LCM uses 1.0-1.5"
                 )
                 controlnet_scale = gr.Slider(
-                    minimum=0.3,
-                    maximum=1.2,
-                    value=0.8,
                     step=0.05,
                     label="ControlNet Depth Scale"
                 )
                 lora_scale = gr.Slider(
-                    minimum=0.5,
-                    maximum=1.5,
-                    value=1.0,
                     step=0.05,
                     label="RetroArt LORA Scale"
                 )
-            with gr.Accordion("🎭 Identity Settings (for portraits)", open=False):
-                identity_preservation = gr.Slider(
-                    minimum=0,
-                    maximum=1.5,
-                    value=0.8,
                     step=0.1,
-                    label="Identity Preservation"
                 )
                 image_scale = gr.Slider(
                     minimum=0,
-                    maximum=1.0,
-                    value=0.2,
                     step=0.05,
-                    label="InstantID Image Scale"
                 )
             generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary", size="lg")
@@ -482,29 +509,42 @@ with gr.Blocks(title="RetroArt Converter - LCM", theme=gr.themes.Soft()) as demo
             output_image = gr.Image(label="Retro Art Output")
             gr.Markdown("""
-            ### 💡 Tips for Best Results:
-            **For LCM Models:**
-            - ✅ Use **12 steps** (already optimized!)
-            - ✅ Keep CFG at **1.0-1.5** (not 7.5!)
-            - ✅ LORA trigger word is **auto-added**
-            - ✅ Resolution auto-optimized to 896x1152 or 832x1216
-            **For Quality:**
-            - Use high-resolution input images
-            - Be specific in prompts: "16-bit game character" vs "character"
-            - Adjust ControlNet scale: lower = more creative, higher = more faithful
-            **For Style:**
-            - Increase LORA scale (1.0-1.5) for stronger pixel art effect
-            - Try prompts like: "SNES style", "16-bit RPG", "Game Boy advance style"
             """)
     generate_btn.click(
         fn=process_image,
         inputs=[
             input_image, prompt, negative_prompt, steps, guidance_scale,
-            controlnet_scale, lora_scale, identity_preservation, image_scale
         ],
         outputs=[output_image]
     )

 import gradio as gr
 import torch
 from diffusers import (
     StableDiffusionXLControlNetPipeline,
     ControlNetModel,
     AutoencoderKL,
+    LCMScheduler
 )
 from diffusers.models.attention_processor import AttnProcessor2_0
 from insightface.app import FaceAnalysis
 from PIL import Image
 import numpy as np
 import cv2
+from transformers import pipeline as transformers_pipeline, CLIPImageProcessor
 from huggingface_hub import hf_hub_download
 import os
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
 print(f"Using device: {device}")
 print(f"Loading models from: {MODEL_REPO}")
 class RetroArtConverter:
     def __init__(self):
         self.dtype = dtype
         self.models_loaded = {
             'custom_checkpoint': False,
+            'custom_vae': False,
             'lora': False,
             'instantid': False
         }
             torch_dtype=self.dtype
         ).to(self.device)
+        # Load InstantID ControlNet
         print("Loading InstantID ControlNet...")
         try:
             self.controlnet_instantid = ControlNetModel.from_pretrained(
             self.controlnet_instantid = None
             self.instantid_enabled = False
+        # Load IP-Adapter for InstantID
+        print("Loading IP-Adapter for InstantID...")
+        try:
+            from transformers import CLIPVisionModelWithProjection
+            self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+                "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
+                torch_dtype=self.dtype
+            ).to(self.device)
+            print("✓ IP-Adapter image encoder loaded")
+        except Exception as e:
+            print(f"⚠️ IP-Adapter not available: {e}")
+            self.image_encoder = None
+        # Load custom VAE from HuggingFace Hub
+        print("Loading custom VAE (pixelate) from HuggingFace Hub...")
+        try:
+            vae_path = hf_hub_download(
+                repo_id=MODEL_REPO,
+                filename="pixelate.safetensors",
+                repo_type="model"
+            )
+            self.vae = AutoencoderKL.from_single_file(
+                vae_path,
+                torch_dtype=self.dtype
+            ).to(self.device)
+            print("✓ Custom VAE loaded successfully")
+            self.models_loaded['custom_vae'] = True
+        except Exception as e:
+            print(f"⚠️ Could not load custom VAE: {e}")
+            print("Using high-quality SDXL VAE")
+            self.vae = AutoencoderKL.from_pretrained(
+                "madebyollin/sdxl-vae-fp16-fix",
+                torch_dtype=self.dtype
+            ).to(self.device)
+            self.models_loaded['custom_vae'] = False
         # Load depth estimator
         print("Loading depth estimator...")
         self.depth_estimator = transformers_pipeline(
             device=self.device if self.device == "cuda" else -1
         )
+        # Determine controlnets configuration
         if self.instantid_enabled and self.controlnet_instantid is not None:
             controlnets = [self.controlnet_depth, self.controlnet_instantid]
             print(f"Initializing with multiple ControlNets: Depth + InstantID")
             print(f"Initializing with single ControlNet: Depth only")
         # Load SDXL checkpoint from HuggingFace Hub
+        print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
         try:
             model_path = hf_hub_download(
                 repo_id=MODEL_REPO,
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
                 controlnet=controlnets,
+                vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
+            print("✓ Custom checkpoint loaded successfully")
             self.models_loaded['custom_checkpoint'] = True
         except Exception as e:
             print(f"⚠️ Could not load custom checkpoint: {e}")
+            print("Using default SDXL")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
                 controlnet=controlnets,
+                vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
                 repo_type="model"
             )
             self.pipe.load_lora_weights(lora_path)
+            print("✓ LORA loaded successfully")
             self.models_loaded['lora'] = True
         except Exception as e:
             print(f"⚠️ Could not load LORA: {e}")
             self.models_loaded['lora'] = False
+        # CRITICAL: Set LCM Scheduler for fast generation
         print("Setting up LCM scheduler...")
         self.pipe.scheduler = LCMScheduler.from_config(
             self.pipe.scheduler.config
         )
+        # Disable VAE slicing for better quality
+        # self.pipe.enable_vae_slicing()
+        # Enable memory optimizations
         self.pipe.unet.set_attn_processor(AttnProcessor2_0())
         if self.device == "cuda":
             try:
                 self.pipe.enable_xformers_memory_efficient_attention()
             except Exception as e:
                 print(f"⚠️ xformers not available: {e}")
         # Track controlnet configuration
         self.using_multiple_controlnets = isinstance(controlnets, list)
         print("\n=== MODEL STATUS ===")
         for model, loaded in self.models_loaded.items():
             print(f"{model}: {status}")
         print("===================\n")
+        print("Model initialization complete!")
     def get_depth_map(self, image):
         """Generate depth map from input image"""
         # Slight blur to reduce noise
         depth_normalized = cv2.GaussianBlur(depth_normalized, (3, 3), 0)
         depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
         return Image.fromarray(depth_colored)
+    def calculate_target_size(self, original_width, original_height, preferred_resolution="896x1152"):
+        """Calculate target size based on recommended SDXL resolutions"""
+        # Recommended resolutions for this model
+        resolutions = {
+            "896x1152": (896, 1152),  # Portrait
+            "832x1216": (832, 1216),  # Tall portrait
+            "1152x896": (1152, 896),  # Landscape
+            "1216x832": (1216, 832),  # Wide landscape
+            "1024x1024": (1024, 1024) # Square
+        }
         aspect_ratio = original_width / original_height
+        # Choose resolution based on aspect ratio
+        if aspect_ratio < 0.85:  # Tall portrait
+            target_width, target_height = resolutions["832x1216"]
+        elif aspect_ratio < 1.15:  # Portrait to square
+            if aspect_ratio < 1.0:
+                target_width, target_height = resolutions["896x1152"]
+            else:
+                target_width, target_height = resolutions["1024x1024"]
+        elif aspect_ratio < 1.35:  # Landscape
+            target_width, target_height = resolutions["1152x896"]
+        else:  # Wide landscape
+            target_width, target_height = resolutions["1216x832"]
+        return target_width, target_height
     def generate_retro_art(
         self,
         input_image,
+        prompt="retro pixel art game, 16-bit style, vibrant colors",
+        negative_prompt="blurry, low quality, modern, photorealistic, 3d render",
+        num_inference_steps=12,  # LCM default: 12 steps
+        guidance_scale=1.5,  # LCM default: 1-1.5
+        controlnet_conditioning_scale=0.6,
+        lora_scale=0.85,
+        identity_scale=0.9,  # Stronger identity preservation
+        image_scale=0.5,  # Stronger InstantID influence
+        clip_skip=2  # SDXL clip skip
     ):
+        """Main generation function with LCM optimization"""
+        # Calculate target size
         original_width, original_height = input_image.size
+        target_width, target_height = self.calculate_target_size(original_width, original_height)
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
         resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
         # Generate depth map
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
+        # IMPORTANT: Add LORA trigger word
+        lora_trigger = "p1x3l4rt, pixel art"
+        if lora_trigger not in prompt:
+            prompt = f"{lora_trigger}, {prompt}"
+            print(f"Added LORA trigger word: {lora_trigger}")
+        # Check if using multiple controlnets
         using_multiple_controlnets = self.using_multiple_controlnets
+        # Extract face embeddings for InstantID
         face_embeddings = None
         has_detected_faces = False
+        if using_multiple_controlnets and self.face_app is not None:
+            print("Extracting face embeddings...")
             img_array = np.array(resized_image)
+            faces = self.face_app.get(img_array)
             if len(faces) > 0:
                 has_detected_faces = True
                 print(f"Detected {len(faces)} face(s)")
+                # Get the largest face
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
+                # Extract embedding
                 face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
+                # Enhance prompt for better face preservation
+                prompt = f"detailed face, portrait, facial features, {prompt}"
+                print(f"Face detected, enhanced prompt for identity preservation")
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
                 self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")
+        # Enhanced negative prompt
+        full_negative = f"{negative_prompt}, worst quality, normal quality, lowres, watermark, text"
+        # Prepare pipeline kwargs
         pipe_kwargs = {
             "prompt": prompt,
+            "negative_prompt": full_negative,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             "width": target_width,
             "height": target_height,
+            "generator": torch.Generator(device=self.device).manual_seed(42),
+            "clip_skip": clip_skip
         }
+        # Configure control images based on setup
         if using_multiple_controlnets and has_detected_faces:
+            print(f"Using Depth + InstantID (identity_scale={identity_scale}, image_scale={image_scale})")
+            # For InstantID, use the original image
             control_images = [depth_image, resized_image]
             conditioning_scales = [controlnet_conditioning_scale, image_scale]
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
+            # Add face embeddings with stronger influence
             if face_embeddings is not None:
+                # Scale up the face embeddings for stronger identity
+                scaled_embeddings = face_embeddings * identity_scale
+                pipe_kwargs["cross_attention_kwargs"] = {
+                    "ip_adapter_image_embeds": [scaled_embeddings]
+                }
         elif using_multiple_controlnets and not has_detected_faces:
+            print("Multiple ControlNets but no faces detected")
             control_images = [depth_image, depth_image]
             conditioning_scales = [controlnet_conditioning_scale, 0.0]
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
         # Generate
+        print(f"Generating with LCM: {num_inference_steps} steps, CFG {guidance_scale}")
         result = self.pipe(**pipe_kwargs)
         return result.images[0]
 # Initialize converter
+print("Initializing RetroArt Converter with LCM...")
 converter = RetroArtConverter()
+# Gradio interface
 @spaces.GPU
 def process_image(
     image,
     guidance_scale,
     controlnet_scale,
     lora_scale,
+    identity_scale,
     image_scale
 ):
     if image is None:
             guidance_scale=guidance_scale,
             controlnet_conditioning_scale=controlnet_scale,
             lora_scale=lora_scale,
+            identity_scale=identity_scale,
+            image_scale=image_scale,
+            clip_skip=2
         )
         return result
     except Exception as e:
         traceback.print_exc()
         raise gr.Error(f"Generation failed: {str(e)}")
+# Create Gradio interface
 with gr.Blocks(title="RetroArt Converter - LCM", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🎮 RetroArt Converter - LCM Optimized
+    Convert images to retro pixel art using **LCM (Latent Consistency Model)** for fast generation!
+    **Key Features:**
+    - ⚡ Fast generation (12 steps)
+    - 🎨 LORA trigger: "p1x3l4rt, pixel art" (auto-added)
+    - 👤 Strong InstantID for face preservation
+    - 🎯 Optimized SDXL resolutions (896x1152, 832x1216)
+    - 📐 Clip Skip 2
     """)
     # Model status
     if converter.models_loaded:
+        status_md = "**Model Status:**\n"
+        status_md += f"- Custom Checkpoint: {'✓' if converter.models_loaded['custom_checkpoint'] else '✗ Fallback'}\n"
+        status_md += f"- Custom VAE: {'✓' if converter.models_loaded['custom_vae'] else '✗ Fallback'}\n"
+        status_md += f"- LORA: {'✓' if converter.models_loaded['lora'] else '✗ Fallback'}\n"
+        status_md += f"- InstantID: {'✓' if converter.models_loaded['instantid'] else '✗ Disabled'}\n"
+        gr.Markdown(status_md)
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
             prompt = gr.Textbox(
+                label='Prompt (trigger "p1x3l4rt, pixel art" auto-added)',
+                value="retro pixel art game, 16-bit style, vibrant colors, detailed",
+                lines=2,
+                info="Don't include trigger word - it's added automatically"
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
+                value="blurry, low quality, modern, photorealistic, 3d render, ugly, distorted",
                 lines=2
             )
+            gr.Markdown("### ⚡ LCM Settings (Optimized)")
+            with gr.Row():
                 steps = gr.Slider(
                     minimum=4,
                     maximum=20,
                     value=12,
                     step=1,
+                    label="Steps (LCM recommended: 12)"
                 )
                 guidance_scale = gr.Slider(
+                    minimum=1.0,
                     maximum=3.0,
+                    value=1.5,
                     step=0.1,
+                    label="CFG Scale (LCM recommended: 1-1.5)"
                 )
+            with gr.Accordion("Advanced Settings", open=False):
                 controlnet_scale = gr.Slider(
+                    minimum=0,
+                    maximum=1.5,
+                    value=0.6,
                     step=0.05,
                     label="ControlNet Depth Scale"
                 )
                 lora_scale = gr.Slider(
+                    minimum=0,
+                    maximum=2,
+                    value=0.85,
                     step=0.05,
                     label="RetroArt LORA Scale"
                 )
+            gr.Markdown("### 👤 InstantID Settings (Stronger)")
+            with gr.Row():
+                identity_scale = gr.Slider(
+                    minimum=0.5,
+                    maximum=2.0,
+                    value=0.9,
                     step=0.1,
+                    label="Identity Strength (higher = more truthful)"
                 )
                 image_scale = gr.Slider(
                     minimum=0,
+                    maximum=1.5,
+                    value=0.5,
                     step=0.05,
+                    label="InstantID ControlNet Scale"
                 )
             generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary", size="lg")
             output_image = gr.Image(label="Retro Art Output")
             gr.Markdown("""
+            ### ⚡ LCM Quick Tips:
+            - **12 steps** is optimal for LCM (faster than traditional 40-50)
+            - **CFG 1-1.5** works best (not 7-8 like traditional)
+            - LORA trigger **"p1x3l4rt, pixel art"** is auto-added
+            - For stronger identity: increase **Identity Strength** to 1.2-1.5
+            - Resolution auto-selected: 896x1152 (portrait) or 1152x896 (landscape)
+            ### 👤 Face Preservation:
+            - **Identity Strength 0.9-1.2**: Balanced retro + identity
+            - **Identity Strength 1.3-2.0**: Maximum face accuracy
+            - **Image Scale 0.5-0.8**: Strong InstantID influence
             """)
+    gr.Examples(
+        examples=[
+            [
+                "example_portrait.jpg",
+                "retro pixel art portrait, 16-bit game character, detailed face",
+                "blurry, modern, low quality",
+                12, 1.5, 0.6, 0.85, 0.9, 0.5
+            ],
+        ],
+        inputs=[
+            input_image, prompt, negative_prompt, steps, guidance_scale,
+            controlnet_scale, lora_scale, identity_scale, image_scale
+        ],
+        outputs=[output_image],
+        fn=process_image,
+        cache_examples=False
+    )
     generate_btn.click(
         fn=process_image,
         inputs=[
             input_image, prompt, negative_prompt, steps, guidance_scale,
+            controlnet_scale, lora_scale, identity_scale, image_scale
         ],
         outputs=[output_image]
     )