pixagram-dev

Runtime error

App Files Community

primerz committed on Oct 27

Commit

962b8c2

verified ·

1 Parent(s): 28615cb

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -210

app.py CHANGED Viewed

@@ -2,17 +2,18 @@ import spaces  # MUST be first, before any CUDA-related imports
 import gradio as gr
 import torch
 from diffusers import (
     StableDiffusionXLControlNetPipeline,
     ControlNetModel,
     AutoencoderKL,
-    LCMScheduler
 )
 from diffusers.models.attention_processor import AttnProcessor2_0
 from insightface.app import FaceAnalysis
 from PIL import Image
 import numpy as np
 import cv2
-from transformers import pipeline as transformers_pipeline, CLIPImageProcessor
 from huggingface_hub import hf_hub_download
 import os
@@ -21,8 +22,12 @@ MODEL_REPO = "primerz/pixagram"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
 print(f"Using device: {device}")
 print(f"Loading models from: {MODEL_REPO}")
 class RetroArtConverter:
     def __init__(self):
@@ -30,7 +35,6 @@ class RetroArtConverter:
         self.dtype = dtype
         self.models_loaded = {
             'custom_checkpoint': False,
-            'custom_vae': False,
             'lora': False,
             'instantid': False
         }
@@ -58,7 +62,7 @@ class RetroArtConverter:
             torch_dtype=self.dtype
         ).to(self.device)
-        # Load InstantID ControlNet
         print("Loading InstantID ControlNet...")
         try:
             self.controlnet_instantid = ControlNetModel.from_pretrained(
@@ -74,42 +78,6 @@ class RetroArtConverter:
             self.controlnet_instantid = None
             self.instantid_enabled = False
-        # Load IP-Adapter for InstantID
-        print("Loading IP-Adapter for InstantID...")
-        try:
-            from transformers import CLIPVisionModelWithProjection
-            self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-                "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
-                torch_dtype=self.dtype
-            ).to(self.device)
-            print("✓ IP-Adapter image encoder loaded")
-        except Exception as e:
-            print(f"⚠️ IP-Adapter not available: {e}")
-            self.image_encoder = None
-        # Load custom VAE from HuggingFace Hub
-        print("Loading custom VAE (pixelate) from HuggingFace Hub...")
-        try:
-            vae_path = hf_hub_download(
-                repo_id=MODEL_REPO,
-                filename="pixelate.safetensors",
-                repo_type="model"
-            )
-            self.vae = AutoencoderKL.from_single_file(
-                vae_path,
-                torch_dtype=self.dtype
-            ).to(self.device)
-            print("✓ Custom VAE loaded successfully")
-            self.models_loaded['custom_vae'] = True
-        except Exception as e:
-            print(f"⚠️ Could not load custom VAE: {e}")
-            print("Using high-quality SDXL VAE")
-            self.vae = AutoencoderKL.from_pretrained(
-                "madebyollin/sdxl-vae-fp16-fix",
-                torch_dtype=self.dtype
-            ).to(self.device)
-            self.models_loaded['custom_vae'] = False
         # Load depth estimator
         print("Loading depth estimator...")
         self.depth_estimator = transformers_pipeline(
@@ -118,7 +86,7 @@ class RetroArtConverter:
             device=self.device if self.device == "cuda" else -1
         )
-        # Determine controlnets configuration
         if self.instantid_enabled and self.controlnet_instantid is not None:
             controlnets = [self.controlnet_depth, self.controlnet_instantid]
             print(f"Initializing with multiple ControlNets: Depth + InstantID")
@@ -127,7 +95,8 @@ class RetroArtConverter:
             print(f"Initializing with single ControlNet: Depth only")
         # Load SDXL checkpoint from HuggingFace Hub
-        print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
         try:
             model_path = hf_hub_download(
                 repo_id=MODEL_REPO,
@@ -137,19 +106,17 @@ class RetroArtConverter:
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
                 controlnet=controlnets,
-                vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
-            print("✓ Custom checkpoint loaded successfully")
             self.models_loaded['custom_checkpoint'] = True
         except Exception as e:
             print(f"⚠️ Could not load custom checkpoint: {e}")
-            print("Using default SDXL")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
                 controlnet=controlnets,
-                vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
@@ -164,24 +131,23 @@ class RetroArtConverter:
                 repo_type="model"
             )
             self.pipe.load_lora_weights(lora_path)
-            print("✓ LORA loaded successfully")
             self.models_loaded['lora'] = True
         except Exception as e:
             print(f"⚠️ Could not load LORA: {e}")
             self.models_loaded['lora'] = False
-        # CRITICAL: Set LCM Scheduler for fast generation
         print("Setting up LCM scheduler...")
         self.pipe.scheduler = LCMScheduler.from_config(
             self.pipe.scheduler.config
         )
-        # Disable VAE slicing for better quality
-        # self.pipe.enable_vae_slicing()
-        # Enable memory optimizations
         self.pipe.unet.set_attn_processor(AttnProcessor2_0())
         if self.device == "cuda":
             try:
                 self.pipe.enable_xformers_memory_efficient_attention()
@@ -189,8 +155,14 @@ class RetroArtConverter:
             except Exception as e:
                 print(f"⚠️ xformers not available: {e}")
         # Track controlnet configuration
         self.using_multiple_controlnets = isinstance(controlnets, list)
         print("\n=== MODEL STATUS ===")
         for model, loaded in self.models_loaded.items():
@@ -198,7 +170,15 @@ class RetroArtConverter:
             print(f"{model}: {status}")
         print("===================\n")
-        print("Model initialization complete!")
     def get_depth_map(self, image):
         """Generate depth map from input image"""
@@ -215,59 +195,73 @@ class RetroArtConverter:
         # Slight blur to reduce noise
         depth_normalized = cv2.GaussianBlur(depth_normalized, (3, 3), 0)
         depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
         return Image.fromarray(depth_colored)
-    def calculate_target_size(self, original_width, original_height, preferred_resolution="896x1152"):
-        """Calculate target size based on recommended SDXL resolutions"""
-        # Recommended resolutions for this model
-        resolutions = {
-            "896x1152": (896, 1152),  # Portrait
-            "832x1216": (832, 1216),  # Tall portrait
-            "1152x896": (1152, 896),  # Landscape
-            "1216x832": (1216, 832),  # Wide landscape
-            "1024x1024": (1024, 1024) # Square
-        }
         aspect_ratio = original_width / original_height
-        # Choose resolution based on aspect ratio
-        if aspect_ratio < 0.85:  # Tall portrait
-            target_width, target_height = resolutions["832x1216"]
-        elif aspect_ratio < 1.15:  # Portrait to square
-            if aspect_ratio < 1.0:
-                target_width, target_height = resolutions["896x1152"]
-            else:
-                target_width, target_height = resolutions["1024x1024"]
-        elif aspect_ratio < 1.35:  # Landscape
-            target_width, target_height = resolutions["1152x896"]
-        else:  # Wide landscape
-            target_width, target_height = resolutions["1216x832"]
-        return target_width, target_height
     def generate_retro_art(
         self,
         input_image,
-        prompt="retro pixel art game, 16-bit style, vibrant colors",
-        negative_prompt="blurry, low quality, modern, photorealistic, 3d render",
-        num_inference_steps=12,  # LCM default: 12 steps
-        guidance_scale=1.5,  # LCM default: 1-1.5
-        controlnet_conditioning_scale=0.6,
-        lora_scale=0.85,
-        identity_scale=0.9,  # Stronger identity preservation
-        image_scale=0.5,  # Stronger InstantID influence
-        clip_skip=2  # SDXL clip skip
     ):
-        """Main generation function with LCM optimization"""
-        # Calculate target size
         original_width, original_height = input_image.size
-        target_width, target_height = self.calculate_target_size(original_width, original_height)
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
         resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
         # Generate depth map
@@ -275,81 +269,59 @@ class RetroArtConverter:
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
-        # IMPORTANT: Add LORA trigger word
-        lora_trigger = "p1x3l4rt, pixel art"
-        if lora_trigger not in prompt:
-            prompt = f"{lora_trigger}, {prompt}"
-            print(f"Added LORA trigger word: {lora_trigger}")
-        # Check if using multiple controlnets
         using_multiple_controlnets = self.using_multiple_controlnets
-        # Extract face embeddings for InstantID
         face_embeddings = None
         has_detected_faces = False
-        if using_multiple_controlnets and self.face_app is not None:
-            print("Extracting face embeddings...")
             img_array = np.array(resized_image)
-            faces = self.face_app.get(img_array)
             if len(faces) > 0:
                 has_detected_faces = True
                 print(f"Detected {len(faces)} face(s)")
-                # Get the largest face
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
-                # Extract embedding
                 face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
-                # Enhance prompt for better face preservation
-                prompt = f"detailed face, portrait, facial features, {prompt}"
-                print(f"Face detected, enhanced prompt for identity preservation")
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
                 self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")
-        # Enhanced negative prompt
-        full_negative = f"{negative_prompt}, worst quality, normal quality, lowres, watermark, text"
-        # Prepare pipeline kwargs
         pipe_kwargs = {
             "prompt": prompt,
-            "negative_prompt": full_negative,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             "width": target_width,
             "height": target_height,
-            "generator": torch.Generator(device=self.device).manual_seed(42),
-            "clip_skip": clip_skip
         }
-        # Configure control images based on setup
         if using_multiple_controlnets and has_detected_faces:
-            print(f"Using Depth + InstantID (identity_scale={identity_scale}, image_scale={image_scale})")
-            # For InstantID, use the original image
             control_images = [depth_image, resized_image]
             conditioning_scales = [controlnet_conditioning_scale, image_scale]
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
-            # Add face embeddings with stronger influence
             if face_embeddings is not None:
-                # Scale up the face embeddings for stronger identity
-                scaled_embeddings = face_embeddings * identity_scale
-                pipe_kwargs["cross_attention_kwargs"] = {
-                    "ip_adapter_image_embeds": [scaled_embeddings]
-                }
         elif using_multiple_controlnets and not has_detected_faces:
-            print("Multiple ControlNets but no faces detected")
             control_images = [depth_image, depth_image]
             conditioning_scales = [controlnet_conditioning_scale, 0.0]
@@ -362,16 +334,15 @@ class RetroArtConverter:
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
         # Generate
-        print(f"Generating with LCM: {num_inference_steps} steps, CFG {guidance_scale}")
         result = self.pipe(**pipe_kwargs)
         return result.images[0]
 # Initialize converter
-print("Initializing RetroArt Converter with LCM...")
 converter = RetroArtConverter()
-# Gradio interface
 @spaces.GPU
 def process_image(
     image,
@@ -381,7 +352,7 @@ def process_image(
     guidance_scale,
     controlnet_scale,
     lora_scale,
-    identity_scale,
     image_scale
 ):
     if image is None:
@@ -396,9 +367,8 @@ def process_image(
             guidance_scale=guidance_scale,
             controlnet_conditioning_scale=controlnet_scale,
             lora_scale=lora_scale,
-            identity_scale=identity_scale,
-            image_scale=image_scale,
-            clip_skip=2
         )
         return result
     except Exception as e:
@@ -407,100 +377,103 @@ def process_image(
         traceback.print_exc()
         raise gr.Error(f"Generation failed: {str(e)}")
-# Create Gradio interface
 with gr.Blocks(title="RetroArt Converter - LCM", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🎮 RetroArt Converter - LCM Optimized
-    Convert images to retro pixel art using **LCM (Latent Consistency Model)** for fast generation!
-    **Key Features:**
-    - ⚡ Fast generation (12 steps)
-    - 🎨 LORA trigger: "p1x3l4rt, pixel art" (auto-added)
-    - 👤 Strong InstantID for face preservation
-    - 🎯 Optimized SDXL resolutions (896x1152, 832x1216)
-    - 📐 Clip Skip 2
     """)
     # Model status
     if converter.models_loaded:
-        status_md = "**Model Status:**\n"
-        status_md += f"- Custom Checkpoint: {'✓' if converter.models_loaded['custom_checkpoint'] else '✗ Fallback'}\n"
-        status_md += f"- Custom VAE: {'✓' if converter.models_loaded['custom_vae'] else '✗ Fallback'}\n"
-        status_md += f"- LORA: {'✓' if converter.models_loaded['lora'] else '✗ Fallback'}\n"
-        status_md += f"- InstantID: {'✓' if converter.models_loaded['instantid'] else '✗ Disabled'}\n"
-        gr.Markdown(status_md)
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
             prompt = gr.Textbox(
-                label='Prompt (trigger "p1x3l4rt, pixel art" auto-added)',
-                value="retro pixel art game, 16-bit style, vibrant colors, detailed",
-                lines=2,
-                info="Don't include trigger word - it's added automatically"
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
-                value="blurry, low quality, modern, photorealistic, 3d render, ugly, distorted",
                 lines=2
             )
-            gr.Markdown("### ⚡ LCM Settings (Optimized)")
-            with gr.Row():
                 steps = gr.Slider(
                     minimum=4,
                     maximum=20,
                     value=12,
                     step=1,
-                    label="Steps (LCM recommended: 12)"
                 )
                 guidance_scale = gr.Slider(
-                    minimum=1.0,
                     maximum=3.0,
-                    value=1.5,
                     step=0.1,
-                    label="CFG Scale (LCM recommended: 1-1.5)"
                 )
-            with gr.Accordion("Advanced Settings", open=False):
                 controlnet_scale = gr.Slider(
-                    minimum=0,
-                    maximum=1.5,
-                    value=0.6,
                     step=0.05,
                     label="ControlNet Depth Scale"
                 )
                 lora_scale = gr.Slider(
-                    minimum=0,
-                    maximum=2,
-                    value=0.85,
                     step=0.05,
                     label="RetroArt LORA Scale"
                 )
-            gr.Markdown("### 👤 InstantID Settings (Stronger)")
-            with gr.Row():
-                identity_scale = gr.Slider(
-                    minimum=0.5,
-                    maximum=2.0,
-                    value=0.9,
                     step=0.1,
-                    label="Identity Strength (higher = more truthful)"
                 )
                 image_scale = gr.Slider(
                     minimum=0,
-                    maximum=1.5,
-                    value=0.5,
                     step=0.05,
-                    label="InstantID ControlNet Scale"
                 )
             generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary", size="lg")
@@ -509,42 +482,29 @@ with gr.Blocks(title="RetroArt Converter - LCM", theme=gr.themes.Soft()) as demo
             output_image = gr.Image(label="Retro Art Output")
             gr.Markdown("""
-            ### ⚡ LCM Quick Tips:
-            - **12 steps** is optimal for LCM (faster than traditional 40-50)
-            - **CFG 1-1.5** works best (not 7-8 like traditional)
-            - LORA trigger **"p1x3l4rt, pixel art"** is auto-added
-            - For stronger identity: increase **Identity Strength** to 1.2-1.5
-            - Resolution auto-selected: 896x1152 (portrait) or 1152x896 (landscape)
-            ### 👤 Face Preservation:
-            - **Identity Strength 0.9-1.2**: Balanced retro + identity
-            - **Identity Strength 1.3-2.0**: Maximum face accuracy
-            - **Image Scale 0.5-0.8**: Strong InstantID influence
             """)
-    gr.Examples(
-        examples=[
-            [
-                "example_portrait.jpg",
-                "retro pixel art portrait, 16-bit game character, detailed face",
-                "blurry, modern, low quality",
-                12, 1.5, 0.6, 0.85, 0.9, 0.5
-            ],
-        ],
-        inputs=[
-            input_image, prompt, negative_prompt, steps, guidance_scale,
-            controlnet_scale, lora_scale, identity_scale, image_scale
-        ],
-        outputs=[output_image],
-        fn=process_image,
-        cache_examples=False
-    )
     generate_btn.click(
         fn=process_image,
         inputs=[
             input_image, prompt, negative_prompt, steps, guidance_scale,
-            controlnet_scale, lora_scale, identity_scale, image_scale
         ],
         outputs=[output_image]
     )

 import gradio as gr
 import torch
 from diffusers import (
+    StableDiffusionXLPipeline,
     StableDiffusionXLControlNetPipeline,
     ControlNetModel,
     AutoencoderKL,
+    LCMScheduler  # CORRECT SCHEDULER FOR LCM
 )
 from diffusers.models.attention_processor import AttnProcessor2_0
 from insightface.app import FaceAnalysis
 from PIL import Image
 import numpy as np
 import cv2
+from transformers import pipeline as transformers_pipeline
 from huggingface_hub import hf_hub_download
 import os
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
+# LORA trigger word
+TRIGGER_WORD = "p1x3l4rt, pixel art"
 print(f"Using device: {device}")
 print(f"Loading models from: {MODEL_REPO}")
+print(f"LORA Trigger Word: {TRIGGER_WORD}")
 class RetroArtConverter:
     def __init__(self):
         self.dtype = dtype
         self.models_loaded = {
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False
         }
             torch_dtype=self.dtype
         ).to(self.device)
+        # Load InstantID ControlNet (optional)
         print("Loading InstantID ControlNet...")
         try:
             self.controlnet_instantid = ControlNetModel.from_pretrained(
             self.controlnet_instantid = None
             self.instantid_enabled = False
         # Load depth estimator
         print("Loading depth estimator...")
         self.depth_estimator = transformers_pipeline(
             device=self.device if self.device == "cuda" else -1
         )
+        # Determine which controlnets to use
         if self.instantid_enabled and self.controlnet_instantid is not None:
             controlnets = [self.controlnet_depth, self.controlnet_instantid]
             print(f"Initializing with multiple ControlNets: Depth + InstantID")
             print(f"Initializing with single ControlNet: Depth only")
         # Load SDXL checkpoint from HuggingFace Hub
+        # NOTE: VAE is bundled in the checkpoint, don't load separately!
+        print("Loading SDXL checkpoint (horizon) with bundled VAE from HuggingFace Hub...")
         try:
             model_path = hf_hub_download(
                 repo_id=MODEL_REPO,
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
                 controlnet=controlnets,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
+            print("✓ Custom checkpoint loaded successfully (VAE bundled)")
             self.models_loaded['custom_checkpoint'] = True
         except Exception as e:
             print(f"⚠️ Could not load custom checkpoint: {e}")
+            print("Using default SDXL base model")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
                 controlnet=controlnets,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
                 repo_type="model"
             )
             self.pipe.load_lora_weights(lora_path)
+            print(f"✓ LORA loaded successfully")
+            print(f"  Trigger word: '{TRIGGER_WORD}'")
             self.models_loaded['lora'] = True
         except Exception as e:
             print(f"⚠️ Could not load LORA: {e}")
             self.models_loaded['lora'] = False
+        # CRITICAL: Use LCM Scheduler for this model!
         print("Setting up LCM scheduler...")
         self.pipe.scheduler = LCMScheduler.from_config(
             self.pipe.scheduler.config
         )
+        # Enable attention optimizations
         self.pipe.unet.set_attn_processor(AttnProcessor2_0())
+        # Try to enable xformers
         if self.device == "cuda":
             try:
                 self.pipe.enable_xformers_memory_efficient_attention()
             except Exception as e:
                 print(f"⚠️ xformers not available: {e}")
+        # Set CLIP skip to 2
+        if hasattr(self.pipe, 'text_encoder'):
+            self.clip_skip = 2
+            print(f"✓ CLIP skip set to {self.clip_skip}")
         # Track controlnet configuration
         self.using_multiple_controlnets = isinstance(controlnets, list)
+        print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
         print("\n=== MODEL STATUS ===")
         for model, loaded in self.models_loaded.items():
             print(f"{model}: {status}")
         print("===================\n")
+        print("✓ Model initialization complete!")
+        print("\n=== LCM CONFIGURATION ===")
+        print("Scheduler: LCM")
+        print("Recommended Steps: 12")
+        print("Recommended CFG: 1.0-1.5")
+        print("Recommended Resolution: 896x1152 or 832x1216")
+        print("CLIP Skip: 2")
+        print(f"LORA Trigger: '{TRIGGER_WORD}'")
+        print("=========================\n")
     def get_depth_map(self, image):
         """Generate depth map from input image"""
         # Slight blur to reduce noise
         depth_normalized = cv2.GaussianBlur(depth_normalized, (3, 3), 0)
+        # Convert to RGB
         depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
         return Image.fromarray(depth_colored)
+    def calculate_optimal_size(self, original_width, original_height):
+        """Calculate optimal size from recommended resolutions"""
         aspect_ratio = original_width / original_height
+        # Recommended resolutions for this model
+        recommended_sizes = [
+            (896, 1152),  # Portrait
+            (1152, 896),  # Landscape
+            (832, 1216),  # Tall portrait
+            (1216, 832),  # Wide landscape
+            (1024, 1024)  # Square
+        ]
+        # Find closest matching aspect ratio
+        best_match = None
+        best_diff = float('inf')
+        for width, height in recommended_sizes:
+            rec_aspect = width / height
+            diff = abs(rec_aspect - aspect_ratio)
+            if diff < best_diff:
+                best_diff = diff
+                best_match = (width, height)
+        # Ensure dimensions are multiples of 8
+        width, height = best_match
+        width = (width // 8) * 8
+        height = (height // 8) * 8
+        return width, height
+    def add_trigger_word(self, prompt):
+        """Add trigger word to prompt if not present"""
+        if TRIGGER_WORD.lower() not in prompt.lower():
+            return f"{TRIGGER_WORD}, {prompt}"
+        return prompt
     def generate_retro_art(
         self,
         input_image,
+        prompt="retro game character, vibrant colors, detailed",
+        negative_prompt="blurry, low quality, ugly, distorted",
+        num_inference_steps=12,  # LCM recommended: 12 steps
+        guidance_scale=1.0,       # LCM recommended: 1.0-1.5
+        controlnet_conditioning_scale=0.8,
+        lora_scale=1.0,
+        identity_preservation=0.8,
+        image_scale=0.2
     ):
+        """Generate retro art with correct LCM settings"""
+        # Add trigger word to prompt
+        prompt = self.add_trigger_word(prompt)
+        # Calculate optimal size
         original_width, original_height = input_image.size
+        target_width, target_height = self.calculate_optimal_size(original_width, original_height)
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
+        print(f"Prompt: {prompt}")
+        # Resize with high quality
         resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
         # Generate depth map
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
+        # Handle face detection for InstantID
         using_multiple_controlnets = self.using_multiple_controlnets
         face_embeddings = None
         has_detected_faces = False
+        if using_multiple_controlnets:
+            print("Checking for faces...")
             img_array = np.array(resized_image)
+            faces = self.face_app.get(img_array) if self.face_app is not None else []
             if len(faces) > 0:
                 has_detected_faces = True
                 print(f"Detected {len(faces)} face(s)")
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
                 face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
                 self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
+                print(f"LORA scale: {lora_scale}")
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")
+        # Prepare generation kwargs
         pipe_kwargs = {
             "prompt": prompt,
+            "negative_prompt": negative_prompt,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             "width": target_width,
             "height": target_height,
+            "generator": torch.Generator(device=self.device).manual_seed(42)
         }
+        # Add CLIP skip
+        if hasattr(self.pipe, 'text_encoder'):
+            pipe_kwargs["clip_skip"] = 2
+        # Configure ControlNet inputs
         if using_multiple_controlnets and has_detected_faces:
+            print("Using Depth + InstantID ControlNets")
             control_images = [depth_image, resized_image]
             conditioning_scales = [controlnet_conditioning_scale, image_scale]
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
             if face_embeddings is not None:
+                pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
         elif using_multiple_controlnets and not has_detected_faces:
+            print("Multiple ControlNets available but no faces detected")
             control_images = [depth_image, depth_image]
             conditioning_scales = [controlnet_conditioning_scale, 0.0]
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
         # Generate
+        print(f"Generating with LCM: Steps={num_inference_steps}, CFG={guidance_scale}")
         result = self.pipe(**pipe_kwargs)
         return result.images[0]
 # Initialize converter
+print("Initializing RetroArt Converter...")
 converter = RetroArtConverter()
 @spaces.GPU
 def process_image(
     image,
     guidance_scale,
     controlnet_scale,
     lora_scale,
+    identity_preservation,
     image_scale
 ):
     if image is None:
             guidance_scale=guidance_scale,
             controlnet_conditioning_scale=controlnet_scale,
             lora_scale=lora_scale,
+            identity_preservation=identity_preservation,
+            image_scale=image_scale
         )
         return result
     except Exception as e:
         traceback.print_exc()
         raise gr.Error(f"Generation failed: {str(e)}")
+# Gradio UI
 with gr.Blocks(title="RetroArt Converter - LCM", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🎮 RetroArt Converter (LCM Optimized)
+    Convert images into retro pixel art style using LCM (Latent Consistency Model) for fast, high-quality generation!
+    **✨ Features:**
+    - ⚡ Ultra-fast generation (12 steps!)
+    - 🎨 Custom pixel art LORA with trigger word: `p1x3l4rt, pixel art`
+    - 📐 Optimized resolutions: 896x1152 / 832x1216
+    - 🖼️ Bundled VAE for authentic retro look
+    - 🎯 CLIP Skip 2 for better style
     """)
     # Model status
     if converter.models_loaded:
+        status_text = "**📦 Loaded Models:**\n"
+        status_text += f"- Custom Checkpoint (Horizon): {'✓ Loaded' if converter.models_loaded['custom_checkpoint'] else '✗ Using SDXL base'}\n"
+        status_text += f"- LORA (RetroArt): {'✓ Loaded' if converter.models_loaded['lora'] else '✗ Disabled'}\n"
+        status_text += f"- InstantID: {'✓ Loaded' if converter.models_loaded['instantid'] else '✗ Disabled'}\n"
+        gr.Markdown(status_text)
+    gr.Markdown(f"""
+    **⚙️ LCM Configuration:**
+    - Scheduler: LCM (Latent Consistency Model)
+    - Recommended Steps: **12** (fast!)
+    - Recommended CFG: **1.0-1.5** (lower than normal)
+    - CLIP Skip: **2**
+    - LORA Trigger: `{TRIGGER_WORD}` (auto-added)
+    """)
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
             prompt = gr.Textbox(
+                label="Prompt (trigger word auto-added)",
+                value="retro game character, vibrant colors, highly detailed",
+                lines=3,
+                info=f"'{TRIGGER_WORD}' will be automatically added"
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
+                value="blurry, low quality, ugly, distorted, deformed, bad anatomy",
                 lines=2
             )
+            with gr.Accordion("⚡ LCM Settings (Optimized)", open=True):
                 steps = gr.Slider(
                     minimum=4,
                     maximum=20,
                     value=12,
                     step=1,
+                    label="Inference Steps (LCM works great with just 12!)"
                 )
                 guidance_scale = gr.Slider(
+                    minimum=0.5,
                     maximum=3.0,
+                    value=1.0,
                     step=0.1,
+                    label="Guidance Scale (CFG) - LCM uses 1.0-1.5"
                 )
                 controlnet_scale = gr.Slider(
+                    minimum=0.3,
+                    maximum=1.2,
+                    value=0.8,
                     step=0.05,
                     label="ControlNet Depth Scale"
                 )
                 lora_scale = gr.Slider(
+                    minimum=0.5,
+                    maximum=1.5,
+                    value=1.0,
                     step=0.05,
                     label="RetroArt LORA Scale"
                 )
+            with gr.Accordion("🎭 Identity Settings (for portraits)", open=False):
+                identity_preservation = gr.Slider(
+                    minimum=0,
+                    maximum=1.5,
+                    value=0.8,
                     step=0.1,
+                    label="Identity Preservation"
                 )
                 image_scale = gr.Slider(
                     minimum=0,
+                    maximum=1.0,
+                    value=0.2,
                     step=0.05,
+                    label="InstantID Image Scale"
                 )
             generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary", size="lg")
             output_image = gr.Image(label="Retro Art Output")
             gr.Markdown("""
+            ### 💡 Tips for Best Results:
+            **For LCM Models:**
+            - ✅ Use **12 steps** (already optimized!)
+            - ✅ Keep CFG at **1.0-1.5** (not 7.5!)
+            - ✅ LORA trigger word is **auto-added**
+            - ✅ Resolution auto-optimized to 896x1152 or 832x1216
+            **For Quality:**
+            - Use high-resolution input images
+            - Be specific in prompts: "16-bit game character" vs "character"
+            - Adjust ControlNet scale: lower = more creative, higher = more faithful
+            **For Style:**
+            - Increase LORA scale (1.0-1.5) for stronger pixel art effect
+            - Try prompts like: "SNES style", "16-bit RPG", "Game Boy advance style"
             """)
     generate_btn.click(
         fn=process_image,
         inputs=[
             input_image, prompt, negative_prompt, steps, guidance_scale,
+            controlnet_scale, lora_scale, identity_preservation, image_scale
         ],
         outputs=[output_image]
     )