pixagram-dev

Runtime error

App Files Files Community

primerz committed on Oct 30, 2025

Commit

62ea67e

verified ·

1 Parent(s): dc38476

Upload 2 files

Browse files

Files changed (2) hide show

generator.py +35 -81
models.py +30 -29

generator.py CHANGED Viewed

@@ -18,7 +18,7 @@ from utils import (
 )
 from models import (
     load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
-    load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
     setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip
 )
@@ -33,16 +33,16 @@ class RetroArtConverter:
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False,
-            'zoe_depth': False,
             'ip_adapter': False
         }
         # Initialize face analysis
         self.face_app, self.face_detection_enabled = load_face_analysis()
-        # Load Zoe Depth detector
-        self.zoe_depth, zoe_success = load_depth_detector()
-        self.models_loaded['zoe_depth'] = zoe_success
         # Load ControlNets
         controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
@@ -82,7 +82,12 @@ class RetroArtConverter:
             self.image_proj_model = None
         # Setup Compel
-        self.compel, self.use_compel = setup_compel(self.pipe)
         # Setup LCM scheduler
         setup_scheduler(self.pipe)
@@ -146,23 +151,29 @@ class RetroArtConverter:
         print("============================\n")
     def get_depth_map(self, image):
-        """Generate depth map using Zoe Depth"""
-        if self.zoe_depth is not None:
             try:
                 # Ensure RGB mode
                 if image.mode != 'RGB':
                     image = image.convert('RGB')
-                # CRITICAL: ZoeDetector must be called with torch.no_grad()
                 with torch.no_grad():
-                    depth_image = self.zoe_depth(image)
-                # Use .width and .height properties (always Python ints, not numpy types)
-                print(f"[DEPTH] Zoe depth map generated: {image.width}x{image.height}")
                 return depth_image
             except Exception as e:
-                print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
                 gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                 depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
                 return Image.fromarray(depth_colored)
@@ -444,7 +455,7 @@ class RetroArtConverter:
         resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
         # Generate depth map
-        print("Generating Zoe depth map...")
         depth_image = self.get_depth_map(resized_image)
         if depth_image.size != (target_width, target_height):
             depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
@@ -528,47 +539,20 @@ class RetroArtConverter:
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
-                # Get list of actually available adapters
-                available_adapters = []
-                if hasattr(self.pipe, 'get_list_adapters'):
-                    try:
-                        available_adapters = self.pipe.get_list_adapters()
-                        print(f"[LORA] Available adapters: {available_adapters}")
-                    except:
-                        pass
-                if available_adapters:
-                    # Use first available adapter (could be 'retroart', 'default_0', etc.)
-                    adapter_name = available_adapters[0]
-                    self.pipe.set_adapters([adapter_name], adapter_weights=[lora_scale])
-                    print(f"[LORA] Using adapter '{adapter_name}' with scale: {lora_scale}")
-                else:
-                    # No get_list_adapters or empty list - try common names
-                    for name in ["retroart", "default", "default_0"]:
-                        try:
-                            self.pipe.set_adapters([name], adapter_weights=[lora_scale])
-                            print(f"[LORA] Using adapter '{name}' with scale: {lora_scale}")
-                            break
-                        except:
-                            continue
-                    else:
-                        print(f"[WARNING] Could not set LORA adapter scale")
             except Exception as e:
                 print(f"[WARNING] LORA set_adapters failed: {e}")
-                # Try alternative method - fuse_lora
                 try:
                     if hasattr(self.pipe, 'fuse_lora'):
                         self.pipe.fuse_lora(lora_scale=lora_scale)
                         print(f"[LORA] Fused with scale: {lora_scale}")
                 except Exception as e2:
-                    print(f"[WARNING] LORA fuse also failed: {e2}")
-                    # Last resort - set scale directly if possible
-                    try:
-                        self.pipe.set_lora_scale(lora_scale)
-                        print(f"[LORA] Set scale directly: {lora_scale}")
-                    except:
-                        print(f"[INFO] LORA will use default scale")
         # Prepare generation kwargs
         pipe_kwargs = {
@@ -590,40 +574,10 @@ class RetroArtConverter:
         pipe_kwargs["generator"] = generator
-        # Use Compel for prompt encoding if available
-        compel_success = False
-        if self.use_compel and self.compel is not None:
-            try:
-                print("Encoding prompts with Compel...")
-                # Encode prompts (returns tuple: conditioning, pooled)
-                conditioning, pooled = self.compel(prompt)
-                # Encode negative prompt if provided
-                if negative_prompt:
-                    negative_conditioning, negative_pooled = self.compel(negative_prompt)
-                else:
-                    # Empty negative prompt
-                    negative_conditioning, negative_pooled = self.compel("")
-                # DON'T pad - pass embeddings directly (pipeline handles different lengths)
-                pipe_kwargs["prompt_embeds"] = conditioning
-                pipe_kwargs["pooled_prompt_embeds"] = pooled
-                pipe_kwargs["negative_prompt_embeds"] = negative_conditioning
-                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled
-                compel_success = True
-                print(f"[OK] Compel encoded: pos={conditioning.shape}, neg={negative_conditioning.shape}")
-            except Exception as e:
-                print(f"[COMPEL] Failed: {e}")
-                print("[COMPEL] Falling back to standard encoding")
-                compel_success = False
-        # Use standard prompts if Compel failed or not available
-        if not compel_success:
-            pipe_kwargs["prompt"] = prompt
-            pipe_kwargs["negative_prompt"] = negative_prompt
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):

 )
 from models import (
     load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
+    load_sdxl_pipeline, load_lora, setup_ip_adapter,
     setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip
 )
             'custom_checkpoint': False,
             'lora': False,
             'instantid': False,
+            'leres_depth': False,
             'ip_adapter': False
         }
         # Initialize face analysis
         self.face_app, self.face_detection_enabled = load_face_analysis()
+        # Load Leres Depth detector
+        self.leres_depth, leres_success = load_depth_detector()
+        self.models_loaded['leres_depth'] = leres_success
         # Load ControlNets
         controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
             self.image_proj_model = None
         # Setup Compel
+        # TEMPORARILY DISABLED - SDXL token mismatch issue
+        # Skip Compel - use native SDXL encoding instead
+        self.compel = None
+        self.use_compel = False
+        print("  [INFO] Using native SDXL prompt encoding (more reliable than Compel)")
+        print("  [INFO] Compel temporarily disabled - using standard prompts")
         # Setup LCM scheduler
         setup_scheduler(self.pipe)
         print("============================\n")
     def get_depth_map(self, image):
+        """Generate depth map using Leres Depth for better quality"""
+        if self.leres_depth is not None:
             try:
                 # Ensure RGB mode
                 if image.mode != 'RGB':
                     image = image.convert('RGB')
+                # Get original dimensions
+                orig_width, orig_height = image.size
+                # Generate depth map with Leres (better quality than Zoe)
                 with torch.no_grad():
+                    depth_image = self.leres_depth(image)
+                # Ensure output matches original size
+                if depth_image.size != (orig_width, orig_height):
+                    depth_image = depth_image.resize((orig_width, orig_height), Image.LANCZOS)
+                print(f"[DEPTH] Leres depth map generated: {orig_width}x{orig_height}")
                 return depth_image
             except Exception as e:
+                print(f"[DEPTH] LeresDetector failed ({e}), falling back to grayscale depth")
                 gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                 depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
                 return Image.fromarray(depth_colored)
         resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
         # Generate depth map
+        print("Generating Leres depth map...")
         depth_image = self.get_depth_map(resized_image)
         if depth_image.size != (target_width, target_height):
             depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
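+                # Apply the requested LoRA scale to the adapter named "retroart".
+                # NOTE(review): load_lora() in models.py now calls load_lora_weights() without
+                # adapter_name="retroart", so this adapter name may not exist - confirm, or
+                # expect this call to fail and fall through to the fuse_lora fallback below.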
+                adapter_names = ["retroart"]  # The adapter name from loading
+                self.pipe.set_adapters(adapter_names, adapter_weights=[lora_scale])
+                print(f"[LORA] Set adapter 'retroart' with scale: {lora_scale}")
             except Exception as e:
                 print(f"[WARNING] LORA set_adapters failed: {e}")
+                # Try fuse_lora as fallback
                 try:
                     if hasattr(self.pipe, 'fuse_lora'):
                         self.pipe.fuse_lora(lora_scale=lora_scale)
                         print(f"[LORA] Fused with scale: {lora_scale}")
                 except Exception as e2:
+                    print(f"[INFO] LORA using default scale")
         # Prepare generation kwargs
         pipe_kwargs = {
         pipe_kwargs["generator"] = generator
+        # Use native SDXL prompt encoding (more reliable than Compel)
+        print("Using native SDXL prompt encoding...")
+        pipe_kwargs["prompt"] = prompt
+        pipe_kwargs["negative_prompt"] = negative_prompt if negative_prompt and negative_prompt.strip() else None
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):

models.py CHANGED Viewed

@@ -13,9 +13,9 @@ from diffusers import (
 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
-from controlnet_aux import ZoeDetector
 from huggingface_hub import hf_hub_download
-from compel import Compel, ReturnedEmbeddingsType
 # Use reference implementation's attention processor
 from attention_processor import IPAttnProcessor2_0, AttnProcessor
@@ -82,15 +82,15 @@ def load_face_analysis():
 def load_depth_detector():
-    """Load Zoe Depth detector."""
-    print("Loading Zoe Depth detector...")
     try:
-        zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
-        zoe_depth.to(device)
-        print("  [OK] Zoe Depth loaded successfully")
-        return zoe_depth, True
     except Exception as e:
-        print(f"  [WARNING] Zoe Depth not available: {e}")
         return None, False
@@ -160,12 +160,12 @@ def load_sdxl_pipeline(controlnets):
 def load_lora(pipe):
-    """Load LORA from HuggingFace Hub with specific adapter name."""
     print("Loading LORA (retroart) from HuggingFace Hub...")
     try:
         lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
-        pipe.load_lora_weights(lora_path, adapter_name="retroart")
-        print(f"  [OK] LORA loaded successfully as 'retroart' adapter")
         return True
     except Exception as e:
         print(f"  [WARNING] Could not load LORA: {e}")
@@ -276,7 +276,7 @@ def setup_ip_adapter(pipe, image_encoder):
         print("  [OK] IP-Adapter fully loaded with InstantID architecture")
         print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
-        print(f"  - Face embeddings: 512D â†’ 16x2048D")
         return image_proj_model, True
@@ -287,22 +287,23 @@ def setup_ip_adapter(pipe, image_encoder):
         return None, False
-def setup_compel(pipe):
-    """Setup Compel for better SDXL prompt handling with error handling."""
-    print("Setting up Compel for enhanced prompt processing...")
-    try:
-        compel = Compel(
-            tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
-            text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
-            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True],
-            truncate_long_prompts=False  # Don't truncate, let us handle length mismatches
-        )
-        print("  [OK] Compel loaded successfully")
-        return compel, True
-    except Exception as e:
-        print(f"  [WARNING] Compel not available: {e}")
-        return None, False
 def setup_scheduler(pipe):

 from diffusers.models.attention_processor import AttnProcessor2_0
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
+from controlnet_aux import LeresDetector
 from huggingface_hub import hf_hub_download
+# removed compel - using native SDXL encoding
 # Use reference implementation's attention processor
 from attention_processor import IPAttnProcessor2_0, AttnProcessor
 def load_depth_detector():
+    """Load Leres Depth detector for better quality."""
+    print("Loading Leres Depth detector...")
     try:
+        leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
+        leres_depth.to(device)
+        print("  [OK] Leres Depth loaded successfully")
+        return leres_depth, True
     except Exception as e:
+        print(f"  [WARNING] Leres Depth not available: {e}")
         return None, False
 def load_lora(pipe):
+    """Load LORA from HuggingFace Hub."""
     print("Loading LORA (retroart) from HuggingFace Hub...")
     try:
         lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
+        pipe.load_lora_weights(lora_path)
+        print(f"  [OK] LORA loaded successfully")
         return True
     except Exception as e:
         print(f"  [WARNING] Could not load LORA: {e}")
         print("  [OK] IP-Adapter fully loaded with InstantID architecture")
         print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
+        print(f"  - Face embeddings: 512D → 16x2048D")
         return image_proj_model, True
         return None, False
+# Removed setup_compel - using native SDXL encoding instead
+# def setup_compel(pipe):
+#     """Setup Compel for better SDXL prompt handling."""
+#     print("Setting up Compel for enhanced prompt processing...")
+#     try:
+#         compel = Compel(
+#             tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
+#             text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
+#             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+#             requires_pooled=[False, True],
+#             truncate_long_prompts=False  # Important for SDXL compatibility
+#         )
+#         print("  [OK] Compel loaded successfully")
+#         return compel, True
+#     except Exception as e:
+#         print(f"  [WARNING] Compel not available: {e}")
+#         return None, False
 def setup_scheduler(pipe):