pixagram-dev

Runtime error

App Files Files Community

primerz committed on Oct 30

Commit

b2a3100

verified ·

1 Parent(s): 99d58c2

Upload 2 files

Browse files

Files changed (2) hide show

generator.py +31 -31
models.py +23 -6

generator.py CHANGED Viewed

@@ -153,7 +153,6 @@ class RetroArtConverter:
                         image = image.convert('RGB')
                     orig_width, orig_height = image.size
-                    # **FIX 1 START: Ensure all size variables are standard Python int**
                     orig_width = int(orig_width)
                     orig_height = int(orig_height)
@@ -164,25 +163,23 @@ class RetroArtConverter:
                     target_width = int(max(64, target_width))
                     target_height = int(max(64, target_height))
-                    # Create an explicit tuple of standard ints
-                    size_for_depth = (int(target_width), int(target_height))
-                    # Always resize using the explicit int tuple to avoid numpy.int64 issues
-                    # This replaces the conditional resize
-                    image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
                     if target_width != orig_width or target_height != orig_height:
                         print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
                     # FIXED: Add torch.no_grad() wrapper
                     with torch.no_grad():
-                        depth_image = self.zoe_depth(image_for_depth) # Use the correctly-typed resized image
                     depth_width, depth_height = depth_image.size
-                    if depth_width != orig_width or depth_height != orig_height:
-                        # Resize back to the original size that get_depth_map received
-                        depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
-                    # **FIX 1 END**
                     print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
                     return depth_image
@@ -201,10 +198,6 @@ class RetroArtConverter:
     def add_trigger_word(self, prompt):
         """Add trigger word to prompt if not present"""
         if TRIGGER_WORD.lower() not in prompt.lower():
-            # **FIX 3 START: Handle empty or blank prompt**
-            if not prompt or not prompt.strip():
-                return TRIGGER_WORD
-            # **FIX 3 END**
             return f"{TRIGGER_WORD}, {prompt}"
         return prompt
@@ -450,11 +443,6 @@ class RetroArtConverter:
         prompt = sanitize_text(prompt)
         negative_prompt = sanitize_text(negative_prompt)
-        # **FIX 3 START: Ensure blank negative prompts are empty strings for Compel**
-        if not negative_prompt or not negative_prompt.strip():
-            negative_prompt = ""
-        # **FIX 3 END**
         # Apply parameter validation
         if consistency_mode:
             print("\n[CONSISTENCY] Validating and adjusting parameters...")
@@ -464,7 +452,7 @@ class RetroArtConverter:
                     identity_control_scale, depth_control_scale, consistency_mode
                 )
-        # Add trigger word (handles blank prompt fix)
         prompt = self.add_trigger_word(prompt)
         # Calculate optimal size with flexible aspect ratio support
@@ -495,7 +483,11 @@ class RetroArtConverter:
         if using_multiple_controlnets and self.face_app is not None:
             print("Detecting faces and extracting keypoints...")
             img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
-            faces = self.face_app.get(img_array)
             if len(faces) > 0:
                 has_detected_faces = True
@@ -563,7 +555,8 @@ class RetroArtConverter:
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
-                self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
                 print(f"LORA scale: {lora_scale}")
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")
@@ -595,14 +588,21 @@ class RetroArtConverter:
                 conditioning = self.compel(prompt)
                 negative_conditioning = self.compel(negative_prompt)
-                pipe_kwargs["prompt_embeds"] = conditioning[0]
-                pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
-                pipe_kwargs["negative_prompt_embeds"] = negative_conditioning[0]
-                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_conditioning[1]
                 print("[OK] Using Compel-encoded prompts")
             except Exception as e:
-                print(f"Compel encoding failed, using standard prompts: {e}")
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
         else:
@@ -636,7 +636,7 @@ class RetroArtConverter:
                     # Reshape for Resampler: [1, 1, 512]
                     face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
-                    # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
                     face_proj_embeds = self.image_proj_model(face_emb_tensor)
                     # Scale with identity preservation

                         image = image.convert('RGB')
                     orig_width, orig_height = image.size
                     orig_width = int(orig_width)
                     orig_height = int(orig_height)
                     target_width = int(max(64, target_width))
                     target_height = int(max(64, target_height))
                     if target_width != orig_width or target_height != orig_height:
+                        image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
                         print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
                     # FIXED: Add torch.no_grad() wrapper
                     with torch.no_grad():
+                        depth_image = self.zoe_depth(image)
                     depth_width, depth_height = depth_image.size
+                    # Convert numpy int64 to Python int to avoid PIL errors
+                    depth_width = int(depth_width)
+                    depth_height = int(depth_height)
+                    orig_width_int = int(orig_width)
+                    orig_height_int = int(orig_height)
+                    if depth_width != orig_width_int or depth_height != orig_height_int:
+                        depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
                     print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
                     return depth_image
     def add_trigger_word(self, prompt):
         """Return ``prompt`` with TRIGGER_WORD prepended when it is missing.

         The containment check is case-insensitive. A prompt that is None,
         empty, or whitespace-only yields TRIGGER_WORD alone, so the result
         never ends in a dangling ", " separator.
         """
         # Blank prompts would otherwise become "<TRIGGER_WORD>, " and None
         # would raise AttributeError on .lower().
         if not prompt or not prompt.strip():
             return TRIGGER_WORD
         if TRIGGER_WORD.lower() not in prompt.lower():
             return f"{TRIGGER_WORD}, {prompt}"
         return prompt
         prompt = sanitize_text(prompt)
         negative_prompt = sanitize_text(negative_prompt)
         # Apply parameter validation
         if consistency_mode:
             print("\n[CONSISTENCY] Validating and adjusting parameters...")
                     identity_control_scale, depth_control_scale, consistency_mode
                 )
+        # Add trigger word
         prompt = self.add_trigger_word(prompt)
         # Calculate optimal size with flexible aspect ratio support
         if using_multiple_controlnets and self.face_app is not None:
             print("Detecting faces and extracting keypoints...")
             img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
+            try:
+                faces = self.face_app.get(img_array)
+            except Exception as e:
+                print(f"[WARNING] Face detection failed: {e}")
+                faces = []
             if len(faces) > 0:
                 has_detected_faces = True
         # Set LORA scale
         if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
+                # Use correct adapter name - peft uses 'default_0' for single adapters
+                self.pipe.set_adapters(["default_0"], adapter_weights=[lora_scale])
                 print(f"LORA scale: {lora_scale}")
             except Exception as e:
                 print(f"Could not set LORA scale: {e}")
                 conditioning = self.compel(prompt)
                 negative_conditioning = self.compel(negative_prompt)
+                # Handle potential token length mismatches
+                prompt_embeds_0 = conditioning[0]
+                prompt_embeds_1 = conditioning[1]
+                neg_embeds_0 = negative_conditioning[0]
+                neg_embeds_1 = negative_conditioning[1]
+                # Ensure consistent shapes if needed
+                pipe_kwargs["prompt_embeds"] = prompt_embeds_0
+                pipe_kwargs["pooled_prompt_embeds"] = prompt_embeds_1
+                pipe_kwargs["negative_prompt_embeds"] = neg_embeds_0
+                pipe_kwargs["negative_pooled_prompt_embeds"] = neg_embeds_1
                 print("[OK] Using Compel-encoded prompts")
             except Exception as e:
+                print(f"Compel encoding failed ({e}), falling back to standard prompts")
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
         else:
                     # Reshape for Resampler: [1, 1, 512]
                     face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
+                    # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
                     face_proj_embeds = self.image_proj_model(face_emb_tensor)
                     # Scale with identity preservation

models.py CHANGED Viewed

@@ -164,8 +164,7 @@ def load_lora(pipe):
     print("Loading LORA (retroart) from HuggingFace Hub...")
     try:
         lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
-        # **FIX 2: Add adapter_name="retroart"**
-        pipe.load_lora_weights(lora_path, adapter_name="retroart")
         print(f"  [OK] LORA loaded successfully")
         return True
     except Exception as e:
@@ -277,7 +276,7 @@ def setup_ip_adapter(pipe, image_encoder):
         print("  [OK] IP-Adapter fully loaded with InstantID architecture")
         print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
-        print(f"  - Face embeddings: 512D â†’ 16x2048D")
         return image_proj_model, True
@@ -289,19 +288,37 @@ def setup_ip_adapter(pipe, image_encoder):
 def setup_compel(pipe):
-    """Setup Compel for better SDXL prompt handling."""
     print("Setting up Compel for enhanced prompt processing...")
     try:
         compel = Compel(
             tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
             text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True]
         )
-        print("  [OK] Compel loaded successfully")
         return compel, True
     except Exception as e:
         print(f"  [WARNING] Compel not available: {e}")
         return None, False

     print("Loading LORA (retroart) from HuggingFace Hub...")
     try:
         lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
+        pipe.load_lora_weights(lora_path)
         print(f"  [OK] LORA loaded successfully")
         return True
     except Exception as e:
         print("  [OK] IP-Adapter fully loaded with InstantID architecture")
         print(f"  - Resampler: 4 layers, 20 heads, 16 output tokens")
+        print(f"  - Face embeddings: 512D Ã¢â€ â€™ 16x2048D")
         return image_proj_model, True
 def setup_compel(pipe):
+    """Setup Compel for better SDXL prompt handling with robust error handling."""
     print("Setting up Compel for enhanced prompt processing...")
     try:
+        # FIXED: Handle SDXL dual tokenizer setup more carefully
         compel = Compel(
             tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
             text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+            requires_pooled=[False, True],
+            padding_get_round_multiple=False  # Disable padding that might cause mismatches
         )
+        print("  [OK] Compel loaded successfully with SDXL dual tokenizers")
         return compel, True
+    except TypeError:
+        # Fallback for older Compel versions without padding parameter
+        try:
+            compel = Compel(
+                tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
+                text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
+                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                requires_pooled=[False, True]
+            )
+            print("  [OK] Compel loaded (standard config)")
+            return compel, True
+        except Exception as e:
+            print(f"  [WARNING] Compel not available: {e}")
+            print("  [INFO] Will use standard prompt encoding instead")
+            return None, False
     except Exception as e:
         print(f"  [WARNING] Compel not available: {e}")
+        print("  [INFO] Will use standard prompt encoding instead")
         return None, False