pixagram-dev

Runtime error

App Files Files Community

primerz committed on Nov 1

Commit

d36a1ac

verified ·

1 Parent(s): 8123c81

Update generator.py

Browse files

Files changed (1) hide show

generator.py +45 -38

generator.py CHANGED Viewed

@@ -762,8 +762,7 @@ class RetroArtConverter:
         pipe_kwargs["generator"] = generator
-        # --- START FIX: Correct Compel batching and slicing ---
-        # This fixes the "93 vs 77" error
         if self.use_compel and self.compel is not None:
             try:
                 print("Encoding prompts with Compel...")
@@ -771,23 +770,28 @@ class RetroArtConverter:
                 # Pass both prompts as a list to be batched
                 conditioning_batch, pooled_batch = self.compel([prompt, negative_prompt])
-                # Unpack the batch results using slicing
-                # [0:1] and [1:2] keeps the batch dimension, which is required
-                pipe_kwargs["prompt_embeds"] = conditioning_batch[0:1]
-                pipe_kwargs["pooled_prompt_embeds"] = pooled_batch[0:1]
-                pipe_kwargs["negative_prompt_embeds"] = conditioning_batch[1:2]
-                pipe_kwargs["negative_pooled_prompt_embeds"] = pooled_batch[1:2]
-                print(f"[OK] Compel encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
             except Exception as e:
                 print(f"Compel encoding failed, using standard prompts: {e}")
                 traceback.print_exc()
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
         else:
             pipe_kwargs["prompt"] = prompt
             pipe_kwargs["negative_prompt"] = negative_prompt
-        # --- END FIX ---
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):
@@ -811,7 +815,7 @@ class RetroArtConverter:
         # 1. InstantID (Identity)
         if self.instantid_active:
             if has_detected_faces and face_kps_image is not None:
-                # Ensure face keypoints image has correct size
                 face_kps_image = ensure_correct_size(face_kps_image, target_width, target_height, "InstantID")
                 control_images.append(face_kps_image)
                 conditioning_scales.append(identity_control_scale)
@@ -829,44 +833,47 @@ class RetroArtConverter:
                         boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
                         face_proj_embeds = face_proj_embeds * boosted_scale
-                        print(f"  - Face embedding: {face_emb_tensor.shape} -> {face_proj_embeds.shape}, Scale: {boosted_scale:.2f}")
-                        # --- START FIX: Pad negative embeds to match face embeds ---
-                        # This fixes the "109 vs 93" error
-                        if 'prompt_embeds' in pipe_kwargs:
-                            original_embeds = pipe_kwargs['prompt_embeds']
-                            # Note: The old CFG check (if original_embeds.shape[0] > 1:) is removed
-                            # as our Compel logic already provides separate cond/uncond embeds.
-                            combined_embeds = torch.cat([original_embeds, face_proj_embeds], dim=1)
-                            pipe_kwargs['prompt_embeds'] = combined_embeds
-                            # CRITICAL: Pad negative_prompt_embeds by the same amount
                             if 'negative_prompt_embeds' in pipe_kwargs:
-                                negative_embeds = pipe_kwargs['negative_prompt_embeds']
-                                neg_padding = torch.zeros(
-                                    (
-                                        negative_embeds.shape[0],      # 1
-                                        face_proj_embeds.shape[1], # 16
-                                        negative_embeds.shape[2],      # 2048
-                                    ),
-                                    device=negative_embeds.device,
-                                    dtype=negative_embeds.dtype
-                                )
-                                pipe_kwargs['negative_prompt_embeds'] = torch.cat([negative_embeds, neg_padding], dim=1)
-                                print(f"  [OK] Negative prompt padded to match: {pipe_kwargs['negative_prompt_embeds'].shape}")
-                            print(f"  [OK] Face embeddings concatenated successfully! Prompt: {combined_embeds.shape}")
                         else:
-                            print(f"  [WARNING] Can't concatenate - no prompt_embeds (use Compel)")
-                        # --- END FIX ---
                 elif has_detected_faces:
                     print("  Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
             else:
-                # No face detected - blank map needed to maintain ControlNet list order
                 print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
                 control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
                 conditioning_scales.append(0.0) # Set scale to 0

         pipe_kwargs["generator"] = generator
+        # --- START FIX 1: Correct Compel batching and slicing ---
         if self.use_compel and self.compel is not None:
             try:
                 print("Encoding prompts with Compel...")
                 # Pass both prompts as a list to be batched
                 conditioning_batch, pooled_batch = self.compel([prompt, negative_prompt])
+                # Store positive and negative embeds separately for now
+                positive_prompt_embeds = conditioning_batch[0:1]
+                positive_pooled_embeds = pooled_batch[0:1]
+                negative_prompt_embeds = conditioning_batch[1:2]
+                negative_pooled_embeds = pooled_batch[1:2]
+                print(f"[OK] Compel encoded - Pos: {positive_prompt_embeds.shape}, Neg: {negative_prompt_embeds.shape}")
+                # Put the positive embeds in pipe_kwargs for the *next* step
+                pipe_kwargs["prompt_embeds"] = positive_prompt_embeds
+                pipe_kwargs["pooled_prompt_embeds"] = positive_pooled_embeds
             except Exception as e:
                 print(f"Compel encoding failed, using standard prompts: {e}")
                 traceback.print_exc()
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
+                self.use_compel = False # Fallback to standard
         else:
             pipe_kwargs["prompt"] = prompt
             pipe_kwargs["negative_prompt"] = negative_prompt
+        # --- END FIX 1 ---
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):
         # 1. InstantID (Identity)
         if self.instantid_active:
             if has_detected_faces and face_kps_image is not None:
+                # ... (code to append control_images is unchanged) ...
                 face_kps_image = ensure_correct_size(face_kps_image, target_width, target_height, "InstantID")
                 control_images.append(face_kps_image)
                 conditioning_scales.append(identity_control_scale)
                         boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
                         face_proj_embeds = face_proj_embeds * boosted_scale
+                        print(f"  - Face embedding: {face_proj_embeds.shape}, Scale: {boosted_scale:.2f}")
+                        # --- START FIX 2: Correct CFG and Negative Padding ---
+                        if self.use_compel and 'prompt_embeds' in pipe_kwargs:
+                            # 1. Get the Compel-generated embeds
+                            positive_embeds = pipe_kwargs['prompt_embeds']
+                            # 2. Concatenate face embeddings to POSITIVE prompt
+                            final_positive_embeds = torch.cat([positive_embeds, face_proj_embeds], dim=1)
+                            # 3. Create zero padding for NEGATIVE prompt (YOUR FIX)
+                            neg_padding = torch.zeros_like(face_proj_embeds)
+                            # 4. Concatenate zero padding to NEGATIVE prompt
+                            final_negative_embeds = torch.cat([negative_prompt_embeds, neg_padding], dim=1)
+                            # 5. Create the final CFG batch (shape [2, 109, 2048])
+                            pipe_kwargs['prompt_embeds'] = torch.cat([final_negative_embeds, final_positive_embeds], dim=0)
+                            # 6. Do the same for the pooled embeds (shape [2, 1280])
+                            pipe_kwargs['pooled_prompt_embeds'] = torch.cat([negative_pooled_embeds, positive_pooled_embeds], dim=0)
+                            # 7. CRITICAL: Remove the separate negative_prompt_embeds
                             if 'negative_prompt_embeds' in pipe_kwargs:
+                                del pipe_kwargs['negative_prompt_embeds']
+                            print(f"  [OK] CFG batch created! Embeds: {pipe_kwargs['prompt_embeds'].shape}, Pooled: {pipe_kwargs['pooled_prompt_embeds'].shape}")
                         else:
+                            # Fallback if Compel failed
+                            print(f"  [WARNING] Can't concatenate - Compel failed. Using standard prompt.")
+                            pipe_kwargs['prompt'] = prompt
+                            pipe_kwargs['negative_prompt'] = negative_prompt
+                        # --- END FIX 2 ---
                 elif has_detected_faces:
                     print("  Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
             else:
+                # ... (code for no face detected is unchanged) ...
                 print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
                 control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
                 conditioning_scales.append(0.0) # Set scale to 0