Spaces:
Runtime error
Runtime error
Update generator.py
Browse files- generator.py +45 -38
generator.py
CHANGED
|
@@ -762,8 +762,7 @@ class RetroArtConverter:
|
|
| 762 |
|
| 763 |
pipe_kwargs["generator"] = generator
|
| 764 |
|
| 765 |
-
# --- START FIX: Correct Compel batching and slicing ---
|
| 766 |
-
# This fixes the "93 vs 77" error
|
| 767 |
if self.use_compel and self.compel is not None:
|
| 768 |
try:
|
| 769 |
print("Encoding prompts with Compel...")
|
|
@@ -771,23 +770,28 @@ class RetroArtConverter:
|
|
| 771 |
# Pass both prompts as a list to be batched
|
| 772 |
conditioning_batch, pooled_batch = self.compel([prompt, negative_prompt])
|
| 773 |
|
| 774 |
-
#
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 780 |
|
| 781 |
-
print(f"[OK] Compel encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
|
| 782 |
except Exception as e:
|
| 783 |
print(f"Compel encoding failed, using standard prompts: {e}")
|
| 784 |
traceback.print_exc()
|
| 785 |
pipe_kwargs["prompt"] = prompt
|
| 786 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
|
|
|
| 787 |
else:
|
| 788 |
pipe_kwargs["prompt"] = prompt
|
| 789 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 790 |
-
# --- END FIX ---
|
| 791 |
|
| 792 |
# Add CLIP skip
|
| 793 |
if hasattr(self.pipe, 'text_encoder'):
|
|
@@ -811,7 +815,7 @@ class RetroArtConverter:
|
|
| 811 |
# 1. InstantID (Identity)
|
| 812 |
if self.instantid_active:
|
| 813 |
if has_detected_faces and face_kps_image is not None:
|
| 814 |
-
#
|
| 815 |
face_kps_image = ensure_correct_size(face_kps_image, target_width, target_height, "InstantID")
|
| 816 |
control_images.append(face_kps_image)
|
| 817 |
conditioning_scales.append(identity_control_scale)
|
|
@@ -829,44 +833,47 @@ class RetroArtConverter:
|
|
| 829 |
boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
|
| 830 |
face_proj_embeds = face_proj_embeds * boosted_scale
|
| 831 |
|
| 832 |
-
print(f" - Face embedding: {
|
| 833 |
|
| 834 |
-
# --- START FIX:
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
|
| 839 |
-
#
|
| 840 |
-
|
| 841 |
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 846 |
if 'negative_prompt_embeds' in pipe_kwargs:
|
| 847 |
-
|
| 848 |
-
neg_padding = torch.zeros(
|
| 849 |
-
(
|
| 850 |
-
negative_embeds.shape[0], # 1
|
| 851 |
-
face_proj_embeds.shape[1], # 16
|
| 852 |
-
negative_embeds.shape[2], # 2048
|
| 853 |
-
),
|
| 854 |
-
device=negative_embeds.device,
|
| 855 |
-
dtype=negative_embeds.dtype
|
| 856 |
-
)
|
| 857 |
-
pipe_kwargs['negative_prompt_embeds'] = torch.cat([negative_embeds, neg_padding], dim=1)
|
| 858 |
-
print(f" [OK] Negative prompt padded to match: {pipe_kwargs['negative_prompt_embeds'].shape}")
|
| 859 |
|
| 860 |
-
print(f" [OK]
|
|
|
|
| 861 |
else:
|
| 862 |
-
|
| 863 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 864 |
|
| 865 |
elif has_detected_faces:
|
| 866 |
print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
|
| 867 |
|
| 868 |
else:
|
| 869 |
-
#
|
| 870 |
print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
|
| 871 |
control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
|
| 872 |
conditioning_scales.append(0.0) # Set scale to 0
|
|
|
|
| 762 |
|
| 763 |
pipe_kwargs["generator"] = generator
|
| 764 |
|
| 765 |
+
# --- START FIX 1: Correct Compel batching and slicing ---
|
|
|
|
| 766 |
if self.use_compel and self.compel is not None:
|
| 767 |
try:
|
| 768 |
print("Encoding prompts with Compel...")
|
|
|
|
| 770 |
# Pass both prompts as a list to be batched
|
| 771 |
conditioning_batch, pooled_batch = self.compel([prompt, negative_prompt])
|
| 772 |
|
| 773 |
+
# Store positive and negative embeds separately for now
|
| 774 |
+
positive_prompt_embeds = conditioning_batch[0:1]
|
| 775 |
+
positive_pooled_embeds = pooled_batch[0:1]
|
| 776 |
+
negative_prompt_embeds = conditioning_batch[1:2]
|
| 777 |
+
negative_pooled_embeds = pooled_batch[1:2]
|
| 778 |
+
|
| 779 |
+
print(f"[OK] Compel encoded - Pos: {positive_prompt_embeds.shape}, Neg: {negative_prompt_embeds.shape}")
|
| 780 |
+
|
| 781 |
+
# Put the positive embeds in pipe_kwargs for the *next* step
|
| 782 |
+
pipe_kwargs["prompt_embeds"] = positive_prompt_embeds
|
| 783 |
+
pipe_kwargs["pooled_prompt_embeds"] = positive_pooled_embeds
|
| 784 |
|
|
|
|
| 785 |
except Exception as e:
|
| 786 |
print(f"Compel encoding failed, using standard prompts: {e}")
|
| 787 |
traceback.print_exc()
|
| 788 |
pipe_kwargs["prompt"] = prompt
|
| 789 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 790 |
+
self.use_compel = False # Fallback to standard
|
| 791 |
else:
|
| 792 |
pipe_kwargs["prompt"] = prompt
|
| 793 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 794 |
+
# --- END FIX 1 ---
|
| 795 |
|
| 796 |
# Add CLIP skip
|
| 797 |
if hasattr(self.pipe, 'text_encoder'):
|
|
|
|
| 815 |
# 1. InstantID (Identity)
|
| 816 |
if self.instantid_active:
|
| 817 |
if has_detected_faces and face_kps_image is not None:
|
| 818 |
+
# Resize the face keypoint image to the target size, then register it and its scale as a control input
|
| 819 |
face_kps_image = ensure_correct_size(face_kps_image, target_width, target_height, "InstantID")
|
| 820 |
control_images.append(face_kps_image)
|
| 821 |
conditioning_scales.append(identity_control_scale)
|
|
|
|
| 833 |
boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
|
| 834 |
face_proj_embeds = face_proj_embeds * boosted_scale
|
| 835 |
|
| 836 |
+
print(f" - Face embedding: {face_proj_embeds.shape}, Scale: {boosted_scale:.2f}")
|
| 837 |
|
| 838 |
+
# --- START FIX 2: Correct CFG and Negative Padding ---
|
| 839 |
+
if self.use_compel and 'prompt_embeds' in pipe_kwargs:
|
| 840 |
+
# 1. Get the Compel-generated embeds
|
| 841 |
+
positive_embeds = pipe_kwargs['prompt_embeds']
|
| 842 |
|
| 843 |
+
# 2. Concatenate face embeddings to POSITIVE prompt
|
| 844 |
+
final_positive_embeds = torch.cat([positive_embeds, face_proj_embeds], dim=1)
|
| 845 |
|
| 846 |
+
# 3. Create zero padding for NEGATIVE prompt (same shape as the face embeddings, so both prompts keep equal sequence length)
|
| 847 |
+
neg_padding = torch.zeros_like(face_proj_embeds)
|
| 848 |
+
|
| 849 |
+
# 4. Concatenate zero padding to NEGATIVE prompt
|
| 850 |
+
final_negative_embeds = torch.cat([negative_prompt_embeds, neg_padding], dim=1)
|
| 851 |
+
|
| 852 |
+
# 5. Create the final CFG batch, negative first then positive (expected shape [2, text_tokens + face_tokens, 2048] - TODO confirm)
|
| 853 |
+
pipe_kwargs['prompt_embeds'] = torch.cat([final_negative_embeds, final_positive_embeds], dim=0)
|
| 854 |
+
|
| 855 |
+
# 6. Do the same for the pooled embeds (shape [2, 1280])
|
| 856 |
+
pipe_kwargs['pooled_prompt_embeds'] = torch.cat([negative_pooled_embeds, positive_pooled_embeds], dim=0)
|
| 857 |
+
|
| 858 |
+
# 7. CRITICAL: Remove the separate negative_prompt_embeds
|
| 859 |
if 'negative_prompt_embeds' in pipe_kwargs:
|
| 860 |
+
del pipe_kwargs['negative_prompt_embeds']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
|
| 862 |
+
print(f" [OK] CFG batch created! Embeds: {pipe_kwargs['prompt_embeds'].shape}, Pooled: {pipe_kwargs['pooled_prompt_embeds'].shape}")
|
| 863 |
+
|
| 864 |
else:
|
| 865 |
+
# Fallback if Compel failed
|
| 866 |
+
print(f" [WARNING] Can't concatenate - Compel failed. Using standard prompt.")
|
| 867 |
+
pipe_kwargs['prompt'] = prompt
|
| 868 |
+
pipe_kwargs['negative_prompt'] = negative_prompt
|
| 869 |
+
|
| 870 |
+
# --- END FIX 2 ---
|
| 871 |
|
| 872 |
elif has_detected_faces:
|
| 873 |
print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
|
| 874 |
|
| 875 |
else:
|
| 876 |
+
# No usable face keypoints: add a blank (black) control image with scale 0.0 so it has no effect
|
| 877 |
print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
|
| 878 |
control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
|
| 879 |
conditioning_scales.append(0.0) # Set scale to 0
|