primerz committed on
Commit
8188bc4
·
verified ·
1 Parent(s): 7f98355

Update generator.py

Browse files
Files changed (1) hide show
  1. generator.py +18 -51
generator.py CHANGED
@@ -153,15 +153,11 @@ class RetroArtConverter:
153
  if image.mode != 'RGB':
154
  image = image.convert('RGB')
155
 
156
- # CRITICAL FIX: Convert to numpy and back to ensure clean PIL Image
157
- # This removes any numpy int64 contamination in image.size
158
- image_array = np.array(image)
159
- clean_image = Image.fromarray(image_array)
160
 
161
- # Now ZoeDetector receives clean PIL Image with Python int dimensions
162
- depth_image = self.zoe_depth(clean_image)
163
-
164
- print(f"[DEPTH] Zoe depth map generated: {clean_image.size[0]}x{clean_image.size[1]}")
165
  return depth_image
166
 
167
  except Exception as e:
@@ -599,52 +595,23 @@ class RetroArtConverter:
599
  try:
600
  print("Encoding prompts with Compel...")
601
 
602
- # Encode prompts separately (Compel handles dual text encoders internally)
603
- conditioning = self.compel(prompt)
604
- negative_conditioning = self.compel(negative_prompt)
605
-
606
- # Extract embeddings
607
- prompt_embeds = conditioning[0]
608
- pooled_prompt_embeds = conditioning[1]
609
- negative_prompt_embeds = negative_conditioning[0]
610
- negative_pooled_prompt_embeds = negative_conditioning[1]
611
-
612
- # Force to 77 tokens (SDXL standard) to ensure compatibility
613
- target_length = 77
614
-
615
- # Truncate or pad positive embeddings
616
- if prompt_embeds.shape[1] > target_length:
617
- prompt_embeds = prompt_embeds[:, :target_length, :]
618
- elif prompt_embeds.shape[1] < target_length:
619
- padding = torch.zeros(
620
- prompt_embeds.shape[0],
621
- target_length - prompt_embeds.shape[1],
622
- prompt_embeds.shape[2],
623
- dtype=prompt_embeds.dtype,
624
- device=prompt_embeds.device
625
- )
626
- prompt_embeds = torch.cat([prompt_embeds, padding], dim=1)
627
-
628
- # Truncate or pad negative embeddings
629
- if negative_prompt_embeds.shape[1] > target_length:
630
- negative_prompt_embeds = negative_prompt_embeds[:, :target_length, :]
631
- elif negative_prompt_embeds.shape[1] < target_length:
632
- padding = torch.zeros(
633
- negative_prompt_embeds.shape[0],
634
- target_length - negative_prompt_embeds.shape[1],
635
- negative_prompt_embeds.shape[2],
636
- dtype=negative_prompt_embeds.dtype,
637
- device=negative_prompt_embeds.device
638
- )
639
- negative_prompt_embeds = torch.cat([negative_prompt_embeds, padding], dim=1)
640
 
641
- pipe_kwargs["prompt_embeds"] = prompt_embeds
642
- pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
643
- pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
644
- pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
645
 
646
  compel_success = True
647
- print(f"[OK] Compel encoded: pos={prompt_embeds.shape}, neg={negative_prompt_embeds.shape}")
648
 
649
  except Exception as e:
650
  # Compel encoding failed - fall back to standard encoding
 
153
  if image.mode != 'RGB':
154
  image = image.convert('RGB')
155
 
156
+ # ZoeDetector handles everything internally - just pass the image
157
+ depth_image = self.zoe_depth(image)
 
 
158
 
159
+ # Use .width and .height properties (always Python ints, not numpy types)
160
+ print(f"[DEPTH] Zoe depth map generated: {image.width}x{image.height}")
 
 
161
  return depth_image
162
 
163
  except Exception as e:
 
595
  try:
596
  print("Encoding prompts with Compel...")
597
 
598
+ # Encode prompts (Compel returns tuple: (embeddings, pooled_embeddings))
599
+ conditioning, pooled = self.compel(prompt)
600
+ negative_conditioning, negative_pooled = self.compel(negative_prompt)
601
+
602
+ # OFFICIAL FIX: Use Compel's built-in padding method to handle length mismatches
603
+ # This is the recommended way from Compel documentation for SDXL
604
+ [conditioning, negative_conditioning] = self.compel.pad_conditioning_tensors_to_same_length([
605
+ conditioning, negative_conditioning
606
+ ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
 
608
+ pipe_kwargs["prompt_embeds"] = conditioning
609
+ pipe_kwargs["pooled_prompt_embeds"] = pooled
610
+ pipe_kwargs["negative_prompt_embeds"] = negative_conditioning
611
+ pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled
612
 
613
  compel_success = True
614
+ print(f"[OK] Compel encoded: pos={conditioning.shape}, neg={negative_conditioning.shape}")
615
 
616
  except Exception as e:
617
  # Compel encoding failed - fall back to standard encoding