primerz committed on
Commit
1daefa9
·
verified ·
1 Parent(s): bdae0e1

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.py +1 -1
  2. generator.py +50 -17
config.py CHANGED
@@ -24,7 +24,7 @@ TRIGGER_WORD = "p1x3l4rt, pixel art"
24
 
25
  # Face detection configuration
26
  FACE_DETECTION_CONFIG = {
27
- "model_name": "antelopev2",
28
  "det_size": (640, 640),
29
  "ctx_id": 0
30
  }
 
24
 
25
  # Face detection configuration
26
  FACE_DETECTION_CONFIG = {
27
+ "model_name": "buffalo_l", # Most accurate, auto-downloads
28
  "det_size": (640, 640),
29
  "ctx_id": 0
30
  }
generator.py CHANGED
@@ -153,17 +153,15 @@ class RetroArtConverter:
153
  if image.mode != 'RGB':
154
  image = image.convert('RGB')
155
 
156
- # Get original dimensions
157
- orig_width, orig_height = image.size
 
 
158
 
159
- # ZoeDetector handles sizing internally - just pass the image
160
- depth_image = self.zoe_depth(image)
161
 
162
- # Ensure output matches original size
163
- if depth_image.size != (orig_width, orig_height):
164
- depth_image = depth_image.resize((orig_width, orig_height), Image.LANCZOS)
165
-
166
- print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
167
  return depth_image
168
 
169
  except Exception as e:
@@ -601,22 +599,57 @@ class RetroArtConverter:
601
  try:
602
  print("Encoding prompts with Compel...")
603
 
604
- # Encode prompts with Compel
605
  conditioning = self.compel(prompt)
606
  negative_conditioning = self.compel(negative_prompt)
607
 
608
  # Extract embeddings
609
- pipe_kwargs["prompt_embeds"] = conditioning[0]
610
- pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
611
- pipe_kwargs["negative_prompt_embeds"] = negative_conditioning[0]
612
- pipe_kwargs["negative_pooled_prompt_embeds"] = negative_conditioning[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
  compel_success = True
615
- print("[OK] Using Compel-encoded prompts")
616
 
617
  except Exception as e:
618
- print(f"[COMPEL] Encoding failed: {e}")
619
- print(f"[COMPEL] Using standard prompt encoding instead")
 
620
  compel_success = False
621
 
622
  # Use standard prompts if Compel failed or not available
 
153
  if image.mode != 'RGB':
154
  image = image.convert('RGB')
155
 
156
+ # CRITICAL FIX: Convert to numpy and back to ensure clean PIL Image
157
+ # This removes any numpy int64 contamination in image.size
158
+ image_array = np.array(image)
159
+ clean_image = Image.fromarray(image_array)
160
 
161
+ # Now ZoeDetector receives clean PIL Image with Python int dimensions
162
+ depth_image = self.zoe_depth(clean_image)
163
 
164
+ print(f"[DEPTH] Zoe depth map generated: {clean_image.size[0]}x{clean_image.size[1]}")
 
 
 
 
165
  return depth_image
166
 
167
  except Exception as e:
 
599
  try:
600
  print("Encoding prompts with Compel...")
601
 
602
+ # Encode prompts separately (Compel handles dual text encoders internally)
603
  conditioning = self.compel(prompt)
604
  negative_conditioning = self.compel(negative_prompt)
605
 
606
  # Extract embeddings
607
+ prompt_embeds = conditioning[0]
608
+ pooled_prompt_embeds = conditioning[1]
609
+ negative_prompt_embeds = negative_conditioning[0]
610
+ negative_pooled_prompt_embeds = negative_conditioning[1]
611
+
612
+ # Force to 77 tokens (SDXL standard) to ensure compatibility
613
+ target_length = 77
614
+
615
+ # Truncate or pad positive embeddings
616
+ if prompt_embeds.shape[1] > target_length:
617
+ prompt_embeds = prompt_embeds[:, :target_length, :]
618
+ elif prompt_embeds.shape[1] < target_length:
619
+ padding = torch.zeros(
620
+ prompt_embeds.shape[0],
621
+ target_length - prompt_embeds.shape[1],
622
+ prompt_embeds.shape[2],
623
+ dtype=prompt_embeds.dtype,
624
+ device=prompt_embeds.device
625
+ )
626
+ prompt_embeds = torch.cat([prompt_embeds, padding], dim=1)
627
+
628
+ # Truncate or pad negative embeddings
629
+ if negative_prompt_embeds.shape[1] > target_length:
630
+ negative_prompt_embeds = negative_prompt_embeds[:, :target_length, :]
631
+ elif negative_prompt_embeds.shape[1] < target_length:
632
+ padding = torch.zeros(
633
+ negative_prompt_embeds.shape[0],
634
+ target_length - negative_prompt_embeds.shape[1],
635
+ negative_prompt_embeds.shape[2],
636
+ dtype=negative_prompt_embeds.dtype,
637
+ device=negative_prompt_embeds.device
638
+ )
639
+ negative_prompt_embeds = torch.cat([negative_prompt_embeds, padding], dim=1)
640
+
641
+ pipe_kwargs["prompt_embeds"] = prompt_embeds
642
+ pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
643
+ pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
644
+ pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
645
 
646
  compel_success = True
647
+ print(f"[OK] Compel encoded: pos={prompt_embeds.shape}, neg={negative_prompt_embeds.shape}")
648
 
649
  except Exception as e:
650
+ # Compel encoding failed - fall back to standard encoding
651
+ print(f"[COMPEL] Failed: {e}")
652
+ print("[COMPEL] Falling back to standard encoding")
653
  compel_success = False
654
 
655
  # Use standard prompts if Compel failed or not available