primerz committed on
Commit
8aa8600
·
verified ·
1 Parent(s): 29a6101

Upload 2 files

Browse files
Files changed (2) hide show
  1. generator.py +57 -12
  2. models.py +3 -3
generator.py CHANGED
@@ -149,20 +149,29 @@ class RetroArtConverter:
149
  """Generate depth map using Zoe Depth"""
150
  if self.zoe_depth is not None:
151
  try:
152
- # Ensure clean PIL Image
153
  if image.mode != 'RGB':
154
  image = image.convert('RGB')
155
 
156
- # Get dimensions and ensure they're Python ints
157
  width, height = image.size
158
  width, height = int(width), int(height)
159
 
160
- # Create a fresh image to avoid numpy type issues
161
- image_array = np.array(image)
162
- clean_image = Image.fromarray(image_array.astype(np.uint8))
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- # Use Zoe detector
165
- depth_image = self.zoe_depth(clean_image)
166
  return depth_image
167
  except Exception as e:
168
  print(f"Warning: ZoeDetector failed ({e}), falling back to grayscale depth")
@@ -563,14 +572,50 @@ class RetroArtConverter:
563
  conditioning = self.compel(prompt)
564
  negative_conditioning = self.compel(negative_prompt)
565
 
566
- pipe_kwargs["prompt_embeds"] = conditioning[0]
567
- pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
568
- pipe_kwargs["negative_prompt_embeds"] = negative_conditioning[0]
569
- pipe_kwargs["negative_pooled_prompt_embeds"] = negative_conditioning[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
  print("[OK] Using Compel-encoded prompts")
572
  except Exception as e:
573
- print(f"Compel encoding failed, using standard prompts: {e}")
574
  pipe_kwargs["prompt"] = prompt
575
  pipe_kwargs["negative_prompt"] = negative_prompt
576
  else:
 
149
  """Generate depth map using Zoe Depth"""
150
  if self.zoe_depth is not None:
151
  try:
152
+ # Ensure clean PIL Image with proper dimensions
153
  if image.mode != 'RGB':
154
  image = image.convert('RGB')
155
 
156
+ # Get dimensions and ensure they're Python ints (not numpy ints)
157
  width, height = image.size
158
  width, height = int(width), int(height)
159
 
160
+ # Resize to dimensions ZoeDetector expects (multiples of 32 work best)
161
+ # This avoids numpy type issues in the detector
162
+ target_width = (width // 32) * 32
163
+ target_height = (height // 32) * 32
164
+
165
+ if target_width != width or target_height != height:
166
+ image = image.resize((target_width, target_height), Image.LANCZOS)
167
+
168
+ # Use Zoe detector - now with safe dimensions
169
+ depth_image = self.zoe_depth(image)
170
+
171
+ # Resize back to original if needed
172
+ if depth_image.size != (width, height):
173
+ depth_image = depth_image.resize((width, height), Image.LANCZOS)
174
 
 
 
175
  return depth_image
176
  except Exception as e:
177
  print(f"Warning: ZoeDetector failed ({e}), falling back to grayscale depth")
 
572
  conditioning = self.compel(prompt)
573
  negative_conditioning = self.compel(negative_prompt)
574
 
575
+ prompt_embeds = conditioning[0]
576
+ pooled_prompt_embeds = conditioning[1]
577
+ negative_prompt_embeds = negative_conditioning[0]
578
+ negative_pooled_prompt_embeds = negative_conditioning[1]
579
+
580
+ # Handle token length mismatch by padding/truncating to 77 tokens (SDXL standard)
581
+ target_length = 77
582
+
583
+ if prompt_embeds.shape[1] != target_length or negative_prompt_embeds.shape[1] != target_length:
584
+ print(f"Adjusting token lengths: pos={prompt_embeds.shape[1]}, neg={negative_prompt_embeds.shape[1]} -> {target_length}")
585
+
586
+ # Truncate or pad to target length
587
+ if prompt_embeds.shape[1] > target_length:
588
+ prompt_embeds = prompt_embeds[:, :target_length, :]
589
+ elif prompt_embeds.shape[1] < target_length:
590
+ padding = torch.zeros(
591
+ prompt_embeds.shape[0],
592
+ target_length - prompt_embeds.shape[1],
593
+ prompt_embeds.shape[2],
594
+ dtype=prompt_embeds.dtype,
595
+ device=prompt_embeds.device
596
+ )
597
+ prompt_embeds = torch.cat([prompt_embeds, padding], dim=1)
598
+
599
+ if negative_prompt_embeds.shape[1] > target_length:
600
+ negative_prompt_embeds = negative_prompt_embeds[:, :target_length, :]
601
+ elif negative_prompt_embeds.shape[1] < target_length:
602
+ padding = torch.zeros(
603
+ negative_prompt_embeds.shape[0],
604
+ target_length - negative_prompt_embeds.shape[1],
605
+ negative_prompt_embeds.shape[2],
606
+ dtype=negative_prompt_embeds.dtype,
607
+ device=negative_prompt_embeds.device
608
+ )
609
+ negative_prompt_embeds = torch.cat([negative_prompt_embeds, padding], dim=1)
610
+
611
+ pipe_kwargs["prompt_embeds"] = prompt_embeds
612
+ pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
613
+ pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
614
+ pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
615
 
616
  print("[OK] Using Compel-encoded prompts")
617
  except Exception as e:
618
+ print(f"Compel encoding failed: {e}, using standard prompts")
619
  pipe_kwargs["prompt"] = prompt
620
  pipe_kwargs["negative_prompt"] = negative_prompt
621
  else:
models.py CHANGED
@@ -160,12 +160,12 @@ def load_sdxl_pipeline(controlnets):
160
 
161
 
162
  def load_lora(pipe):
163
- """Load LORA from HuggingFace Hub."""
164
  print("Loading LORA (retroart) from HuggingFace Hub...")
165
  try:
166
  lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
167
- pipe.load_lora_weights(lora_path)
168
- print(f" [OK] LORA loaded successfully")
169
  return True
170
  except Exception as e:
171
  print(f" [WARNING] Could not load LORA: {e}")
 
160
 
161
 
162
  def load_lora(pipe):
163
+ """Load LORA from HuggingFace Hub with specific adapter name."""
164
  print("Loading LORA (retroart) from HuggingFace Hub...")
165
  try:
166
  lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
167
+ pipe.load_lora_weights(lora_path, adapter_name="retroart")
168
+ print(f" [OK] LORA loaded successfully as 'retroart' adapter")
169
  return True
170
  except Exception as e:
171
  print(f" [WARNING] Could not load LORA: {e}")