primerz committed on
Commit
4eaa1be
·
verified ·
1 Parent(s): 0cf4b86

Update generator.py

Browse files
Files changed (1) hide show
  1. generator.py +110 -24
generator.py CHANGED
@@ -148,9 +148,27 @@ class RetroArtConverter:
148
  self.zoe_depth = self.zoe_depth.to(self.device)
149
  self.depth_detector = self.zoe_depth # Keep alias in sync
150
 
151
- # Generate depth map
152
- depth_array = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
153
- depth_image = Image.fromarray(depth_array)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  # Move back to CPU to free GPU memory
156
  if torch.cuda.is_available():
@@ -163,8 +181,24 @@ class RetroArtConverter:
163
  # Ensure model is on CPU and try again
164
  self.zoe_depth = self.zoe_depth.to("cpu")
165
  self.depth_detector = self.zoe_depth # Keep alias in sync
166
- depth_array = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
167
- depth_image = Image.fromarray(depth_array)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  if depth_image.size != image.size:
170
  depth_image = depth_image.resize(image.size, Image.LANCZOS)
@@ -173,10 +207,12 @@ class RetroArtConverter:
173
  return depth_image, depth_array
174
  except Exception as e:
175
  print(f"[DEPTH] Generation failed: {e}, using grayscale fallback")
176
- return image.convert('L').convert('RGB'), None
 
177
  else:
178
  print("[DEPTH] Detector not available, using grayscale")
179
- return image.convert('L').convert('RGB'), None
 
180
 
181
  def add_trigger_word(self, prompt):
182
  """Add trigger word to prompt if not present"""
@@ -294,12 +330,18 @@ class RetroArtConverter:
294
  self.zoe_depth = None
295
  self.depth_detector = None # Also set alias
296
  if not hasattr(self, 'face_app'):
297
- print("[ERROR] self.face_app not found, initializing")
298
  self.face_app = FaceAnalysisWrapper(None)
299
  self.face_detection_enabled = False
300
  if not hasattr(self, 'memory_manager'):
301
  print("[ERROR] self.memory_manager not found, initializing")
302
  self.memory_manager = MemoryManager(device=self.device, dtype=self.dtype, verbose=False)
 
 
 
 
 
 
303
 
304
  # Add trigger word
305
  prompt = self.add_trigger_word(prompt)
@@ -414,9 +456,25 @@ class RetroArtConverter:
414
  pipe_kwargs["negative_prompt"] = negative_prompt
415
 
416
  # Configure ControlNets + IP-Adapter (SIMPLIFIED!)
417
- if has_detected_faces and face_kps_image is not None:
 
 
 
 
 
 
 
 
 
418
  print("Using InstantID (keypoints + embeddings) + Depth ControlNets")
419
 
 
 
 
 
 
 
 
420
  # Control images: [face keypoints, depth map]
421
  pipe_kwargs["control_image"] = [face_kps_image, depth_image]
422
 
@@ -432,21 +490,34 @@ class RetroArtConverter:
432
  pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
433
  pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
434
 
435
- # IP-Adapter face embeddings (SIMPLE - pipeline handles everything!)
436
- if face_embeddings is not None:
437
- print(f"Adding face embeddings for IP-Adapter...")
438
-
439
- # Just pass the embeddings - pipeline does the rest!
440
- pipe_kwargs["image_embeds"] = face_embeddings
441
-
442
- # Control IP-Adapter strength
443
- pipe_kwargs["ip_adapter_scale"] = identity_preservation
444
-
445
- print(f" - Face embeddings shape: {face_embeddings.shape}")
446
- print(f" - IP-Adapter scale: {identity_preservation}")
447
- print(f" [OK] Face embeddings configured")
448
- else:
449
- print(" [WARNING] No face embeddings - using keypoints only")
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
  else:
452
  print("No faces detected - using Depth ControlNet only")
@@ -458,9 +529,24 @@ class RetroArtConverter:
458
  # Control guidance timing for both slots
459
  pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
460
  pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
 
 
 
461
 
462
  # Generate
463
  print(f"Generating: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
 
 
 
 
 
 
 
 
 
 
 
 
464
  result = self.pipe(**pipe_kwargs)
465
 
466
  generated_image = result.images[0]
 
148
  self.zoe_depth = self.zoe_depth.to(self.device)
149
  self.depth_detector = self.zoe_depth # Keep alias in sync
150
 
151
+ # Generate depth map
152
+ # ZoeDetector from controlnet_aux expects PIL Image, not numpy array
153
+ depth_output = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
154
+
155
+ # Handle different output types
156
+ if isinstance(depth_output, Image.Image):
157
+ depth_image = depth_output
158
+ depth_array = np.array(depth_output)
159
+ elif isinstance(depth_output, np.ndarray):
160
+ depth_array = depth_output
161
+ depth_image = Image.fromarray(depth_array.astype(np.uint8))
162
+ elif isinstance(depth_output, torch.Tensor):
163
+ depth_array = depth_output.cpu().numpy()
164
+ if depth_array.ndim == 3 and depth_array.shape[0] == 3:
165
+ # CHW to HWC
166
+ depth_array = depth_array.transpose(1, 2, 0)
167
+ depth_image = Image.fromarray((depth_array * 255).astype(np.uint8))
168
+ else:
169
+ print(f"[DEPTH] Unexpected output type: {type(depth_output)}")
170
+ depth_image = image_for_depth.convert('L').convert('RGB')
171
+ depth_array = np.array(depth_image)
172
 
173
  # Move back to CPU to free GPU memory
174
  if torch.cuda.is_available():
 
181
  # Ensure model is on CPU and try again
182
  self.zoe_depth = self.zoe_depth.to("cpu")
183
  self.depth_detector = self.zoe_depth # Keep alias in sync
184
+
185
+ # Try again on CPU
186
+ depth_output = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
187
+
188
+ # Handle different output types
189
+ if isinstance(depth_output, Image.Image):
190
+ depth_image = depth_output
191
+ depth_array = np.array(depth_output)
192
+ elif isinstance(depth_output, np.ndarray):
193
+ depth_array = depth_output
194
+ depth_image = Image.fromarray(depth_array.astype(np.uint8))
195
+ else:
196
+ depth_image = image_for_depth.convert('L').convert('RGB')
197
+ depth_array = np.array(depth_image)
198
+
199
+ # Ensure depth image is RGB (some detectors return grayscale)
200
+ if depth_image.mode != 'RGB':
201
+ depth_image = depth_image.convert('RGB')
202
 
203
  if depth_image.size != image.size:
204
  depth_image = depth_image.resize(image.size, Image.LANCZOS)
 
207
  return depth_image, depth_array
208
  except Exception as e:
209
  print(f"[DEPTH] Generation failed: {e}, using grayscale fallback")
210
+ fallback = image.convert('L').convert('RGB')
211
+ return fallback, np.array(fallback)
212
  else:
213
  print("[DEPTH] Detector not available, using grayscale")
214
+ fallback = image.convert('L').convert('RGB')
215
+ return fallback, np.array(fallback)
216
 
217
  def add_trigger_word(self, prompt):
218
  """Add trigger word to prompt if not present"""
 
330
  self.zoe_depth = None
331
  self.depth_detector = None # Also set alias
332
  if not hasattr(self, 'face_app'):
333
+ print("[ERROR] self.face_app not found, initializing wrapper")
334
  self.face_app = FaceAnalysisWrapper(None)
335
  self.face_detection_enabled = False
336
  if not hasattr(self, 'memory_manager'):
337
  print("[ERROR] self.memory_manager not found, initializing")
338
  self.memory_manager = MemoryManager(device=self.device, dtype=self.dtype, verbose=False)
339
+ if not hasattr(self, 'pipe'):
340
+ raise RuntimeError("Pipeline not initialized. RetroArtConverter may have failed to initialize properly.")
341
+
342
+ # Ensure depth_detector alias exists
343
+ if hasattr(self, 'zoe_depth') and not hasattr(self, 'depth_detector'):
344
+ self.depth_detector = self.zoe_depth
345
 
346
  # Add trigger word
347
  prompt = self.add_trigger_word(prompt)
 
456
  pipe_kwargs["negative_prompt"] = negative_prompt
457
 
458
  # Configure ControlNets + IP-Adapter (SIMPLIFIED!)
459
+ # First, check if we need to disable IP-Adapter
460
+ if not (has_detected_faces and face_embeddings is not None):
461
+ # No faces or embeddings - disable IP-Adapter completely
462
+ try:
463
+ self.pipe.set_ip_adapter_scale(0.0)
464
+ print("[IP-ADAPTER] Disabled (scale set to 0) - no faces detected")
465
+ except Exception as e:
466
+ print(f"[IP-ADAPTER] Could not disable: {e}")
467
+
468
+ if has_detected_faces and face_kps_image is not None and face_embeddings is not None:
469
  print("Using InstantID (keypoints + embeddings) + Depth ControlNets")
470
 
471
+ # Re-enable IP-Adapter with proper scale
472
+ try:
473
+ self.pipe.set_ip_adapter_scale(identity_preservation)
474
+ print(f"[IP-ADAPTER] Enabled with scale: {identity_preservation}")
475
+ except Exception as e:
476
+ print(f"[IP-ADAPTER] Could not set scale: {e}")
477
+
478
  # Control images: [face keypoints, depth map]
479
  pipe_kwargs["control_image"] = [face_kps_image, depth_image]
480
 
 
490
  pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
491
  pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
492
 
493
+ # IP-Adapter face embeddings
494
+ print(f"Adding face embeddings for IP-Adapter...")
495
+
496
+ # Pass the embeddings
497
+ pipe_kwargs["image_embeds"] = face_embeddings
498
+
499
+ print(f" - Face embeddings shape: {face_embeddings.shape}")
500
+ print(f" [OK] Face embeddings configured")
501
+
502
+ elif has_detected_faces and face_kps_image is not None:
503
+ # Have keypoints but no embeddings (shouldn't happen but handle it)
504
+ print("Using keypoints only + Depth ControlNets (no embeddings)")
505
+
506
+ # Control images: [face keypoints, depth map]
507
+ pipe_kwargs["control_image"] = [face_kps_image, depth_image]
508
+
509
+ # Conditioning scales: [identity, depth]
510
+ pipe_kwargs["controlnet_conditioning_scale"] = [
511
+ identity_control_scale,
512
+ depth_control_scale
513
+ ]
514
+
515
+ # Control guidance timing
516
+ pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
517
+ pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
518
+
519
+ # DO NOT set ip_adapter_scale or image_embeds
520
+ print(" [WARNING] No face embeddings - using keypoints only")
521
 
522
  else:
523
  print("No faces detected - using Depth ControlNet only")
 
529
  # Control guidance timing for both slots
530
  pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
531
  pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
532
+
533
+ # IMPORTANT: Don't pass ANY IP-Adapter related parameters
534
+ # No image_embeds, no ip_adapter_scale
535
 
536
  # Generate
537
  print(f"Generating: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
538
+
539
+ # Debug: Print what IP-Adapter params we're passing
540
+ if "image_embeds" in pipe_kwargs:
541
+ print(f"[DEBUG] Passing image_embeds: shape={pipe_kwargs['image_embeds'].shape}")
542
+ else:
543
+ print("[DEBUG] No image_embeds in pipeline kwargs")
544
+
545
+ if "ip_adapter_scale" in pipe_kwargs:
546
+ print(f"[DEBUG] IP-Adapter scale: {pipe_kwargs['ip_adapter_scale']}")
547
+ else:
548
+ print("[DEBUG] No ip_adapter_scale in pipeline kwargs")
549
+
550
  result = self.pipe(**pipe_kwargs)
551
 
552
  generated_image = result.images[0]