pixagram-neo-backup

Runtime error

App Files Files Community

primerz committed on Nov 2, 2025

Commit

4eaa1be

verified ·

1 Parent(s): 0cf4b86

Update generator.py

Browse files

Files changed (1) hide show

generator.py +110 -24

generator.py CHANGED Viewed

@@ -148,9 +148,27 @@ class RetroArtConverter:
                         self.zoe_depth = self.zoe_depth.to(self.device)
                         self.depth_detector = self.zoe_depth  # Keep alias in sync
-                    # Generate depth map
-                    depth_array = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
-                    depth_image = Image.fromarray(depth_array)
                     # Move back to CPU to free GPU memory
                     if torch.cuda.is_available():
@@ -163,8 +181,24 @@ class RetroArtConverter:
                     # Ensure model is on CPU and try again
                     self.zoe_depth = self.zoe_depth.to("cpu")
                     self.depth_detector = self.zoe_depth  # Keep alias in sync
-                    depth_array = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
-                    depth_image = Image.fromarray(depth_array)
                 if depth_image.size != image.size:
                     depth_image = depth_image.resize(image.size, Image.LANCZOS)
@@ -173,10 +207,12 @@ class RetroArtConverter:
                 return depth_image, depth_array
             except Exception as e:
                 print(f"[DEPTH] Generation failed: {e}, using grayscale fallback")
-                return image.convert('L').convert('RGB'), None
         else:
             print("[DEPTH] Detector not available, using grayscale")
-            return image.convert('L').convert('RGB'), None
     def add_trigger_word(self, prompt):
         """Add trigger word to prompt if not present"""
@@ -294,12 +330,18 @@ class RetroArtConverter:
                 self.zoe_depth = None
                 self.depth_detector = None  # Also set alias
             if not hasattr(self, 'face_app'):
-                print("[ERROR] self.face_app not found, initializing")
                 self.face_app = FaceAnalysisWrapper(None)
                 self.face_detection_enabled = False
             if not hasattr(self, 'memory_manager'):
                 print("[ERROR] self.memory_manager not found, initializing")
                 self.memory_manager = MemoryManager(device=self.device, dtype=self.dtype, verbose=False)
             # Add trigger word
             prompt = self.add_trigger_word(prompt)
@@ -414,9 +456,25 @@ class RetroArtConverter:
                 pipe_kwargs["negative_prompt"] = negative_prompt
             # Configure ControlNets + IP-Adapter (SIMPLIFIED!)
-            if has_detected_faces and face_kps_image is not None:
                 print("Using InstantID (keypoints + embeddings) + Depth ControlNets")
                 # Control images: [face keypoints, depth map]
                 pipe_kwargs["control_image"] = [face_kps_image, depth_image]
@@ -432,21 +490,34 @@ class RetroArtConverter:
                 pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
                 pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
-                # IP-Adapter face embeddings (SIMPLE - pipeline handles everything!)
-                if face_embeddings is not None:
-                    print(f"Adding face embeddings for IP-Adapter...")
-                    # Just pass the embeddings - pipeline does the rest!
-                    pipe_kwargs["image_embeds"] = face_embeddings
-                    # Control IP-Adapter strength
-                    pipe_kwargs["ip_adapter_scale"] = identity_preservation
-                    print(f"  - Face embeddings shape: {face_embeddings.shape}")
-                    print(f"  - IP-Adapter scale: {identity_preservation}")
-                    print(f"  [OK] Face embeddings configured")
-                else:
-                    print("  [WARNING] No face embeddings - using keypoints only")
             else:
                 print("No faces detected - using Depth ControlNet only")
@@ -458,9 +529,24 @@ class RetroArtConverter:
                 # Control guidance timing for both slots
                 pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
                 pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
             # Generate
             print(f"Generating: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
             result = self.pipe(**pipe_kwargs)
             generated_image = result.images[0]

                         self.zoe_depth = self.zoe_depth.to(self.device)
                         self.depth_detector = self.zoe_depth  # Keep alias in sync
+                    # Generate depth map
+                    # ZoeDetector from controlnet_aux expects PIL Image, not numpy array
+                    depth_output = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
+                    # Handle different output types
+                    if isinstance(depth_output, Image.Image):
+                        depth_image = depth_output
+                        depth_array = np.array(depth_output)
+                    elif isinstance(depth_output, np.ndarray):
+                        depth_array = depth_output
+                        depth_image = Image.fromarray(depth_array.astype(np.uint8))
+                    elif isinstance(depth_output, torch.Tensor):
+                        depth_array = depth_output.cpu().numpy()
+                        if depth_array.ndim == 3 and depth_array.shape[0] == 3:
+                            # CHW to HWC
+                            depth_array = depth_array.transpose(1, 2, 0)
+                        depth_image = Image.fromarray((depth_array * 255).astype(np.uint8))
+                    else:
+                        print(f"[DEPTH] Unexpected output type: {type(depth_output)}")
+                        depth_image = image_for_depth.convert('L').convert('RGB')
+                        depth_array = np.array(depth_image)
                     # Move back to CPU to free GPU memory
                     if torch.cuda.is_available():
                     # Ensure model is on CPU and try again
                     self.zoe_depth = self.zoe_depth.to("cpu")
                     self.depth_detector = self.zoe_depth  # Keep alias in sync
+                    # Try again on CPU
+                    depth_output = self.zoe_depth(image_for_depth, detect_resolution=512, image_resolution=1024)
+                    # Handle different output types
+                    if isinstance(depth_output, Image.Image):
+                        depth_image = depth_output
+                        depth_array = np.array(depth_output)
+                    elif isinstance(depth_output, np.ndarray):
+                        depth_array = depth_output
+                        depth_image = Image.fromarray(depth_array.astype(np.uint8))
+                    else:
+                        depth_image = image_for_depth.convert('L').convert('RGB')
+                        depth_array = np.array(depth_image)
+                # Ensure depth image is RGB (some detectors return grayscale)
+                if depth_image.mode != 'RGB':
+                    depth_image = depth_image.convert('RGB')
                 if depth_image.size != image.size:
                     depth_image = depth_image.resize(image.size, Image.LANCZOS)
                 return depth_image, depth_array
             except Exception as e:
                 print(f"[DEPTH] Generation failed: {e}, using grayscale fallback")
+                fallback = image.convert('L').convert('RGB')
+                return fallback, np.array(fallback)
         else:
             print("[DEPTH] Detector not available, using grayscale")
+            fallback = image.convert('L').convert('RGB')
+            return fallback, np.array(fallback)
     def add_trigger_word(self, prompt):
         """Add trigger word to prompt if not present"""
                 self.zoe_depth = None
                 self.depth_detector = None  # Also set alias
             if not hasattr(self, 'face_app'):
+                print("[ERROR] self.face_app not found, initializing wrapper")
                 self.face_app = FaceAnalysisWrapper(None)
                 self.face_detection_enabled = False
             if not hasattr(self, 'memory_manager'):
                 print("[ERROR] self.memory_manager not found, initializing")
                 self.memory_manager = MemoryManager(device=self.device, dtype=self.dtype, verbose=False)
+            if not hasattr(self, 'pipe'):
+                raise RuntimeError("Pipeline not initialized. RetroArtConverter may have failed to initialize properly.")
+            # Ensure depth_detector alias exists
+            if hasattr(self, 'zoe_depth') and not hasattr(self, 'depth_detector'):
+                self.depth_detector = self.zoe_depth
             # Add trigger word
             prompt = self.add_trigger_word(prompt)
                 pipe_kwargs["negative_prompt"] = negative_prompt
             # Configure ControlNets + IP-Adapter (SIMPLIFIED!)
+            # First, check if we need to disable IP-Adapter
+            if not (has_detected_faces and face_embeddings is not None):
+                # No faces or embeddings - disable IP-Adapter completely
+                try:
+                    self.pipe.set_ip_adapter_scale(0.0)
+                    print("[IP-ADAPTER] Disabled (scale set to 0) - no faces detected")
+                except Exception as e:
+                    print(f"[IP-ADAPTER] Could not disable: {e}")
+            if has_detected_faces and face_kps_image is not None and face_embeddings is not None:
                 print("Using InstantID (keypoints + embeddings) + Depth ControlNets")
+                # Re-enable IP-Adapter with proper scale
+                try:
+                    self.pipe.set_ip_adapter_scale(identity_preservation)
+                    print(f"[IP-ADAPTER] Enabled with scale: {identity_preservation}")
+                except Exception as e:
+                    print(f"[IP-ADAPTER] Could not set scale: {e}")
                 # Control images: [face keypoints, depth map]
                 pipe_kwargs["control_image"] = [face_kps_image, depth_image]
                 pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
                 pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
+                # IP-Adapter face embeddings
+                print(f"Adding face embeddings for IP-Adapter...")
+                # Pass the embeddings
+                pipe_kwargs["image_embeds"] = face_embeddings
+                print(f"  - Face embeddings shape: {face_embeddings.shape}")
+                print(f"  [OK] Face embeddings configured")
+            elif has_detected_faces and face_kps_image is not None:
+                # Have keypoints but no embeddings (shouldn't happen but handle it)
+                print("Using keypoints only + Depth ControlNets (no embeddings)")
+                # Control images: [face keypoints, depth map]
+                pipe_kwargs["control_image"] = [face_kps_image, depth_image]
+                # Conditioning scales: [identity, depth]
+                pipe_kwargs["controlnet_conditioning_scale"] = [
+                    identity_control_scale,
+                    depth_control_scale
+                ]
+                # Control guidance timing
+                pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
+                pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
+                # DO NOT set ip_adapter_scale or image_embeds
+                print("  [WARNING] No face embeddings - using keypoints only")
             else:
                 print("No faces detected - using Depth ControlNet only")
                 # Control guidance timing for both slots
                 pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
                 pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
+                # IMPORTANT: Don't pass ANY IP-Adapter related parameters
+                # No image_embeds, no ip_adapter_scale
             # Generate
             print(f"Generating: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
+            # Debug: Print what IP-Adapter params we're passing
+            if "image_embeds" in pipe_kwargs:
+                print(f"[DEBUG] Passing image_embeds: shape={pipe_kwargs['image_embeds'].shape}")
+            else:
+                print("[DEBUG] No image_embeds in pipeline kwargs")
+            if "ip_adapter_scale" in pipe_kwargs:
+                print(f"[DEBUG] IP-Adapter scale: {pipe_kwargs['ip_adapter_scale']}")
+            else:
+                print("[DEBUG] No ip_adapter_scale in pipeline kwargs")
             result = self.pipe(**pipe_kwargs)
             generated_image = result.images[0]