primerz committed on
Commit
089fd21
·
verified ·
1 Parent(s): 44fc9c3

Update generator.py

Browse files
Files changed (1) hide show
  1. generator.py +135 -105
generator.py CHANGED
@@ -20,7 +20,7 @@ from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
  load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
- load_openpose_detector # <-- NEW
24
  )
25
 
26
 
@@ -36,7 +36,7 @@ class RetroArtConverter:
36
  'instantid': False,
37
  'zoe_depth': False,
38
  'ip_adapter': False,
39
- 'openpose': False # <-- NEW
40
  }
41
 
42
  # Initialize face analysis
@@ -64,17 +64,44 @@ class RetroArtConverter:
64
  else:
65
  self.image_encoder = None
66
 
 
67
  # Determine which controlnets to use
68
- controlnets = [controlnet_depth, self.controlnet_openpose] # Start with depth and openpose
69
- if self.instantid_enabled and self.controlnet_instantid is not None:
70
- controlnets.insert(0, self.controlnet_instantid) # Add InstantID at the start if available
71
- print(f"Initializing with multiple ControlNets: InstantID + Depth + OpenPose")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  else:
73
- print(f"Initializing with ControlNets: Depth + OpenPose (InstantID disabled)")
 
 
 
74
 
 
75
 
76
  # Load SDXL pipeline
77
- self.pipe, checkpoint_success = load_sdxl_pipeline(controlnets)
 
 
 
78
  self.models_loaded['custom_checkpoint'] = checkpoint_success
79
 
80
  # Load LORA
@@ -82,11 +109,11 @@ class RetroArtConverter:
82
  self.models_loaded['lora'] = lora_success
83
 
84
  # Setup IP-Adapter
85
- if self.instantid_enabled and self.image_encoder is not None:
86
  self.image_proj_model, ip_adapter_success = setup_ip_adapter(self.pipe, self.image_encoder)
87
  self.models_loaded['ip_adapter'] = ip_adapter_success
88
  else:
89
- print("[INFO] Face preservation: InstantID ControlNet keypoints only")
90
  self.models_loaded['ip_adapter'] = False
91
  self.image_proj_model = None
92
 
@@ -283,7 +310,7 @@ class RetroArtConverter:
283
  def validate_and_adjust_parameters(self, strength, guidance_scale, lora_scale,
284
  identity_preservation, identity_control_scale,
285
  depth_control_scale, consistency_mode=True,
286
- expression_control_scale=0.6): # <-- NEW
287
  """
288
  Enhanced parameter validation with stricter rules for consistency.
289
  """
@@ -338,16 +365,29 @@ class RetroArtConverter:
338
  adjustments.append(f"CFG: {original_cfg:.2f}->{guidance_scale:.2f} (LCM optimal)")
339
 
340
  # Rule 5: ControlNet balance
341
- # <-- MODIFIED: Now balances 3 controlnets -->
342
- total_control = identity_control_scale + depth_control_scale + expression_control_scale
 
 
 
 
 
 
 
343
  if total_control > 2.0: # Increased max total from 1.7 to 2.0
344
  scale_factor = 2.0 / total_control
345
  original_id_ctrl = identity_control_scale
346
  original_depth_ctrl = depth_control_scale
347
  original_expr_ctrl = expression_control_scale
348
- identity_control_scale *= scale_factor
349
- depth_control_scale *= scale_factor
350
- expression_control_scale *= scale_factor
 
 
 
 
 
 
351
  adjustments.append(f"ControlNets balanced: ID {original_id_ctrl:.2f}->{identity_control_scale:.2f}, Depth {original_depth_ctrl:.2f}->{depth_control_scale:.2f}, Expr {original_expr_ctrl:.2f}->{expression_control_scale:.2f}")
352
 
353
  # Report adjustments
@@ -441,7 +481,7 @@ class RetroArtConverter:
441
  guidance_scale=1.0,
442
  depth_control_scale=0.8,
443
  identity_control_scale=0.85,
444
- expression_control_scale=0.6, # <-- NEW
445
  lora_scale=1.0,
446
  identity_preservation=0.8,
447
  strength=0.75,
@@ -462,10 +502,10 @@ class RetroArtConverter:
462
  if consistency_mode:
463
  print("\n[CONSISTENCY] Validating and adjusting parameters...")
464
  strength, guidance_scale, lora_scale, identity_preservation, identity_control_scale, depth_control_scale, expression_control_scale = \
465
- self.validate_and_adjust_parameters( # <-- MODIFIED
466
  strength, guidance_scale, lora_scale, identity_preservation,
467
  identity_control_scale, depth_control_scale, consistency_mode,
468
- expression_control_scale # <-- NEW
469
  )
470
 
471
  # Add trigger word
@@ -482,35 +522,37 @@ class RetroArtConverter:
482
  # Resize with high quality
483
  resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
484
 
 
 
485
  # Generate depth map
486
- print("Generating Zoe depth map...")
487
- depth_image = self.get_depth_map(resized_image)
488
- if depth_image.size != (target_width, target_height):
489
- depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
490
-
491
- # --- NEW: Generate OpenPose map ---
 
 
492
  openpose_image = None
493
- if self.openpose_detector is not None:
494
  print("Generating OpenPose map...")
495
  try:
496
  openpose_image = self.openpose_detector(resized_image, face_only=True)
497
  except Exception as e:
498
  print(f"OpenPose failed, using blank map: {e}")
499
  openpose_image = Image.new("RGB", (target_width, target_height), (0,0,0))
500
- else:
501
- openpose_image = Image.new("RGB", (target_width, target_height), (0,0,0))
502
- # --- END NEW ---
503
 
504
 
505
  # Handle face detection
506
- using_multiple_controlnets = self.using_multiple_controlnets
507
  face_kps_image = None
508
  face_embeddings = None
509
  face_crop_enhanced = None
510
  has_detected_faces = False
511
  face_bbox_original = None
512
 
513
- if using_multiple_controlnets and self.face_app is not None and self.instantid_enabled: # <-- Check instantid_enabled
514
  print("Detecting faces and extracting keypoints...")
515
  img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
516
  faces = self.face_app.get(img_array)
@@ -631,90 +673,78 @@ class RetroArtConverter:
631
  if hasattr(self.pipe, 'text_encoder'):
632
  pipe_kwargs["clip_skip"] = 2
633
 
634
- # Configure ControlNet inputs
635
- # --- MODIFIED: Handle 3 ControlNets ---
636
- if using_multiple_controlnets and has_detected_faces and face_kps_image is not None and self.instantid_enabled:
637
- print("Using InstantID (keypoints) + Depth + OpenPose ControlNets")
638
- control_images = [face_kps_image, depth_image, openpose_image]
639
- conditioning_scales = [identity_control_scale, depth_control_scale, expression_control_scale]
640
-
641
- pipe_kwargs["control_image"] = control_images
642
- pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
643
-
644
- # Add face embeddings for IP-Adapter if available
645
- if face_embeddings is not None and self.models_loaded.get('ip_adapter', False) and face_crop_enhanced is not None:
646
- print(f"Processing InstantID face embeddings with Resampler...")
647
-
648
- with torch.no_grad():
649
- # Convert InsightFace embeddings to tensor
650
- face_emb_tensor = torch.from_numpy(face_embeddings).to(
651
- device=self.device,
652
- dtype=self.dtype
653
- )
654
-
655
- # Reshape for Resampler: [1, 1, 512]
656
- face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
657
-
658
- # Pass through Resampler: [1, 1, 512] -> 16x2048
659
- face_proj_embeds = self.image_proj_model(face_emb_tensor)
660
-
661
- # Scale with identity preservation
662
- boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
663
- face_proj_embeds = face_proj_embeds * boosted_scale
664
-
665
- print(f" - Face embedding: {face_emb_tensor.shape}")
666
- print(f" - Resampler output: {face_proj_embeds.shape}")
667
- print(f" - Scale: {boosted_scale:.2f}")
668
 
669
- # CRITICAL: Concatenate with text embeddings (not separate kwargs!)
670
- if 'prompt_embeds' in pipe_kwargs:
671
- # Compel encoded prompts
672
- original_embeds = pipe_kwargs['prompt_embeds']
673
-
674
- # Handle CFG (classifier-free guidance)
675
- if original_embeds.shape[0] > 1: # Has negative + positive
676
- # Duplicate for negative + positive
677
- face_proj_embeds = torch.cat([
678
- torch.zeros_like(face_proj_embeds), # Negative
679
- face_proj_embeds # Positive
680
- ], dim=0)
681
 
682
- # Concatenate: [batch, text_tokens, 2048] + [batch, 16, 2048]
683
- combined_embeds = torch.cat([original_embeds, face_proj_embeds], dim=1)
684
- pipe_kwargs['prompt_embeds'] = combined_embeds
685
 
686
- print(f" - Text embeds: {original_embeds.shape}")
687
- print(f" - Combined embeds: {combined_embeds.shape}")
688
- print(f" [OK] Face embeddings concatenated successfully!")
689
 
690
- else:
691
- print(f" [WARNING] Can't concatenate - no prompt_embeds (use Compel)")
692
-
693
- elif has_detected_faces and self.models_loaded.get('ip_adapter', False):
694
- # Face detected but embeddings unavailable
695
- print(" Face detected but embeddings unavailable, using keypoints only")
696
-
697
- elif using_multiple_controlnets: # No face, or InstantID disabled
698
- print("InstantID disabled or no faces detected, using depth + openpose only")
699
- # Use blank image for InstantID
700
- blank_kps = Image.new("RGB", (target_width, target_height), (0,0,0))
701
-
702
- control_images = [blank_kps, depth_image, openpose_image]
703
- conditioning_scales = [0.0, depth_control_scale, expression_control_scale]
704
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
  pipe_kwargs["control_image"] = control_images
706
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
707
-
708
- else: # Fallback to just depth (shouldn't happen if setup is correct)
709
- print("Using Depth ControlNet only")
710
- pipe_kwargs["control_image"] = depth_image
711
- pipe_kwargs["controlnet_conditioning_scale"] = depth_control_scale
712
- # --- END MODIFICATION ---
713
 
714
 
715
  # Generate
716
  print(f"Generating with LCM: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
717
- print(f"Controlnet scales - Identity: {identity_control_scale}, Depth: {depth_control_scale}, Expression: {expression_control_scale}")
718
  result = self.pipe(**pipe_kwargs)
719
 
720
  generated_image = result.images[0]
 
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
  load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
+ load_openpose_detector
24
  )
25
 
26
 
 
36
  'instantid': False,
37
  'zoe_depth': False,
38
  'ip_adapter': False,
39
+ 'openpose': False
40
  }
41
 
42
  # Initialize face analysis
 
64
  else:
65
  self.image_encoder = None
66
 
67
+ # --- FIX START: Robust ControlNet Loading ---
68
  # Determine which controlnets to use
69
+
70
+ # Store booleans for which models are active
71
+ self.instantid_active = self.instantid_enabled and self.controlnet_instantid is not None
72
+ self.depth_active = self.controlnet_depth is not None
73
+ self.openpose_active = self.controlnet_openpose is not None
74
+
75
+ # Build the list of *active* controlnet models
76
+ controlnets = []
77
+ if self.instantid_active:
78
+ controlnets.append(self.controlnet_instantid)
79
+ print(" [CN] InstantID (Identity) active")
80
+ else:
81
+ print(" [CN] InstantID (Identity) DISABLED")
82
+
83
+ if self.depth_active:
84
+ controlnets.append(self.controlnet_depth)
85
+ print(" [CN] Depth active")
86
+ else:
87
+ print(" [CN] Depth DISABLED")
88
+
89
+ if self.openpose_active:
90
+ controlnets.append(self.controlnet_openpose)
91
+ print(" [CN] OpenPose (Expression) active")
92
  else:
93
+ print(" [CN] OpenPose (Expression) DISABLED")
94
+
95
+ if not controlnets:
96
+ print("[WARNING] No ControlNets loaded!")
97
 
98
+ print(f"Initializing with {len(controlnets)} active ControlNet(s)")
99
 
100
  # Load SDXL pipeline
101
+ # Pass the filtered list (or None if empty)
102
+ self.pipe, checkpoint_success = load_sdxl_pipeline(controlnets if controlnets else None)
103
+ # --- FIX END ---
104
+
105
  self.models_loaded['custom_checkpoint'] = checkpoint_success
106
 
107
  # Load LORA
 
109
  self.models_loaded['lora'] = lora_success
110
 
111
  # Setup IP-Adapter
112
+ if self.instantid_active and self.image_encoder is not None: # <-- Check instantid_active
113
  self.image_proj_model, ip_adapter_success = setup_ip_adapter(self.pipe, self.image_encoder)
114
  self.models_loaded['ip_adapter'] = ip_adapter_success
115
  else:
116
+ print("[INFO] Face preservation: IP-Adapter disabled (InstantID model failed or encoder failed)")
117
  self.models_loaded['ip_adapter'] = False
118
  self.image_proj_model = None
119
 
 
310
  def validate_and_adjust_parameters(self, strength, guidance_scale, lora_scale,
311
  identity_preservation, identity_control_scale,
312
  depth_control_scale, consistency_mode=True,
313
+ expression_control_scale=0.6):
314
  """
315
  Enhanced parameter validation with stricter rules for consistency.
316
  """
 
365
  adjustments.append(f"CFG: {original_cfg:.2f}->{guidance_scale:.2f} (LCM optimal)")
366
 
367
  # Rule 5: ControlNet balance
368
+ # MODIFIED: Only sum *active* controlnets
369
+ total_control = 0
370
+ if self.instantid_active:
371
+ total_control += identity_control_scale
372
+ if self.depth_active:
373
+ total_control += depth_control_scale
374
+ if self.openpose_active:
375
+ total_control += expression_control_scale
376
+
377
  if total_control > 2.0: # Increased max total from 1.7 to 2.0
378
  scale_factor = 2.0 / total_control
379
  original_id_ctrl = identity_control_scale
380
  original_depth_ctrl = depth_control_scale
381
  original_expr_ctrl = expression_control_scale
382
+
383
+ # Only scale active controlnets
384
+ if self.instantid_active:
385
+ identity_control_scale *= scale_factor
386
+ if self.depth_active:
387
+ depth_control_scale *= scale_factor
388
+ if self.openpose_active:
389
+ expression_control_scale *= scale_factor
390
+
391
  adjustments.append(f"ControlNets balanced: ID {original_id_ctrl:.2f}->{identity_control_scale:.2f}, Depth {original_depth_ctrl:.2f}->{depth_control_scale:.2f}, Expr {original_expr_ctrl:.2f}->{expression_control_scale:.2f}")
392
 
393
  # Report adjustments
 
481
  guidance_scale=1.0,
482
  depth_control_scale=0.8,
483
  identity_control_scale=0.85,
484
+ expression_control_scale=0.6,
485
  lora_scale=1.0,
486
  identity_preservation=0.8,
487
  strength=0.75,
 
502
  if consistency_mode:
503
  print("\n[CONSISTENCY] Validating and adjusting parameters...")
504
  strength, guidance_scale, lora_scale, identity_preservation, identity_control_scale, depth_control_scale, expression_control_scale = \
505
+ self.validate_and_adjust_parameters(
506
  strength, guidance_scale, lora_scale, identity_preservation,
507
  identity_control_scale, depth_control_scale, consistency_mode,
508
+ expression_control_scale
509
  )
510
 
511
  # Add trigger word
 
522
  # Resize with high quality
523
  resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
524
 
525
+ # --- FIX START: Generate control images only if models are active ---
526
+
527
  # Generate depth map
528
+ depth_image = None
529
+ if self.depth_active:
530
+ print("Generating Zoe depth map...")
531
+ depth_image = self.get_depth_map(resized_image)
532
+ if depth_image.size != (target_width, target_height):
533
+ depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
534
+
535
+ # Generate OpenPose map
536
  openpose_image = None
537
+ if self.openpose_active:
538
  print("Generating OpenPose map...")
539
  try:
540
  openpose_image = self.openpose_detector(resized_image, face_only=True)
541
  except Exception as e:
542
  print(f"OpenPose failed, using blank map: {e}")
543
  openpose_image = Image.new("RGB", (target_width, target_height), (0,0,0))
544
+
545
+ # --- FIX END ---
 
546
 
547
 
548
  # Handle face detection
 
549
  face_kps_image = None
550
  face_embeddings = None
551
  face_crop_enhanced = None
552
  has_detected_faces = False
553
  face_bbox_original = None
554
 
555
+ if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
556
  print("Detecting faces and extracting keypoints...")
557
  img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
558
  faces = self.face_app.get(img_array)
 
673
  if hasattr(self.pipe, 'text_encoder'):
674
  pipe_kwargs["clip_skip"] = 2
675
 
676
+ # --- FIX START: Configure ControlNet inputs dynamically ---
677
+ control_images = []
678
+ conditioning_scales = []
679
+ scale_debug_str = []
680
+
681
+ # 1. InstantID (Identity)
682
+ if self.instantid_active:
683
+ if has_detected_faces and face_kps_image is not None:
684
+ control_images.append(face_kps_image)
685
+ conditioning_scales.append(identity_control_scale)
686
+ scale_debug_str.append(f"Identity: {identity_control_scale:.2f}")
687
+
688
+ # Add face embeddings for IP-Adapter if available
689
+ if face_embeddings is not None and self.models_loaded.get('ip_adapter', False) and face_crop_enhanced is not None:
690
+ print(f"Processing InstantID face embeddings with Resampler...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
 
692
+ with torch.no_grad():
693
+ face_emb_tensor = torch.from_numpy(face_embeddings).to(device=self.device, dtype=self.dtype)
694
+ face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
695
+ face_proj_embeds = self.image_proj_model(face_emb_tensor)
 
 
 
 
 
 
 
 
696
 
697
+ boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
698
+ face_proj_embeds = face_proj_embeds * boosted_scale
 
699
 
700
+ print(f" - Face embedding: {face_emb_tensor.shape} -> {face_proj_embeds.shape}, Scale: {boosted_scale:.2f}")
 
 
701
 
702
+ if 'prompt_embeds' in pipe_kwargs:
703
+ original_embeds = pipe_kwargs['prompt_embeds']
704
+
705
+ if original_embeds.shape[0] > 1: # Handle CFG
706
+ face_proj_embeds = torch.cat([torch.zeros_like(face_proj_embeds), face_proj_embeds], dim=0)
707
+
708
+ combined_embeds = torch.cat([original_embeds, face_proj_embeds], dim=1)
709
+ pipe_kwargs['prompt_embeds'] = combined_embeds
710
+ print(f" [OK] Face embeddings concatenated successfully! New shape: {combined_embeds.shape}")
711
+ else:
712
+ print(f" [WARNING] Can't concatenate - no prompt_embeds (use Compel)")
713
+
714
+ elif has_detected_faces:
715
+ print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
716
+
717
+ else:
718
+ # No face, must add a blank image to keep list order
719
+ print("Using blank map for InstantID (no face/disabled)")
720
+ control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
721
+ conditioning_scales.append(0.0) # Set scale to 0
722
+ scale_debug_str.append("Identity: 0.00")
723
+
724
+ # 2. Depth
725
+ if self.depth_active:
726
+ control_images.append(depth_image)
727
+ conditioning_scales.append(depth_control_scale)
728
+ scale_debug_str.append(f"Depth: {depth_control_scale:.2f}")
729
+
730
+ # 3. OpenPose (Expression)
731
+ if self.openpose_active:
732
+ control_images.append(openpose_image) # This is already a blank map if it failed
733
+ conditioning_scales.append(expression_control_scale)
734
+ scale_debug_str.append(f"Expression: {expression_control_scale:.2f}")
735
+
736
+ if control_images:
737
  pipe_kwargs["control_image"] = control_images
738
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
739
+ print(f"Active ControlNets: {len(control_images)}")
740
+ else:
741
+ print("No active ControlNets, running standard Img2Img")
742
+ # --- FIX END ---
 
 
743
 
744
 
745
  # Generate
746
  print(f"Generating with LCM: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
747
+ print(f"Controlnet scales - {' | '.join(scale_debug_str)}")
748
  result = self.pipe(**pipe_kwargs)
749
 
750
  generated_image = result.images[0]