Update generator.py

generator.py (CHANGED: +18 -21)
@@ -228,7 +228,10 @@ class RetroArtConverter:
 
         # Use torch.no_grad() and clear cache
         with torch.no_grad():
+            # --- FIX: Move model to GPU for inference and back to CPU ---
+            self.depth_detector.to(self.device)
             depth_image = self.depth_detector(image_for_depth)
+            self.depth_detector.to("cpu")
 
         # ADDED: Clear GPU cache after depth detection
         if torch.cuda.is_available():
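This hunk, and the caption and OpenPose hunks below, all apply the same VRAM-saving pattern: keep the auxiliary model in system RAM and move it to the GPU only for the one call that needs it. A minimal standalone sketch of the pattern, assuming a generic torch module; the helper name and arguments are illustrative, not from this commit:

```python
import torch

def run_offloaded(model, inputs, device="cuda"):
    """Run a single inference call with the model temporarily on the GPU."""
    model.to(device)
    try:
        with torch.no_grad():
            result = model(inputs)
    finally:
        # Return the weights to system RAM and release cached VRAM,
        # whether or not the call above raised.
        model.to("cpu")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return result
```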
@@ -448,6 +451,9 @@ class RetroArtConverter:
         num_beams = CAPTION_CONFIG['num_beams']
 
         try:
+            # --- FIX: Move model to GPU for inference and back to CPU ---
+            self.caption_model.to(self.device)
+
             if self.caption_model_type == "blip2":
                 # BLIP-2 specific processing
                 inputs = self.caption_processor(image, return_tensors="pt").to(self.device, self.dtype)
@@ -496,10 +502,12 @@ class RetroArtConverter:
 
             caption = self.caption_processor.decode(output[0], skip_special_tokens=True)
 
+            self.caption_model.to("cpu")
             return caption.strip()
 
         except Exception as e:
             print(f"Caption generation failed: {e}")
+            self.caption_model.to("cpu")
             return None
 
     def generate_retro_art(
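Note that `self.caption_model.to("cpu")` now has to be repeated on both the success path and the `except` branch. A `try`/`finally` would cover both exits with a single statement; a sketch of that alternative, reusing the hunk's names (`_run_caption_inference` is a hypothetical helper standing in for the processing shown above):

```python
try:
    self.caption_model.to(self.device)
    # Hypothetical helper wrapping the processor/generate/decode steps above.
    caption = self._run_caption_inference(image)
    return caption.strip()
except Exception as e:
    print(f"Caption generation failed: {e}")
    return None
finally:
    # Runs on the return path and the exception path alike.
    self.caption_model.to("cpu")
```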
@@ -568,9 +576,13 @@ class RetroArtConverter:
         if self.openpose_active:
             print("Generating OpenPose map...")
             try:
+                # --- FIX: Move model to GPU for inference and back to CPU ---
+                self.openpose_detector.to(self.device)
                 openpose_image = self.openpose_detector(resized_image, face_only=True)
+                self.openpose_detector.to("cpu")
             except Exception as e:
                 print(f"OpenPose failed, using blank map: {e}")
+                self.openpose_detector.to("cpu")
                 openpose_image = Image.new("RGB", (target_width, target_height), (0,0,0))
 
         # --- FIX END ---
@@ -692,7 +704,7 @@ class RetroArtConverter:
             else:
                 print("✗ MediapipeFace found no faces")
         except Exception as e:
-            print(f"
+            print(f"[ERROR] MediapipeFace detection failed: {e}")
             import traceback
             traceback.print_exc()
         else:
@@ -751,20 +763,14 @@ class RetroArtConverter:
 
         pipe_kwargs["generator"] = generator
 
-        #
+        # --- FIX: Reverted Compel block ---
+        # No more try/finally, no more .to(device)
+        # This works because optimize_pipeline() no longer offloads the text encoders.
         if self.use_compel and self.compel is not None:
             try:
                 print("Encoding prompts with Compel...")
-
-                # --- FIX: Move text encoders to GPU for Compel ---
-                self.pipe.text_encoder.to(self.device)
-                self.pipe.text_encoder_2.to(self.device)
-                # --- END FIX ---
-
-                # --- FIX: Remove 'device=self.device' argument ---
                 conditioning = self.compel(prompt)
                 negative_conditioning = self.compel(negative_prompt)
-                # --- END FIX ---
 
                 pipe_kwargs["prompt_embeds"] = conditioning[0]
                 pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
@@ -774,21 +780,12 @@ class RetroArtConverter:
                 print("[OK] Using Compel-encoded prompts")
             except Exception as e:
                 print(f"Compel encoding failed, using standard prompts: {e}")
-                import traceback
-                traceback.print_exc()
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
-            finally:
-                # --- FIX: Move text encoders back to CPU to save VRAM ---
-                try:
-                    self.pipe.text_encoder.to("cpu")
-                    self.pipe.text_encoder_2.to("cpu")
-                except Exception as e:
-                    print(f"Could not move text encoders back to CPU: {e}")
-                # --- END FIX ---
         else:
             pipe_kwargs["prompt"] = prompt
             pipe_kwargs["negative_prompt"] = negative_prompt
+        # --- END FIX ---
 
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):
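The reverted block relies on two things: `self.compel` returning a `(prompt_embeds, pooled_prompt_embeds)` tuple, which is what the `conditioning[0]` / `conditioning[1]` indexing unpacks, and the text encoders staying on the execution device, which is why the commit notes that `optimize_pipeline()` no longer offloads them. For an SDXL pipeline, such a Compel instance is typically built over both tokenizers and text encoders; a sketch under that assumption (`pipe` is an already-loaded SDXL pipeline, not a name from this repo):

```python
from compel import Compel, ReturnedEmbeddingsType

compel = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],  # only the second encoder yields pooled embeds
)

# With requires_pooled set, calling compel returns (embeds, pooled_embeds);
# the trailing "+" is Compel's attention up-weighting syntax.
prompt_embeds, pooled_prompt_embeds = compel("a retro pixel art portrait+")
```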
@@ -882,7 +879,7 @@ class RetroArtConverter:
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
             print(f"Active ControlNets: {len(control_images)} (all {target_width}x{target_height})")
         else:
-            print("No active
+            print("No active ControlNets, running standard Img2Img")
 
         # Generate
         print(f"Generating with LCM: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
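The hunks stop just before the pipeline invocation itself, but with `pipe_kwargs` assembled (Compel embeds or raw prompts, the seeded generator, and any ControlNet images and scales), the final call presumably reduces to something like the sketch below. The exact signature is an assumption, not shown in this commit; the step and CFG ranges in the comments are typical for LCM, per the log line above:

```python
# Hypothetical final call: LCM schedulers want few steps and low CFG.
output = self.pipe(
    num_inference_steps=num_inference_steps,  # e.g. 4-8 for LCM
    guidance_scale=guidance_scale,            # e.g. 1.0-2.0 for LCM
    strength=strength,
    **pipe_kwargs,
)
retro_image = output.images[0]
```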