OrlandoHugBot commited on
Commit
24ace1c
·
verified ·
1 Parent(s): fc3e76d

Update pipeline_qwenimage_edit.py

Browse files
Files changed (1) hide show
  1. pipeline_qwenimage_edit.py +13 -15
pipeline_qwenimage_edit.py CHANGED
@@ -48,7 +48,6 @@ EXAMPLE_DOC_STRING = """
48
  >>> from PIL import Image
49
  >>> from diffusers import QwenImageEditPipeline
50
  >>> from diffusers.utils import load_image
51
-
52
  >>> pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=torch.bfloat16)
53
  >>> pipe.to("cuda")
54
  >>> image = load_image(
@@ -91,7 +90,6 @@ def retrieve_timesteps(
91
  r"""
92
  Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
93
  custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
94
-
95
  Args:
96
  scheduler (`SchedulerMixin`):
97
  The scheduler to get timesteps from.
@@ -106,7 +104,6 @@ def retrieve_timesteps(
106
  sigmas (`List[float]`, *optional*):
107
  Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
108
  `num_inference_steps` and `timesteps` must be `None`.
109
-
110
  Returns:
111
  `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
112
  second element is the number of inference steps.
@@ -177,7 +174,6 @@ def resize_to_multiple_of(image, multiple_of=32):
177
  class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
178
  r"""
179
  The Qwen-Image-Edit pipeline for image editing.
180
-
181
  Args:
182
  transformer ([`QwenImageTransformer2DModel`]):
183
  Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
@@ -244,7 +240,10 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
244
  device: Optional[torch.device] = None,
245
  dtype: Optional[torch.dtype] = None,
246
  ):
247
- device = device or self._execution_device
 
 
 
248
  dtype = dtype or self.text_encoder.dtype
249
 
250
  prompts = [prompts] if isinstance(prompts, str) else prompts
@@ -277,14 +276,14 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
277
  )
278
  texts.append(text)
279
 
280
- # Process inputs
281
  model_inputs = self.processor(
282
  text=texts,
283
  images=images,
284
  do_resize=False, # already resized
285
  padding=True,
286
  return_tensors="pt"
287
- ).to(self.device)
288
 
289
  # template = self.prompt_template_encode
290
  drop_idx = self.prompt_template_encode_start_idx
@@ -333,7 +332,6 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
333
  max_sequence_length: int = 1024,
334
  ):
335
  r"""
336
-
337
  Args:
338
  prompt (`str` or `List[str]`, *optional*):
339
  prompt to be encoded
@@ -347,7 +345,9 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
347
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
348
  provided, text embeddings will be generated from `prompt` input argument.
349
  """
350
- device = device or self._execution_device
 
 
351
 
352
  prompt = [prompt] if isinstance(prompt, str) else prompt
353
  batch_size = len(prompt) if prompt_embeds is None else prompt_embeds.shape[0]
@@ -601,7 +601,6 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
601
  ):
602
  r"""
603
  Function invoked when calling the pipeline for generation.
604
-
605
  Args:
606
  prompt (`str` or `List[str]`, *optional*):
607
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
@@ -629,7 +628,6 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
629
  of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
630
  `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
631
  the text `prompt`, usually at the expense of lower image quality.
632
-
633
  This parameter in the pipeline is there to support future guidance-distilled models when they come up.
634
  Note that passing `guidance_scale` to the pipeline is ineffective. To enable classifier-free guidance,
635
  please pass `true_cfg_scale` and `negative_prompt` (even an empty negative prompt like " ") should
@@ -669,9 +667,7 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
669
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
670
  `._callback_tensor_inputs` attribute of your pipeline class.
671
  max_sequence_length (`int` defaults to 512): Maximum sequence length to use with the `prompt`.
672
-
673
  Examples:
674
-
675
  Returns:
676
  [`~pipelines.qwenimage.QwenImagePipelineOutput`] or `tuple`:
677
  [`~pipelines.qwenimage.QwenImagePipelineOutput`] if `return_dict` is True, otherwise a `tuple`. When
@@ -720,7 +716,9 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
720
  else:
721
  batch_size = prompt_embeds.shape[0]
722
 
723
- device = self._execution_device
 
 
724
  # 3. Preprocess image
725
 
726
  prompt_images = [image.resize((round(image.width * 28 / 32), round(image.height * 28 / 32))) for image in images]
@@ -907,4 +905,4 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
907
  if not return_dict:
908
  return (image,)
909
 
910
- return QwenImagePipelineOutput(images=image)
 
48
  >>> from PIL import Image
49
  >>> from diffusers import QwenImageEditPipeline
50
  >>> from diffusers.utils import load_image
 
51
  >>> pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=torch.bfloat16)
52
  >>> pipe.to("cuda")
53
  >>> image = load_image(
 
90
  r"""
91
  Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
92
  custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
93
  Args:
94
  scheduler (`SchedulerMixin`):
95
  The scheduler to get timesteps from.
 
104
  sigmas (`List[float]`, *optional*):
105
  Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
106
  `num_inference_steps` and `timesteps` must be `None`.
 
107
  Returns:
108
  `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
109
  second element is the number of inference steps.
 
174
  class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
175
  r"""
176
  The Qwen-Image-Edit pipeline for image editing.
 
177
  Args:
178
  transformer ([`QwenImageTransformer2DModel`]):
179
  Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
 
240
  device: Optional[torch.device] = None,
241
  dtype: Optional[torch.dtype] = None,
242
  ):
243
+ # Fix: prefer the text_encoder's actual device over self.device
244
+ # This ensures device consistency in ZeroGPU environments
245
+ if device is None:
246
+ device = next(self.text_encoder.parameters()).device
247
  dtype = dtype or self.text_encoder.dtype
248
 
249
  prompts = [prompts] if isinstance(prompts, str) else prompts
 
276
  )
277
  texts.append(text)
278
 
279
+ # Process inputs — fix: use the text_encoder's actual device
280
  model_inputs = self.processor(
281
  text=texts,
282
  images=images,
283
  do_resize=False, # already resized
284
  padding=True,
285
  return_tensors="pt"
286
+ ).to(device)
287
 
288
  # template = self.prompt_template_encode
289
  drop_idx = self.prompt_template_encode_start_idx
 
332
  max_sequence_length: int = 1024,
333
  ):
334
  r"""
 
335
  Args:
336
  prompt (`str` or `List[str]`, *optional*):
337
  prompt to be encoded
 
345
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
346
  provided, text embeddings will be generated from `prompt` input argument.
347
  """
348
+ # Fix: prefer the text_encoder's actual device
349
+ if device is None:
350
+ device = next(self.text_encoder.parameters()).device
351
 
352
  prompt = [prompt] if isinstance(prompt, str) else prompt
353
  batch_size = len(prompt) if prompt_embeds is None else prompt_embeds.shape[0]
 
601
  ):
602
  r"""
603
  Function invoked when calling the pipeline for generation.
 
604
  Args:
605
  prompt (`str` or `List[str]`, *optional*):
606
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
 
628
  of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
629
  `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
630
  the text `prompt`, usually at the expense of lower image quality.
 
631
  This parameter in the pipeline is there to support future guidance-distilled models when they come up.
632
  Note that passing `guidance_scale` to the pipeline is ineffective. To enable classifier-free guidance,
633
  please pass `true_cfg_scale` and `negative_prompt` (even an empty negative prompt like " ") should
 
667
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
668
  `._callback_tensor_inputs` attribute of your pipeline class.
669
  max_sequence_length (`int` defaults to 512): Maximum sequence length to use with the `prompt`.
 
670
  Examples:
 
671
  Returns:
672
  [`~pipelines.qwenimage.QwenImagePipelineOutput`] or `tuple`:
673
  [`~pipelines.qwenimage.QwenImagePipelineOutput`] if `return_dict` is True, otherwise a `tuple`. When
 
716
  else:
717
  batch_size = prompt_embeds.shape[0]
718
 
719
+ # Fix: use the text_encoder's actual device instead of _execution_device
720
+ device = next(self.text_encoder.parameters()).device
721
+
722
  # 3. Preprocess image
723
 
724
  prompt_images = [image.resize((round(image.width * 28 / 32), round(image.height * 28 / 32))) for image in images]
 
905
  if not return_dict:
906
  return (image,)
907
 
908
+ return QwenImagePipelineOutput(images=image)