QwenTest
/
pythonProject
/diffusers-main
/src
/diffusers
/modular_pipelines
/stable_diffusion_xl
/denoise.py
| # Copyright 2025 The HuggingFace Team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import inspect | |
| from typing import Any, List, Optional, Tuple | |
| import torch | |
| from ...configuration_utils import FrozenDict | |
| from ...guiders import ClassifierFreeGuidance | |
| from ...models import ControlNetModel, UNet2DConditionModel | |
| from ...schedulers import EulerDiscreteScheduler | |
| from ...utils import logging | |
| from ..modular_pipeline import ( | |
| BlockState, | |
| LoopSequentialPipelineBlocks, | |
| ModularPipelineBlocks, | |
| PipelineState, | |
| ) | |
| from ..modular_pipeline_utils import ComponentSpec, InputParam, OutputParam | |
| from .modular_pipeline import StableDiffusionXLModularPipeline | |
| logger = logging.get_logger(__name__) # pylint: disable=invalid-name | |
# YiYi experimenting composable denoise loop
| # loop step (1): prepare latent input for denoiser | |
class StableDiffusionXLLoopBeforeDenoiser(ModularPipelineBlocks):
    """Loop step (1): scale the current latents with the scheduler before they
    are fed to the denoiser (UNet).

    Meant to be composed into the `sub_blocks` attribute of a
    `LoopSequentialPipelineBlocks` (e.g. `StableDiffusionXLDenoiseLoopWrapper`),
    which calls `__call__` once per timestep.
    """

    model_name = "stable-diffusion-xl"

    def expected_components(self) -> List[ComponentSpec]:
        # Only the scheduler is needed: it provides `scale_model_input`.
        return [
            ComponentSpec("scheduler", EulerDiscreteScheduler),
        ]

    def description(self) -> str:
        return (
            "step within the denoising loop that prepare the latent input for the denoiser. "
            "This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` "
            "object (e.g. `StableDiffusionXLDenoiseLoopWrapper`)"
        )

    def inputs(self) -> List[str]:
        return [
            InputParam(
                "latents",
                required=True,
                type_hint=torch.Tensor,
                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.",
            ),
        ]

    def __call__(self, components: StableDiffusionXLModularPipeline, block_state: BlockState, i: int, t: int):
        """Write `block_state.scaled_latents` for timestep `t` using the scheduler's input scaling."""
        block_state.scaled_latents = components.scheduler.scale_model_input(block_state.latents, t)
        return components, block_state
| # loop step (1): prepare latent input for denoiser (with inpainting) | |
class StableDiffusionXLInpaintLoopBeforeDenoiser(ModularPipelineBlocks):
    """Loop step (1), inpainting variant: scale the latents and, for an
    inpainting-specific 9-channel UNet, concatenate mask and masked-image
    latents onto the channel dimension.

    Meant to be composed into the `sub_blocks` attribute of a
    `LoopSequentialPipelineBlocks` object.
    """

    model_name = "stable-diffusion-xl"

    def expected_components(self) -> List[ComponentSpec]:
        return [
            ComponentSpec("scheduler", EulerDiscreteScheduler),
            ComponentSpec("unet", UNet2DConditionModel),
        ]

    def description(self) -> str:
        return (
            "step within the denoising loop that prepare the latent input for the denoiser (for inpainting workflow only). "
            "This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object"
        )

    def inputs(self) -> List[str]:
        return [
            InputParam(
                "latents",
                required=True,
                type_hint=torch.Tensor,
                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.",
            ),
            InputParam(
                "mask",
                type_hint=Optional[torch.Tensor],
                description="The mask to use for the denoising process, for inpainting task only. Can be generated in vae_encode or prepare_latent step.",
            ),
            InputParam(
                "masked_image_latents",
                type_hint=Optional[torch.Tensor],
                description="The masked image latents to use for the denoising process, for inpainting task only. Can be generated in vae_encode or prepare_latent step.",
            ),
        ]

    # NOTE(fix): declared as @staticmethod — the original definition had no
    # `self` parameter but was invoked as `self.check_inputs(components,
    # block_state)`, which would raise a TypeError (3 args passed to a
    # 2-parameter function) on every call.
    @staticmethod
    def check_inputs(components, block_state):
        """Validate that mask/masked_image_latents are present and channel counts
        add up for an inpainting-specific (9 input channels) UNet.

        Raises:
            ValueError: if required inpainting inputs are missing or the summed
                channel count does not match the UNet's `in_channels`.
        """
        num_channels_unet = components.num_channels_unet
        if num_channels_unet == 9:
            # default case for runwayml/stable-diffusion-inpainting
            if block_state.mask is None or block_state.masked_image_latents is None:
                raise ValueError("mask and masked_image_latents must be provided for inpainting-specific Unet")
            num_channels_latents = block_state.latents.shape[1]
            num_channels_mask = block_state.mask.shape[1]
            num_channels_masked_image = block_state.masked_image_latents.shape[1]
            if num_channels_latents + num_channels_mask + num_channels_masked_image != num_channels_unet:
                raise ValueError(
                    f"Incorrect configuration settings! The config of `components.unet`: {components.unet.config} expects"
                    f" {components.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                    f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
                    f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                    " `components.unet` or your `mask_image` or `image` input."
                )

    def __call__(self, components: StableDiffusionXLModularPipeline, block_state: BlockState, i: int, t: int):
        """Scale latents for timestep `t`; for a 9-channel UNet also concatenate
        mask and masked-image latents along the channel axis (dim=1)."""
        self.check_inputs(components, block_state)

        block_state.scaled_latents = components.scheduler.scale_model_input(block_state.latents, t)
        if components.num_channels_unet == 9:
            block_state.scaled_latents = torch.cat(
                [block_state.scaled_latents, block_state.mask, block_state.masked_image_latents], dim=1
            )

        return components, block_state
| # loop step (2): denoise the latents with guidance | |
class StableDiffusionXLLoopDenoiser(ModularPipelineBlocks):
    """Loop step (2): run the UNet once per guidance batch and combine the
    per-batch noise predictions into a single `block_state.noise_pred` via the
    configured guider (default: classifier-free guidance, scale 7.5).

    Meant to be composed into the `sub_blocks` attribute of a
    `LoopSequentialPipelineBlocks` (e.g. `StableDiffusionXLDenoiseLoopWrapper`).
    """

    model_name = "stable-diffusion-xl"

    def expected_components(self) -> List[ComponentSpec]:
        return [
            ComponentSpec(
                "guider",
                ClassifierFreeGuidance,
                config=FrozenDict({"guidance_scale": 7.5}),
                default_creation_method="from_config",
            ),
            ComponentSpec("unet", UNet2DConditionModel),
        ]

    def description(self) -> str:
        return (
            "Step within the denoising loop that denoise the latents with guidance. "
            "This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` "
            "object (e.g. `StableDiffusionXLDenoiseLoopWrapper`)"
        )

    def inputs(self) -> List[Tuple[str, Any]]:
        return [
            InputParam("cross_attention_kwargs"),
            InputParam(
                "num_inference_steps",
                required=True,
                type_hint=int,
                description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam(
                "timestep_cond",
                type_hint=Optional[torch.Tensor],
                description="The guidance scale embedding to use for Latent Consistency Models(LCMs). Can be generated in prepare_additional_conditioning step.",
            ),
            # Catch-all: pulls every pipeline-state field tagged with
            # `kwargs_type="guider_input_fields"` into the block state.
            InputParam(
                kwargs_type="guider_input_fields",
                description=(
                    "All conditional model inputs that need to be prepared with guider. "
                    "It should contain prompt_embeds/negative_prompt_embeds, "
                    "add_time_ids/negative_add_time_ids, "
                    "pooled_prompt_embeds/negative_pooled_prompt_embeds, "
                    "and ip_adapter_embeds/negative_ip_adapter_embeds (optional)."
                    "please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                ),
            ),
        ]

    def __call__(
        self, components: StableDiffusionXLModularPipeline, block_state: BlockState, i: int, t: int
    ) -> PipelineState:
        """Denoise `block_state.scaled_latents` at step `i`/timestep `t`,
        storing the guided prediction in `block_state.noise_pred`."""
        # Map the keys we'll see on each `guider_state_batch` (e.g. guider_state_batch.prompt_embeds)
        # to the corresponding (cond, uncond) fields on block_state. (e.g. block_state.prompt_embeds, block_state.negative_prompt_embeds)
        guider_input_fields = {
            "prompt_embeds": ("prompt_embeds", "negative_prompt_embeds"),
            "time_ids": ("add_time_ids", "negative_add_time_ids"),
            "text_embeds": ("pooled_prompt_embeds", "negative_pooled_prompt_embeds"),
            "image_embeds": ("ip_adapter_embeds", "negative_ip_adapter_embeds"),
        }

        components.guider.set_state(step=i, num_inference_steps=block_state.num_inference_steps, timestep=t)

        # Prepare mini-batches according to guidance method and `guider_input_fields`
        # Each guider_state_batch will have .prompt_embeds, .time_ids, text_embeds, image_embeds.
        # e.g. for CFG, we prepare two batches: one for uncond, one for cond
        # for first batch, guider_state_batch.prompt_embeds correspond to block_state.prompt_embeds
        # for second batch, guider_state_batch.prompt_embeds correspond to block_state.negative_prompt_embeds
        guider_state = components.guider.prepare_inputs(block_state, guider_input_fields)

        # run the denoiser for each guidance batch
        for guider_state_batch in guider_state:
            components.guider.prepare_models(components.unet)
            cond_kwargs = guider_state_batch.as_dict()
            # keep only the guider-managed fields; everything else on the batch is bookkeeping
            cond_kwargs = {k: v for k, v in cond_kwargs.items() if k in guider_input_fields}
            # prompt_embeds go in as encoder_hidden_states; the rest are `added_cond_kwargs`
            prompt_embeds = cond_kwargs.pop("prompt_embeds")

            # Predict the noise residual
            # store the noise_pred in guider_state_batch so that we can apply guidance across all batches
            guider_state_batch.noise_pred = components.unet(
                block_state.scaled_latents,
                t,
                encoder_hidden_states=prompt_embeds,
                timestep_cond=block_state.timestep_cond,
                cross_attention_kwargs=block_state.cross_attention_kwargs,
                added_cond_kwargs=cond_kwargs,
                return_dict=False,
            )[0]
            components.guider.cleanup_models(components.unet)

        # Perform guidance: combine the per-batch predictions into one tensor
        block_state.noise_pred = components.guider(guider_state)[0]

        return components, block_state
| # loop step (2): denoise the latents with guidance (with controlnet) | |
class StableDiffusionXLControlNetLoopDenoiser(ModularPipelineBlocks):
    """Loop step (2), ControlNet variant: run the ControlNet + UNet once per
    guidance batch and combine the noise predictions with the configured guider.

    Meant to be composed into the `sub_blocks` attribute of a
    `LoopSequentialPipelineBlocks` (e.g. `StableDiffusionXLDenoiseLoopWrapper`).
    """

    model_name = "stable-diffusion-xl"

    def expected_components(self) -> List[ComponentSpec]:
        return [
            ComponentSpec(
                "guider",
                ClassifierFreeGuidance,
                config=FrozenDict({"guidance_scale": 7.5}),
                default_creation_method="from_config",
            ),
            ComponentSpec("unet", UNet2DConditionModel),
            ComponentSpec("controlnet", ControlNetModel),
        ]

    def description(self) -> str:
        return (
            "step within the denoising loop that denoise the latents with guidance (with controlnet). "
            "This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` "
            "object (e.g. `StableDiffusionXLDenoiseLoopWrapper`)"
        )

    def inputs(self) -> List[Tuple[str, Any]]:
        return [
            InputParam("cross_attention_kwargs"),
            InputParam(
                "controlnet_cond",
                required=True,
                type_hint=torch.Tensor,
                description="The control image to use for the denoising process. Can be generated in prepare_controlnet_inputs step.",
            ),
            InputParam(
                "conditioning_scale",
                type_hint=float,
                description="The controlnet conditioning scale value to use for the denoising process. Can be generated in prepare_controlnet_inputs step.",
            ),
            InputParam(
                "guess_mode",
                required=True,
                type_hint=bool,
                description="The guess mode value to use for the denoising process. Can be generated in prepare_controlnet_inputs step.",
            ),
            InputParam(
                "controlnet_keep",
                required=True,
                type_hint=List[float],
                description="The controlnet keep values to use for the denoising process. Can be generated in prepare_controlnet_inputs step.",
            ),
            InputParam(
                "timestep_cond",
                type_hint=Optional[torch.Tensor],
                description="The guidance scale embedding to use for Latent Consistency Models(LCMs), can be generated by prepare_additional_conditioning step",
            ),
            InputParam(
                "num_inference_steps",
                required=True,
                type_hint=int,
                description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam(
                kwargs_type="guider_input_fields",
                description=(
                    "All conditional model inputs that need to be prepared with guider. "
                    "It should contain prompt_embeds/negative_prompt_embeds, "
                    "add_time_ids/negative_add_time_ids, "
                    "pooled_prompt_embeds/negative_pooled_prompt_embeds, "
                    "and ip_adapter_embeds/negative_ip_adapter_embeds (optional)."
                    "please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                ),
            ),
            InputParam(
                kwargs_type="controlnet_kwargs",
                description=(
                    "additional kwargs for controlnet (e.g. control_type_idx and control_type from the controlnet union input step )"
                    "please add `kwargs_type=controlnet_kwargs` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                ),
            ),
        ]

    # NOTE(fix): declared as @staticmethod — the original definition had no
    # `self` parameter but was invoked as `self.prepare_extra_kwargs(...)`,
    # which would have bound the instance to `func`. Also replaced the
    # mutable default `exclude_kwargs=[]` with an immutable tuple.
    @staticmethod
    def prepare_extra_kwargs(func, exclude_kwargs=(), **kwargs):
        """Return the subset of `kwargs` accepted by `func`'s signature,
        excluding any names listed in `exclude_kwargs`."""
        accepted_kwargs = set(inspect.signature(func).parameters.keys())
        extra_kwargs = {}
        for key, value in kwargs.items():
            if key in accepted_kwargs and key not in exclude_kwargs:
                extra_kwargs[key] = value
        return extra_kwargs

    def __call__(self, components: StableDiffusionXLModularPipeline, block_state: BlockState, i: int, t: int):
        """Denoise `block_state.scaled_latents` at step `i`/timestep `t` with
        ControlNet residuals, storing the guided result in `block_state.noise_pred`."""
        # Only forward controlnet kwargs that the controlnet's forward actually accepts.
        extra_controlnet_kwargs = self.prepare_extra_kwargs(
            components.controlnet.forward, **block_state.controlnet_kwargs
        )

        # Map the keys we'll see on each `guider_state_batch` (e.g. guider_state_batch.prompt_embeds)
        # to the corresponding (cond, uncond) fields on block_state. (e.g. block_state.prompt_embeds, block_state.negative_prompt_embeds)
        guider_input_fields = {
            "prompt_embeds": ("prompt_embeds", "negative_prompt_embeds"),
            "time_ids": ("add_time_ids", "negative_add_time_ids"),
            "text_embeds": ("pooled_prompt_embeds", "negative_pooled_prompt_embeds"),
            "image_embeds": ("ip_adapter_embeds", "negative_ip_adapter_embeds"),
        }

        # cond_scale for the timestep (controlnet input)
        if isinstance(block_state.controlnet_keep[i], list):
            # multi-controlnet: one keep factor per controlnet
            block_state.cond_scale = [
                c * s for c, s in zip(block_state.conditioning_scale, block_state.controlnet_keep[i])
            ]
        else:
            controlnet_cond_scale = block_state.conditioning_scale
            if isinstance(controlnet_cond_scale, list):
                controlnet_cond_scale = controlnet_cond_scale[0]
            block_state.cond_scale = controlnet_cond_scale * block_state.controlnet_keep[i]

        # default controlnet output/unet input for guess mode + conditional path
        block_state.down_block_res_samples_zeros = None
        block_state.mid_block_res_sample_zeros = None

        # guided denoiser step
        components.guider.set_state(step=i, num_inference_steps=block_state.num_inference_steps, timestep=t)

        # Prepare mini-batches according to guidance method and `guider_input_fields`
        # Each guider_state_batch will have .prompt_embeds, .time_ids, text_embeds, image_embeds.
        # e.g. for CFG, we prepare two batches: one for uncond, one for cond
        # for first batch, guider_state_batch.prompt_embeds correspond to block_state.prompt_embeds
        # for second batch, guider_state_batch.prompt_embeds correspond to block_state.negative_prompt_embeds
        guider_state = components.guider.prepare_inputs(block_state, guider_input_fields)

        # run the denoiser for each guidance batch
        for guider_state_batch in guider_state:
            components.guider.prepare_models(components.unet)

            # Prepare additional conditionings
            added_cond_kwargs = {
                "text_embeds": guider_state_batch.text_embeds,
                "time_ids": guider_state_batch.time_ids,
            }
            if hasattr(guider_state_batch, "image_embeds") and guider_state_batch.image_embeds is not None:
                added_cond_kwargs["image_embeds"] = guider_state_batch.image_embeds

            # Prepare controlnet additional conditionings
            controlnet_added_cond_kwargs = {
                "text_embeds": guider_state_batch.text_embeds,
                "time_ids": guider_state_batch.time_ids,
            }

            # run controlnet for the guidance batch
            if block_state.guess_mode and not components.guider.is_conditional:
                # guider always run uncond batch first, so these tensors should be set already
                down_block_res_samples = block_state.down_block_res_samples_zeros
                mid_block_res_sample = block_state.mid_block_res_sample_zeros
            else:
                down_block_res_samples, mid_block_res_sample = components.controlnet(
                    block_state.scaled_latents,
                    t,
                    encoder_hidden_states=guider_state_batch.prompt_embeds,
                    controlnet_cond=block_state.controlnet_cond,
                    conditioning_scale=block_state.cond_scale,
                    guess_mode=block_state.guess_mode,
                    added_cond_kwargs=controlnet_added_cond_kwargs,
                    return_dict=False,
                    **extra_controlnet_kwargs,
                )

            # assign it to block_state so it will be available for the uncond guidance batch
            if block_state.down_block_res_samples_zeros is None:
                block_state.down_block_res_samples_zeros = [torch.zeros_like(d) for d in down_block_res_samples]
            if block_state.mid_block_res_sample_zeros is None:
                block_state.mid_block_res_sample_zeros = torch.zeros_like(mid_block_res_sample)

            # Predict the noise
            # store the noise_pred in guider_state_batch so we can apply guidance across all batches
            guider_state_batch.noise_pred = components.unet(
                block_state.scaled_latents,
                t,
                encoder_hidden_states=guider_state_batch.prompt_embeds,
                timestep_cond=block_state.timestep_cond,
                cross_attention_kwargs=block_state.cross_attention_kwargs,
                added_cond_kwargs=added_cond_kwargs,
                down_block_additional_residuals=down_block_res_samples,
                mid_block_additional_residual=mid_block_res_sample,
                return_dict=False,
            )[0]
            components.guider.cleanup_models(components.unet)

        # Perform guidance
        block_state.noise_pred = components.guider(guider_state)[0]

        return components, block_state
| # loop step (3): scheduler step to update latents | |
class StableDiffusionXLLoopAfterDenoiser(ModularPipelineBlocks):
    """Loop step (3): advance the latents one scheduler step using the noise
    prediction produced by the denoiser step.

    Meant to be composed into the `sub_blocks` attribute of a
    `LoopSequentialPipelineBlocks` (e.g. `StableDiffusionXLDenoiseLoopWrapper`).
    """

    model_name = "stable-diffusion-xl"

    def expected_components(self) -> List[ComponentSpec]:
        return [
            ComponentSpec("scheduler", EulerDiscreteScheduler),
        ]

    def description(self) -> str:
        return (
            "step within the denoising loop that update the latents. "
            "This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` "
            "object (e.g. `StableDiffusionXLDenoiseLoopWrapper`)"
        )

    def inputs(self) -> List[Tuple[str, Any]]:
        return [
            InputParam("eta", default=0.0),
            InputParam("generator"),
        ]

    def intermediate_outputs(self) -> List[OutputParam]:
        return [OutputParam("latents", type_hint=torch.Tensor, description="The denoised latents")]

    # YiYi TODO: move this out of here
    # NOTE(fix): declared as @staticmethod — the original definition had no
    # `self` parameter but was invoked as `self.prepare_extra_kwargs(...)`,
    # which would have bound the instance to `func`. Also replaced the
    # mutable default `exclude_kwargs=[]` with an immutable tuple.
    @staticmethod
    def prepare_extra_kwargs(func, exclude_kwargs=(), **kwargs):
        """Return the subset of `kwargs` accepted by `func`'s signature,
        excluding any names listed in `exclude_kwargs`."""
        accepted_kwargs = set(inspect.signature(func).parameters.keys())
        extra_kwargs = {}
        for key, value in kwargs.items():
            if key in accepted_kwargs and key not in exclude_kwargs:
                extra_kwargs[key] = value
        return extra_kwargs

    def __call__(self, components: StableDiffusionXLModularPipeline, block_state: BlockState, i: int, t: int):
        """Perform one scheduler step on `block_state.latents` using
        `block_state.noise_pred` for timestep `t`."""
        # Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        # (not every scheduler accepts `generator`/`eta`, so filter by signature)
        block_state.extra_step_kwargs = self.prepare_extra_kwargs(
            components.scheduler.step, generator=block_state.generator, eta=block_state.eta
        )

        # Perform scheduler step using the predicted output
        block_state.latents_dtype = block_state.latents.dtype
        block_state.latents = components.scheduler.step(
            block_state.noise_pred,
            t,
            block_state.latents,
            **block_state.extra_step_kwargs,
            return_dict=False,
        )[0]

        if block_state.latents.dtype != block_state.latents_dtype:
            if torch.backends.mps.is_available():
                # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
                block_state.latents = block_state.latents.to(block_state.latents_dtype)

        return components, block_state
| # loop step (3): scheduler step to update latents (with inpainting) | |
class StableDiffusionXLInpaintLoopAfterDenoiser(ModularPipelineBlocks):
    """Loop step (3), inpainting variant: advance the latents one scheduler
    step, then (for a standard 4-channel UNet) blend the denoised latents with
    re-noised image latents outside the mask so unmasked regions are preserved.

    Meant to be composed into the `sub_blocks` attribute of a
    `LoopSequentialPipelineBlocks` (e.g. `StableDiffusionXLDenoiseLoopWrapper`).
    """

    model_name = "stable-diffusion-xl"

    def expected_components(self) -> List[ComponentSpec]:
        return [
            ComponentSpec("scheduler", EulerDiscreteScheduler),
            ComponentSpec("unet", UNet2DConditionModel),
        ]

    def description(self) -> str:
        return (
            "step within the denoising loop that update the latents (for inpainting workflow only). "
            "This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` "
            "object (e.g. `StableDiffusionXLDenoiseLoopWrapper`)"
        )

    def inputs(self) -> List[Tuple[str, Any]]:
        return [
            InputParam("eta", default=0.0),
            InputParam("generator"),
            InputParam(
                "timesteps",
                required=True,
                type_hint=torch.Tensor,
                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam(
                "mask",
                type_hint=Optional[torch.Tensor],
                description="The mask to use for the denoising process, for inpainting task only. Can be generated in vae_encode or prepare_latent step.",
            ),
            InputParam(
                "noise",
                type_hint=Optional[torch.Tensor],
                description="The noise added to the image latents, for inpainting task only. Can be generated in prepare_latent step.",
            ),
            InputParam(
                "image_latents",
                type_hint=Optional[torch.Tensor],
                description="The image latents to use for the denoising process, for inpainting/image-to-image task only. Can be generated in vae_encode or prepare_latent step.",
            ),
        ]

    def intermediate_outputs(self) -> List[OutputParam]:
        return [OutputParam("latents", type_hint=torch.Tensor, description="The denoised latents")]

    # NOTE(fix): declared as @staticmethod — the original definition had no
    # `self` parameter but was invoked as `self.prepare_extra_kwargs(...)`,
    # which would have bound the instance to `func`. Also replaced the
    # mutable default `exclude_kwargs=[]` with an immutable tuple.
    @staticmethod
    def prepare_extra_kwargs(func, exclude_kwargs=(), **kwargs):
        """Return the subset of `kwargs` accepted by `func`'s signature,
        excluding any names listed in `exclude_kwargs`."""
        accepted_kwargs = set(inspect.signature(func).parameters.keys())
        extra_kwargs = {}
        for key, value in kwargs.items():
            if key in accepted_kwargs and key not in exclude_kwargs:
                extra_kwargs[key] = value
        return extra_kwargs

    def check_inputs(self, components, block_state):
        """Validate that inpainting inputs required for a 4-channel UNet are present.

        Raises:
            ValueError: if `image_latents`, `mask`, or `noise` is missing.
        """
        if components.num_channels_unet == 4:
            if block_state.image_latents is None:
                raise ValueError(f"image_latents is required for this step {self.__class__.__name__}")
            if block_state.mask is None:
                raise ValueError(f"mask is required for this step {self.__class__.__name__}")
            if block_state.noise is None:
                raise ValueError(f"noise is required for this step {self.__class__.__name__}")

    def __call__(self, components: StableDiffusionXLModularPipeline, block_state: BlockState, i: int, t: int):
        """Scheduler-step the latents, then re-compose unmasked regions from the
        (appropriately re-noised) original image latents."""
        self.check_inputs(components, block_state)

        # Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        # (not every scheduler accepts `generator`/`eta`, so filter by signature)
        block_state.extra_step_kwargs = self.prepare_extra_kwargs(
            components.scheduler.step, generator=block_state.generator, eta=block_state.eta
        )

        # Perform scheduler step using the predicted output
        block_state.latents_dtype = block_state.latents.dtype
        block_state.latents = components.scheduler.step(
            block_state.noise_pred,
            t,
            block_state.latents,
            **block_state.extra_step_kwargs,
            return_dict=False,
        )[0]

        if block_state.latents.dtype != block_state.latents_dtype:
            if torch.backends.mps.is_available():
                # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
                block_state.latents = block_state.latents.to(block_state.latents_dtype)

        # adjust latent for inpainting
        if components.num_channels_unet == 4:
            block_state.init_latents_proper = block_state.image_latents
            if i < len(block_state.timesteps) - 1:
                # re-noise the original image latents to the *next* timestep so they
                # match the noise level of the just-updated latents
                block_state.noise_timestep = block_state.timesteps[i + 1]
                block_state.init_latents_proper = components.scheduler.add_noise(
                    block_state.init_latents_proper, block_state.noise, torch.tensor([block_state.noise_timestep])
                )

            # keep denoised content inside the mask, original content outside it
            block_state.latents = (
                1 - block_state.mask
            ) * block_state.init_latents_proper + block_state.mask * block_state.latents

        return components, block_state
| # the loop wrapper that iterates over the timesteps | |
class StableDiffusionXLDenoiseLoopWrapper(LoopSequentialPipelineBlocks):
    """Loop wrapper: iterates over `timesteps`, running the composed
    `sub_blocks` (before/denoiser/after) once per timestep via `loop_step`.
    """

    model_name = "stable-diffusion-xl"

    def description(self) -> str:
        return (
            "Pipeline block that iteratively denoise the latents over `timesteps`. "
            "The specific steps with each iteration can be customized with `sub_blocks` attributes"
        )

    def loop_expected_components(self) -> List[ComponentSpec]:
        return [
            ComponentSpec(
                "guider",
                ClassifierFreeGuidance,
                config=FrozenDict({"guidance_scale": 7.5}),
                default_creation_method="from_config",
            ),
            ComponentSpec("scheduler", EulerDiscreteScheduler),
            ComponentSpec("unet", UNet2DConditionModel),
        ]

    def loop_inputs(self) -> List[InputParam]:
        return [
            InputParam(
                "timesteps",
                required=True,
                type_hint=torch.Tensor,
                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam(
                "num_inference_steps",
                required=True,
                type_hint=int,
                description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
        ]

    def __call__(self, components: StableDiffusionXLModularPipeline, state: PipelineState) -> PipelineState:
        """Run the full denoising loop, updating `state` with the final block state."""
        block_state = self.get_block_state(state)

        # LCM-style UNets (time_cond_proj_dim set) embed guidance internally, so
        # external guidance must be disabled.
        # (fix: replaced the redundant `True if ... else False` with the boolean expression itself)
        block_state.disable_guidance = components.unet.config.time_cond_proj_dim is not None
        if block_state.disable_guidance:
            components.guider.disable()
        else:
            components.guider.enable()

        # warmup steps come from schedulers whose `order` > 1 producing extra timesteps
        block_state.num_warmup_steps = max(
            len(block_state.timesteps) - block_state.num_inference_steps * components.scheduler.order, 0
        )

        with self.progress_bar(total=block_state.num_inference_steps) as progress_bar:
            for i, t in enumerate(block_state.timesteps):
                components, block_state = self.loop_step(components, block_state, i=i, t=t)
                # only advance the bar once per effective inference step
                if i == len(block_state.timesteps) - 1 or (
                    (i + 1) > block_state.num_warmup_steps and (i + 1) % components.scheduler.order == 0
                ):
                    progress_bar.update()

        self.set_block_state(state, block_state)

        return components, state
| # composing the denoising loops | |
class StableDiffusionXLDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
    """Standard denoise step (text2img / img2img): the loop wrapper composed
    with the plain before/denoiser/after sub-blocks."""

    block_classes = [
        StableDiffusionXLLoopBeforeDenoiser,
        StableDiffusionXLLoopDenoiser,
        StableDiffusionXLLoopAfterDenoiser,
    ]
    # Registration names for the sub-blocks, in execution order.
    block_names = ["before_denoiser", "denoiser", "after_denoiser"]

    def description(self) -> str:
        return (
            "Denoise step that iteratively denoise the latents. \n"
            "Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
            "At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
            " - `StableDiffusionXLLoopBeforeDenoiser`\n"
            " - `StableDiffusionXLLoopDenoiser`\n"
            " - `StableDiffusionXLLoopAfterDenoiser`\n"
            "This block supports both text2img and img2img tasks."
        )
| # control_cond | |
class StableDiffusionXLControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
    """ControlNet denoise step (control_cond): the loop wrapper composed with
    the ControlNet denoiser sub-block."""

    block_classes = [
        StableDiffusionXLLoopBeforeDenoiser,
        StableDiffusionXLControlNetLoopDenoiser,
        StableDiffusionXLLoopAfterDenoiser,
    ]
    # Registration names for the sub-blocks, in execution order.
    block_names = ["before_denoiser", "denoiser", "after_denoiser"]

    def description(self) -> str:
        return (
            "Denoise step that iteratively denoise the latents with controlnet. \n"
            "Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
            "At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
            " - `StableDiffusionXLLoopBeforeDenoiser`\n"
            " - `StableDiffusionXLControlNetLoopDenoiser`\n"
            " - `StableDiffusionXLLoopAfterDenoiser`\n"
            "This block supports using controlnet for both text2img and img2img tasks."
        )
| # mask | |
class StableDiffusionXLInpaintDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
    """Inpainting denoise step (mask): the loop wrapper composed with the
    inpainting before/after sub-blocks."""

    block_classes = [
        StableDiffusionXLInpaintLoopBeforeDenoiser,
        StableDiffusionXLLoopDenoiser,
        StableDiffusionXLInpaintLoopAfterDenoiser,
    ]
    # Registration names for the sub-blocks, in execution order.
    block_names = ["before_denoiser", "denoiser", "after_denoiser"]

    def description(self) -> str:
        # (fix: "onlysupports" -> "only supports" in the user-facing description)
        return (
            "Denoise step that iteratively denoise the latents(for inpainting task only). \n"
            "Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
            "At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
            " - `StableDiffusionXLInpaintLoopBeforeDenoiser`\n"
            " - `StableDiffusionXLLoopDenoiser`\n"
            " - `StableDiffusionXLInpaintLoopAfterDenoiser`\n"
            "This block only supports inpainting tasks."
        )
| # control_cond + mask | |
class StableDiffusionXLInpaintControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
    """Inpainting + ControlNet denoise step (control_cond + mask): the loop
    wrapper composed with the inpainting before/after sub-blocks and the
    ControlNet denoiser sub-block."""

    block_classes = [
        StableDiffusionXLInpaintLoopBeforeDenoiser,
        StableDiffusionXLControlNetLoopDenoiser,
        StableDiffusionXLInpaintLoopAfterDenoiser,
    ]
    # Registration names for the sub-blocks, in execution order.
    block_names = ["before_denoiser", "denoiser", "after_denoiser"]

    def description(self) -> str:
        return (
            "Denoise step that iteratively denoise the latents(for inpainting task only) with controlnet. \n"
            "Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
            "At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
            " - `StableDiffusionXLInpaintLoopBeforeDenoiser`\n"
            " - `StableDiffusionXLControlNetLoopDenoiser`\n"
            " - `StableDiffusionXLInpaintLoopAfterDenoiser`\n"
            "This block only supports using controlnet for inpainting tasks."
        )