QwenTest
/
pythonProject
/diffusers-main
/build
/lib
/diffusers
/modular_pipelines
/qwenimage
/modular_blocks.py
| # Copyright 2025 Qwen-Image Team and The HuggingFace Team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from ...utils import logging | |
| from ..modular_pipeline import AutoPipelineBlocks, SequentialPipelineBlocks | |
| from ..modular_pipeline_utils import InsertableDict | |
| from .before_denoise import ( | |
| QwenImageControlNetBeforeDenoiserStep, | |
| QwenImageCreateMaskLatentsStep, | |
| QwenImageEditRoPEInputsStep, | |
| QwenImagePrepareLatentsStep, | |
| QwenImagePrepareLatentsWithStrengthStep, | |
| QwenImageRoPEInputsStep, | |
| QwenImageSetTimestepsStep, | |
| QwenImageSetTimestepsWithStrengthStep, | |
| ) | |
| from .decoders import QwenImageDecoderStep, QwenImageInpaintProcessImagesOutputStep, QwenImageProcessImagesOutputStep | |
| from .denoise import ( | |
| QwenImageControlNetDenoiseStep, | |
| QwenImageDenoiseStep, | |
| QwenImageEditDenoiseStep, | |
| QwenImageEditInpaintDenoiseStep, | |
| QwenImageInpaintControlNetDenoiseStep, | |
| QwenImageInpaintDenoiseStep, | |
| QwenImageLoopBeforeDenoiserControlNet, | |
| ) | |
| from .encoders import ( | |
| QwenImageControlNetVaeEncoderStep, | |
| QwenImageEditResizeDynamicStep, | |
| QwenImageEditTextEncoderStep, | |
| QwenImageInpaintProcessImagesInputStep, | |
| QwenImageProcessImagesInputStep, | |
| QwenImageTextEncoderStep, | |
| QwenImageVaeEncoderDynamicStep, | |
| ) | |
| from .inputs import QwenImageControlNetInputsStep, QwenImageInputsDynamicStep, QwenImageTextInputsStep | |
# Module-level logger, following the diffusers convention.
logger = logging.get_logger(__name__)

# 1. QwenImage

## 1.1 QwenImage/text2image

#### QwenImage/decode
#### (standard decode step works for most tasks except for inpaint)

# Ordered preset: decode latents to images, then postprocess them.
QwenImageDecodeBlocks = InsertableDict(
    [
        ("decode", QwenImageDecoderStep()),
        ("postprocess", QwenImageProcessImagesOutputStep()),
    ]
)
class QwenImageDecodeStep(SequentialPipelineBlocks):
    """Sequential blocks that turn denoised latents into postprocessed images."""

    model_name = "qwenimage"
    block_names = QwenImageDecodeBlocks.keys()
    block_classes = QwenImageDecodeBlocks.values()

    def description(self):
        """Return a human-readable summary of this step."""
        summary = "Decode step that decodes the latents to images and postprocess the generated image."
        return summary
#### QwenImage/text2image presets
# Full text->image pipeline, in execution order: encode prompt, normalize inputs,
# prepare latents, schedule timesteps, build RoPE inputs, denoise, decode.
TEXT2IMAGE_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageTextEncoderStep()),
        ("input", QwenImageTextInputsStep()),
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsStep()),
        ("prepare_rope_inputs", QwenImageRoPEInputsStep()),
        ("denoise", QwenImageDenoiseStep()),
        ("decode", QwenImageDecodeStep()),
    ]
)
## 1.2 QwenImage/inpaint

#### QwenImage/inpaint vae encoder
QwenImageInpaintVaeEncoderBlocks = InsertableDict(
    [
        (
            "preprocess",
            # Fix: instantiate the step — every other preset entry in this module is an
            # instance; the original passed the class object here.
            QwenImageInpaintProcessImagesInputStep(),
        ),  # image, mask_image -> processed_image, processed_mask_image, mask_overlay_kwargs
        ("encode", QwenImageVaeEncoderDynamicStep()),  # processed_image -> image_latents
    ]
)
class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
    """Preprocess the image/mask pair and encode the image for inpainting."""

    model_name = "qwenimage"
    block_names = QwenImageInpaintVaeEncoderBlocks.keys()
    block_classes = QwenImageInpaintVaeEncoderBlocks.values()

    def description(self) -> str:
        """Return a human-readable summary of this step."""
        parts = (
            "This step is used for processing image and mask inputs for inpainting tasks. It:\n",
            " - Resizes the image to the target size, based on `height` and `width`.\n",
            " - Processes and updates `image` and `mask_image`.\n",
            " - Creates `image_latents`.",
        )
        return "".join(parts)
#### QwenImage/inpaint inputs
QwenImageInpaintInputBlocks = InsertableDict(
    [
        ("text_inputs", QwenImageTextInputsStep()),  # default step to process text embeddings
        (
            "additional_inputs",
            # batches/patchifies `image_latents` and batches the processed mask alongside it
            QwenImageInputsDynamicStep(
                image_latent_inputs=["image_latents"], additional_batch_inputs=["processed_mask_image"]
            ),
        ),
    ]
)
class QwenImageInpaintInputStep(SequentialPipelineBlocks):
    """Input step that standardizes text embeddings and inpaint latents/mask inputs."""

    model_name = "qwenimage"
    block_classes = QwenImageInpaintInputBlocks.values()
    block_names = QwenImageInpaintInputBlocks.keys()

    def description(self):
        # Fix: the original returned only the first literal; the following strings were
        # unreachable bare-string statements. Parenthesize so implicit concatenation
        # produces the full description (matching the sibling steps' style).
        return (
            "Input step that prepares the inputs for the inpainting denoising step. It:\n"
            " - make sure the text embeddings have consistent batch size as well as the additional inputs (`image_latents` and `processed_mask_image`).\n"
            " - update height/width based `image_latents`, patchify `image_latents`."
        )
# QwenImage/inpaint prepare latents
# Ordered preset: noise the image latents (strength-based), then build the latent mask.
QwenImageInpaintPrepareLatentsBlocks = InsertableDict(
    [
        ("add_noise_to_latents", QwenImagePrepareLatentsWithStrengthStep()),
        ("create_mask_latents", QwenImageCreateMaskLatentsStep()),
    ]
)
class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
    """Prepare noisy latents and the patchified latent mask for inpainting."""

    model_name = "qwenimage"
    block_classes = QwenImageInpaintPrepareLatentsBlocks.values()
    block_names = QwenImageInpaintPrepareLatentsBlocks.keys()

    def description(self) -> str:
        # Typos fixed: "pachified" -> "patchified", "processedmask" -> "processed mask".
        return (
            "This step prepares the latents/image_latents and mask inputs for the inpainting denoising step. It:\n"
            " - Add noise to the image latents to create the latents input for the denoiser.\n"
            " - Create the patchified latents `mask` based on the processed mask image.\n"
        )
#### QwenImage/inpaint decode
# Same as the standard decode preset, but the postprocess step can apply the mask overlay.
QwenImageInpaintDecodeBlocks = InsertableDict(
    [
        ("decode", QwenImageDecoderStep()),
        ("postprocess", QwenImageInpaintProcessImagesOutputStep()),
    ]
)
class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
    """Decode latents and postprocess, optionally applying the inpaint mask overlay."""

    model_name = "qwenimage"
    block_classes = QwenImageInpaintDecodeBlocks.values()
    block_names = QwenImageInpaintDecodeBlocks.keys()

    def description(self):
        # Typo/grammar fixed: "optional apply the mask overally" -> "optionally apply the mask overlay".
        return "Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask overlay to the original image."
#### QwenImage/inpaint presets
# Full inpainting pipeline, in execution order.
INPAINT_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageTextEncoderStep()),
        ("vae_encoder", QwenImageInpaintVaeEncoderStep()),
        ("input", QwenImageInpaintInputStep()),
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsWithStrengthStep()),
        ("prepare_inpaint_latents", QwenImageInpaintPrepareLatentsStep()),
        ("prepare_rope_inputs", QwenImageRoPEInputsStep()),
        ("denoise", QwenImageInpaintDenoiseStep()),
        ("decode", QwenImageInpaintDecodeStep()),
    ]
)
## 1.3 QwenImage/img2img

#### QwenImage/img2img vae encoder
# Preprocess the input image, then encode it into latents.
QwenImageImg2ImgVaeEncoderBlocks = InsertableDict(
    [
        ("preprocess", QwenImageProcessImagesInputStep()),
        ("encode", QwenImageVaeEncoderDynamicStep()),
    ]
)
class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
    """Preprocess and VAE-encode the input image for img2img."""

    model_name = "qwenimage"
    block_classes = QwenImageImg2ImgVaeEncoderBlocks.values()
    block_names = QwenImageImg2ImgVaeEncoderBlocks.keys()

    def description(self) -> str:
        # Typo fixed: "andencode" -> "and encode".
        return "Vae encoder step that preprocess and encode the image inputs into their latent representations."
#### QwenImage/img2img inputs
QwenImageImg2ImgInputBlocks = InsertableDict(
    [
        ("text_inputs", QwenImageTextInputsStep()),  # default step to process text embeddings
        # batches and patchifies `image_latents`
        ("additional_inputs", QwenImageInputsDynamicStep(image_latent_inputs=["image_latents"])),
    ]
)
class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
    """Input step that standardizes text embeddings and img2img image latents."""

    model_name = "qwenimage"
    block_classes = QwenImageImg2ImgInputBlocks.values()
    block_names = QwenImageImg2ImgInputBlocks.keys()

    def description(self):
        # Fix: the original returned only the first literal; the following strings were
        # unreachable bare-string statements. Parenthesize for implicit concatenation.
        return (
            "Input step that prepares the inputs for the img2img denoising step. It:\n"
            " - make sure the text embeddings have consistent batch size as well as the additional inputs (`image_latents`).\n"
            " - update height/width based `image_latents`, patchify `image_latents`."
        )
#### QwenImage/img2img presets
# Full img2img pipeline, in execution order.
IMAGE2IMAGE_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageTextEncoderStep()),
        ("vae_encoder", QwenImageImg2ImgVaeEncoderStep()),
        ("input", QwenImageImg2ImgInputStep()),
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsWithStrengthStep()),
        ("prepare_img2img_latents", QwenImagePrepareLatentsWithStrengthStep()),
        ("prepare_rope_inputs", QwenImageRoPEInputsStep()),
        ("denoise", QwenImageDenoiseStep()),
        ("decode", QwenImageDecodeStep()),
    ]
)
## 1.4 QwenImage/controlnet

#### QwenImage/controlnet presets
# These blocks are meant to be inserted into a task preset, not run standalone.
CONTROLNET_BLOCKS = InsertableDict(
    [
        ("controlnet_vae_encoder", QwenImageControlNetVaeEncoderStep()),  # vae encoder step for control_image
        ("controlnet_inputs", QwenImageControlNetInputsStep()),  # additional input step for controlnet
        (
            "controlnet_before_denoise",
            QwenImageControlNetBeforeDenoiserStep(),
        ),  # before denoise step (after set_timesteps step)
        (
            "controlnet_denoise_loop_before",
            QwenImageLoopBeforeDenoiserControlNet(),
        ),  # controlnet loop step (insert before the denoiseloop_denoiser)
    ]
)
## 1.5 QwenImage/auto encoders

#### for inpaint and img2img tasks
class QwenImageAutoVaeEncoderStep(AutoPipelineBlocks):
    """Auto VAE-encoder: dispatches to the inpaint or img2img encoder at runtime.

    First matching trigger wins: `mask_image` -> inpaint, `image` -> img2img,
    neither -> the step is skipped.
    """

    # Consistency fix: sibling blocks in this module declare `model_name`; it was omitted here.
    model_name = "qwenimage"
    block_classes = [QwenImageInpaintVaeEncoderStep, QwenImageImg2ImgVaeEncoderStep]
    block_names = ["inpaint", "img2img"]
    block_trigger_inputs = ["mask_image", "image"]

    def description(self):
        return (
            "Vae encoder step that encode the image inputs into their latent representations.\n"
            + "This is an auto pipeline block.\n"
            + " - `QwenImageInpaintVaeEncoderStep` (inpaint) is used when `mask_image` is provided.\n"
            + " - `QwenImageImg2ImgVaeEncoderStep` (img2img) is used when `image` is provided.\n"
            + " - if `mask_image` or `image` is not provided, step will be skipped."
        )
# for controlnet tasks
class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
    """Optional auto block: VAE-encode `control_image` when provided, otherwise skip."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [QwenImageControlNetVaeEncoderStep]
    block_names = ["controlnet"]
    block_trigger_inputs = ["control_image"]

    def description(self):
        return (
            "Vae encoder step that encode the image inputs into their latent representations.\n"
            + "This is an auto pipeline block.\n"
            + " - `QwenImageControlNetVaeEncoderStep` (controlnet) is used when `control_image` is provided.\n"
            + " - if `control_image` is not provided, step will be skipped."
        )
## 1.6 QwenImage/auto inputs

# text2image/inpaint/img2img
class QwenImageAutoInputStep(AutoPipelineBlocks):
    """Auto input step: dispatches between inpaint, img2img and text2image input steps."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [QwenImageInpaintInputStep, QwenImageImg2ImgInputStep, QwenImageTextInputsStep]
    block_names = ["inpaint", "img2img", "text2image"]
    # `None` trigger = unconditional fallback (text2image).
    block_trigger_inputs = ["processed_mask_image", "image_latents", None]

    def description(self):
        return (
            "Input step that standardize the inputs for the denoising step, e.g. make sure inputs have consistent batch size, and patchified. \n"
            " This is an auto pipeline block that works for text2image/inpaint/img2img tasks.\n"
            + " - `QwenImageInpaintInputStep` (inpaint) is used when `processed_mask_image` is provided.\n"
            + " - `QwenImageImg2ImgInputStep` (img2img) is used when `image_latents` is provided.\n"
            + " - `QwenImageTextInputsStep` (text2image) is used when both `processed_mask_image` and `image_latents` are not provided.\n"
        )
# controlnet
class QwenImageOptionalControlNetInputStep(AutoPipelineBlocks):
    """Optional auto block: prepare controlnet inputs when `control_image_latents` exists."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [QwenImageControlNetInputsStep]
    block_names = ["controlnet"]
    block_trigger_inputs = ["control_image_latents"]

    def description(self):
        return (
            "Controlnet input step that prepare the control_image_latents input.\n"
            + "This is an auto pipeline block.\n"
            + " - `QwenImageControlNetInputsStep` (controlnet) is used when `control_image_latents` is provided.\n"
            + " - if `control_image_latents` is not provided, step will be skipped."
        )
## 1.7 QwenImage/auto before denoise step
# compose the steps into a BeforeDenoiseStep for text2image/img2img/inpaint tasks before combine into an auto step

# QwenImage/text2image before denoise
QwenImageText2ImageBeforeDenoiseBlocks = InsertableDict(
    [
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsStep()),
        ("prepare_rope_inputs", QwenImageRoPEInputsStep()),
    ]
)
class QwenImageText2ImageBeforeDenoiseStep(SequentialPipelineBlocks):
    """Composite before-denoise step for the text2image task."""

    model_name = "qwenimage"
    block_names = QwenImageText2ImageBeforeDenoiseBlocks.keys()
    block_classes = QwenImageText2ImageBeforeDenoiseBlocks.values()

    def description(self):
        """Summarize what this composite step prepares."""
        task = "text2image"
        return (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) "
            f"for the denoise step for {task} task."
        )
# QwenImage/inpaint before denoise
QwenImageInpaintBeforeDenoiseBlocks = InsertableDict(
    [
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsWithStrengthStep()),
        ("prepare_inpaint_latents", QwenImageInpaintPrepareLatentsStep()),
        ("prepare_rope_inputs", QwenImageRoPEInputsStep()),
    ]
)
class QwenImageInpaintBeforeDenoiseStep(SequentialPipelineBlocks):
    """Composite before-denoise step for the inpaint task."""

    model_name = "qwenimage"
    block_names = QwenImageInpaintBeforeDenoiseBlocks.keys()
    block_classes = QwenImageInpaintBeforeDenoiseBlocks.values()

    def description(self):
        """Summarize what this composite step prepares."""
        task = "inpaint"
        return (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) "
            f"for the denoise step for {task} task."
        )
# QwenImage/img2img before denoise
QwenImageImg2ImgBeforeDenoiseBlocks = InsertableDict(
    [
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsWithStrengthStep()),
        ("prepare_img2img_latents", QwenImagePrepareLatentsWithStrengthStep()),
        ("prepare_rope_inputs", QwenImageRoPEInputsStep()),
    ]
)
class QwenImageImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
    """Composite before-denoise step for the img2img task."""

    model_name = "qwenimage"
    block_names = QwenImageImg2ImgBeforeDenoiseBlocks.keys()
    block_classes = QwenImageImg2ImgBeforeDenoiseBlocks.values()

    def description(self):
        """Summarize what this composite step prepares."""
        task = "img2img"
        return (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) "
            f"for the denoise step for {task} task."
        )
# auto before_denoise step for text2image, inpaint, img2img tasks
class QwenImageAutoBeforeDenoiseStep(AutoPipelineBlocks):
    """Auto before-denoise step: dispatches between inpaint/img2img/text2image variants."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [
        QwenImageInpaintBeforeDenoiseStep,
        QwenImageImg2ImgBeforeDenoiseStep,
        QwenImageText2ImageBeforeDenoiseStep,
    ]
    block_names = ["inpaint", "img2img", "text2image"]
    # `None` trigger = unconditional fallback (text2image).
    block_trigger_inputs = ["processed_mask_image", "image_latents", None]

    def description(self):
        return (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step.\n"
            + "This is an auto pipeline block that works for text2img, inpainting, img2img tasks.\n"
            + " - `QwenImageInpaintBeforeDenoiseStep` (inpaint) is used when `processed_mask_image` is provided.\n"
            + " - `QwenImageImg2ImgBeforeDenoiseStep` (img2img) is used when `image_latents` is provided.\n"
            + " - `QwenImageText2ImageBeforeDenoiseStep` (text2image) is used when both `processed_mask_image` and `image_latents` are not provided.\n"
        )
# auto before_denoise step for controlnet tasks
class QwenImageOptionalControlNetBeforeDenoiseStep(AutoPipelineBlocks):
    """Optional auto block: controlnet before-denoise prep, skipped without control latents."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [QwenImageControlNetBeforeDenoiserStep]
    block_names = ["controlnet"]
    block_trigger_inputs = ["control_image_latents"]

    def description(self):
        return (
            "Controlnet before denoise step that prepare the controlnet input.\n"
            + "This is an auto pipeline block.\n"
            + " - `QwenImageControlNetBeforeDenoiserStep` (controlnet) is used when `control_image_latents` is provided.\n"
            + " - if `control_image_latents` is not provided, step will be skipped."
        )
## 1.8 QwenImage/auto denoise

# auto denoise step for controlnet tasks: works for all tasks with controlnet
class QwenImageControlNetAutoDenoiseStep(AutoPipelineBlocks):
    """Auto controlnet denoise: inpaint variant when `mask` exists, plain otherwise."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [QwenImageInpaintControlNetDenoiseStep, QwenImageControlNetDenoiseStep]
    block_names = ["inpaint_denoise", "denoise"]
    # `None` trigger = unconditional fallback.
    block_trigger_inputs = ["mask", None]

    def description(self):
        return (
            "Controlnet step during the denoising process. \n"
            " This is an auto pipeline block that works for inpaint and text2image/img2img tasks with controlnet.\n"
            + " - `QwenImageInpaintControlNetDenoiseStep` (inpaint) is used when `mask` is provided.\n"
            + " - `QwenImageControlNetDenoiseStep` (text2image/img2img) is used when `mask` is not provided.\n"
        )
# auto denoise step for everything: works for all tasks with or without controlnet
class QwenImageAutoDenoiseStep(AutoPipelineBlocks):
    """Auto denoise step covering all tasks, with or without controlnet."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    model_name = "qwenimage"
    block_classes = [
        QwenImageControlNetAutoDenoiseStep,
        QwenImageInpaintDenoiseStep,
        QwenImageDenoiseStep,
    ]
    block_names = ["controlnet_denoise", "inpaint_denoise", "denoise"]
    # `None` trigger = unconditional fallback.
    block_trigger_inputs = ["control_image_latents", "mask", None]

    def description(self):
        return (
            "Denoise step that iteratively denoise the latents. \n"
            " This is an auto pipeline block that works for inpaint/text2image/img2img tasks. It also works with controlnet\n"
            + " - `QwenImageControlNetAutoDenoiseStep` (controlnet) is used when `control_image_latents` is provided.\n"
            + " - `QwenImageInpaintDenoiseStep` (inpaint) is used when `mask` is provided and `control_image_latents` is not provided.\n"
            + " - `QwenImageDenoiseStep` (text2image/img2img) is used when `mask` is not provided and `control_image_latents` is not provided.\n"
        )
## 1.9 QwenImage/auto decode

# auto decode step for inpaint and text2image tasks
class QwenImageAutoDecodeStep(AutoPipelineBlocks):
    """Auto decode: inpaint decode (mask overlay) when `mask` exists, plain decode otherwise."""

    # Consistency fix: declare `model_name` like the other blocks in this module.
    # NOTE(review): this block is reused by the QwenImage-Edit presets as well — confirm
    # "qwenimage" is the intended label there.
    model_name = "qwenimage"
    block_classes = [QwenImageInpaintDecodeStep, QwenImageDecodeStep]
    block_names = ["inpaint_decode", "decode"]
    # `None` trigger = unconditional fallback.
    block_trigger_inputs = ["mask", None]

    def description(self):
        return (
            "Decode step that decode the latents into images. \n"
            " This is an auto pipeline block that works for inpaint/text2image/img2img tasks, for both QwenImage and QwenImage-Edit.\n"
            + " - `QwenImageInpaintDecodeStep` (inpaint) is used when `mask` is provided.\n"
            + " - `QwenImageDecodeStep` (text2image/img2img) is used when `mask` is not provided.\n"
        )
## 1.10 QwenImage/auto block & presets
# Auto preset: each auto step picks its concrete sub-block at runtime from the provided inputs.
AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageTextEncoderStep()),
        ("vae_encoder", QwenImageAutoVaeEncoderStep()),
        ("controlnet_vae_encoder", QwenImageOptionalControlNetVaeEncoderStep()),
        ("input", QwenImageAutoInputStep()),
        ("controlnet_input", QwenImageOptionalControlNetInputStep()),
        ("before_denoise", QwenImageAutoBeforeDenoiseStep()),
        ("controlnet_before_denoise", QwenImageOptionalControlNetBeforeDenoiseStep()),
        ("denoise", QwenImageAutoDenoiseStep()),
        ("decode", QwenImageAutoDecodeStep()),
    ]
)
class QwenImageAutoBlocks(SequentialPipelineBlocks):
    """Auto modular pipeline for all QwenImage tasks (text2image/img2img/inpaint/controlnet)."""

    model_name = "qwenimage"
    block_names = AUTO_BLOCKS.keys()
    block_classes = AUTO_BLOCKS.values()

    def description(self):
        """Describe the supported workflows and the inputs that select them."""
        usage_lines = [
            "Auto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage.\n",
            "- for image-to-image generation, you need to provide `image`\n",
            "- for inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop` \n",
            "- to run the controlnet workflow, you need to provide `control_image`\n",
            "- for text-to-image generation, all you need to provide is `prompt`",
        ]
        return "".join(usage_lines)
# 2. QwenImage-Edit

## 2.1 QwenImage-Edit/edit

#### QwenImage-Edit/edit vl encoder: take both image and text prompts
# Resize the image first, then encode image + text together with the VL text encoder.
QwenImageEditVLEncoderBlocks = InsertableDict(
    [
        ("resize", QwenImageEditResizeDynamicStep()),
        ("encode", QwenImageEditTextEncoderStep()),
    ]
)
class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
    """Resize the image, then jointly encode image and text prompts (Edit VL encoder)."""

    model_name = "qwenimage"
    block_classes = QwenImageEditVLEncoderBlocks.values()
    block_names = QwenImageEditVLEncoderBlocks.keys()

    def description(self) -> str:
        # Typo fixed: "image an text" -> "image and text".
        return "QwenImage-Edit VL encoder step that encode the image and text prompts together."
#### QwenImage-Edit/edit vae encoder
QwenImageEditVaeEncoderBlocks = InsertableDict(
    [
        ("resize", QwenImageEditResizeDynamicStep()),  # edit has a different resize step
        ("preprocess", QwenImageProcessImagesInputStep()),  # resized_image -> processed_image
        ("encode", QwenImageVaeEncoderDynamicStep()),  # processed_image -> image_latents
    ]
)
class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
    """Resize, preprocess, and VAE-encode the edit image."""

    model_name = "qwenimage"
    block_names = QwenImageEditVaeEncoderBlocks.keys()
    block_classes = QwenImageEditVaeEncoderBlocks.values()

    def description(self) -> str:
        """Return a human-readable summary of this step."""
        summary = "Vae encoder step that encode the image inputs into their latent representations."
        return summary
#### QwenImage-Edit/edit input
QwenImageEditInputBlocks = InsertableDict(
    [
        ("text_inputs", QwenImageTextInputsStep()),  # default step to process text embeddings
        # batches and patchifies `image_latents`
        ("additional_inputs", QwenImageInputsDynamicStep(image_latent_inputs=["image_latents"])),
    ]
)
class QwenImageEditInputStep(SequentialPipelineBlocks):
    """Input step that standardizes text embeddings and edit image latents."""

    model_name = "qwenimage"
    block_classes = QwenImageEditInputBlocks.values()
    block_names = QwenImageEditInputBlocks.keys()

    def description(self):
        # Fix: the original returned only the first literal; the following strings were
        # unreachable bare-string statements. Parenthesize for implicit concatenation.
        return (
            "Input step that prepares the inputs for the edit denoising step. It:\n"
            " - make sure the text embeddings have consistent batch size as well as the additional inputs: \n"
            " - `image_latents`.\n"
            " - update height/width based `image_latents`, patchify `image_latents`."
        )
#### QwenImage/edit presets
# Full edit (img2img-style) pipeline for QwenImage-Edit, in execution order.
EDIT_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageEditVLEncoderStep()),
        ("vae_encoder", QwenImageEditVaeEncoderStep()),
        ("input", QwenImageEditInputStep()),
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsStep()),
        ("prepare_rope_inputs", QwenImageEditRoPEInputsStep()),
        ("denoise", QwenImageEditDenoiseStep()),
        ("decode", QwenImageDecodeStep()),
    ]
)
## 2.2 QwenImage-Edit/edit inpaint

#### QwenImage-Edit/edit inpaint vae encoder: the difference from regular inpaint is the resize step
QwenImageEditInpaintVaeEncoderBlocks = InsertableDict(
    [
        ("resize", QwenImageEditResizeDynamicStep()),  # image -> resized_image
        (
            "preprocess",
            # Fix: instantiate the step — every other preset entry in this module is an
            # instance; the original passed the class object here.
            QwenImageInpaintProcessImagesInputStep(),
        ),  # resized_image, mask_image -> processed_image, processed_mask_image, mask_overlay_kwargs
        (
            "encode",
            QwenImageVaeEncoderDynamicStep(input_name="processed_image", output_name="image_latents"),
        ),  # processed_image -> image_latents
    ]
)
class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
    """Resize, preprocess and VAE-encode image/mask inputs for Edit inpainting."""

    model_name = "qwenimage"
    block_names = QwenImageEditInpaintVaeEncoderBlocks.keys()
    block_classes = QwenImageEditInpaintVaeEncoderBlocks.values()

    def description(self) -> str:
        """Return a human-readable summary of this step."""
        parts = (
            "This step is used for processing image and mask inputs for QwenImage-Edit inpaint tasks. It:\n",
            " - resize the image for target area (1024 * 1024) while maintaining the aspect ratio.\n",
            " - process the resized image and mask image.\n",
            " - create image latents.",
        )
        return "".join(parts)
#### QwenImage-Edit/edit inpaint presets
# Full edit-inpaint pipeline for QwenImage-Edit, in execution order.
EDIT_INPAINT_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageEditVLEncoderStep()),
        ("vae_encoder", QwenImageEditInpaintVaeEncoderStep()),
        ("input", QwenImageInpaintInputStep()),
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsWithStrengthStep()),
        ("prepare_inpaint_latents", QwenImageInpaintPrepareLatentsStep()),
        ("prepare_rope_inputs", QwenImageEditRoPEInputsStep()),
        ("denoise", QwenImageEditInpaintDenoiseStep()),
        ("decode", QwenImageInpaintDecodeStep()),
    ]
)
## 2.3 QwenImage-Edit/auto encoders
class QwenImageEditAutoVaeEncoderStep(AutoPipelineBlocks):
    """Auto VAE-encoder for Edit: inpaint variant when `mask_image` is present."""

    # Consistency fix: the Edit auto siblings (e.g. QwenImageEditAutoBeforeDenoiseStep)
    # declare model_name = "qwenimage-edit"; it was omitted here.
    model_name = "qwenimage-edit"
    block_classes = [
        QwenImageEditInpaintVaeEncoderStep,
        QwenImageEditVaeEncoderStep,
    ]
    block_names = ["edit_inpaint", "edit"]
    block_trigger_inputs = ["mask_image", "image"]

    def description(self):
        return (
            "Vae encoder step that encode the image inputs into their latent representations. \n"
            " This is an auto pipeline block that works for edit and edit_inpaint tasks.\n"
            + " - `QwenImageEditInpaintVaeEncoderStep` (edit_inpaint) is used when `mask_image` is provided.\n"
            + " - `QwenImageEditVaeEncoderStep` (edit) is used when `image` is provided.\n"
            + " - if `mask_image` or `image` is not provided, step will be skipped."
        )
## 2.4 QwenImage-Edit/auto inputs
class QwenImageEditAutoInputStep(AutoPipelineBlocks):
    """Auto input step for the edit and edit_inpaint tasks."""

    # Consistency fix: declare model_name like the sibling Edit auto blocks.
    model_name = "qwenimage-edit"
    block_classes = [QwenImageInpaintInputStep, QwenImageEditInputStep]
    block_names = ["edit_inpaint", "edit"]
    # Fix: the edit branch should trigger on `image_latents` (produced by the vae
    # encoder and consumed by QwenImageEditInputStep) — the description below and the
    # sibling QwenImageEditAutoBeforeDenoiseStep/QwenImageEditAutoDenoiseStep all key
    # off `image_latents`; the original used the raw `image` input here.
    block_trigger_inputs = ["processed_mask_image", "image_latents"]

    def description(self):
        return (
            "Input step that prepares the inputs for the edit denoising step.\n"
            + " It is an auto pipeline block that works for edit and edit_inpaint tasks.\n"
            + " - `QwenImageInpaintInputStep` (edit_inpaint) is used when `processed_mask_image` is provided.\n"
            + " - `QwenImageEditInputStep` (edit) is used when `image_latents` is provided.\n"
            + " - if `processed_mask_image` or `image_latents` is not provided, step will be skipped."
        )
## 2.5 QwenImage-Edit/auto before denoise
# compose the steps into a BeforeDenoiseStep for edit and edit_inpaint tasks before combine into an auto step

#### QwenImage-Edit/edit before denoise
QwenImageEditBeforeDenoiseBlocks = InsertableDict(
    [
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsStep()),
        ("prepare_rope_inputs", QwenImageEditRoPEInputsStep()),
    ]
)
class QwenImageEditBeforeDenoiseStep(SequentialPipelineBlocks):
    """Composite before-denoise step for the edit task."""

    model_name = "qwenimage"
    block_names = QwenImageEditBeforeDenoiseBlocks.keys()
    block_classes = QwenImageEditBeforeDenoiseBlocks.values()

    def description(self):
        """Summarize what this composite step prepares."""
        task = "edit"
        return (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) "
            f"for the denoise step for {task} task."
        )
#### QwenImage-Edit/edit inpaint before denoise
QwenImageEditInpaintBeforeDenoiseBlocks = InsertableDict(
    [
        ("prepare_latents", QwenImagePrepareLatentsStep()),
        ("set_timesteps", QwenImageSetTimestepsWithStrengthStep()),
        ("prepare_inpaint_latents", QwenImageInpaintPrepareLatentsStep()),
        ("prepare_rope_inputs", QwenImageEditRoPEInputsStep()),
    ]
)
class QwenImageEditInpaintBeforeDenoiseStep(SequentialPipelineBlocks):
    """Composite before-denoise step for the edit inpaint task."""

    model_name = "qwenimage"
    block_names = QwenImageEditInpaintBeforeDenoiseBlocks.keys()
    block_classes = QwenImageEditInpaintBeforeDenoiseBlocks.values()

    def description(self):
        """Summarize what this composite step prepares."""
        task = "edit inpaint"
        return (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) "
            f"for the denoise step for {task} task."
        )
# auto before_denoise step for edit and edit_inpaint tasks
class QwenImageEditAutoBeforeDenoiseStep(AutoPipelineBlocks):
    """Auto before-denoise step for the edit and edit_inpaint tasks."""

    model_name = "qwenimage-edit"
    block_names = ["edit_inpaint", "edit"]
    block_classes = [
        QwenImageEditInpaintBeforeDenoiseStep,
        QwenImageEditBeforeDenoiseStep,
    ]
    block_trigger_inputs = ["processed_mask_image", "image_latents"]

    def description(self):
        """Describe the dispatch behavior of this auto block."""
        doc_lines = (
            "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step.\n",
            "This is an auto pipeline block that works for edit (img2img) and edit inpaint tasks.\n",
            " - `QwenImageEditInpaintBeforeDenoiseStep` (edit_inpaint) is used when `processed_mask_image` is provided.\n",
            " - `QwenImageEditBeforeDenoiseStep` (edit) is used when `image_latents` is provided and `processed_mask_image` is not provided.\n",
            " - if `image_latents` or `processed_mask_image` is not provided, step will be skipped.",
        )
        return "".join(doc_lines)
## 2.6 QwenImage-Edit/auto denoise
class QwenImageEditAutoDenoiseStep(AutoPipelineBlocks):
    """Auto denoise step for the edit and edit_inpaint tasks."""

    model_name = "qwenimage-edit"
    block_names = ["inpaint_denoise", "denoise"]
    block_classes = [QwenImageEditInpaintDenoiseStep, QwenImageEditDenoiseStep]
    block_trigger_inputs = ["processed_mask_image", "image_latents"]

    def description(self):
        """Describe the dispatch behavior of this auto block."""
        doc_lines = (
            "Denoise step that iteratively denoise the latents. \n",
            "This block supports edit (img2img) and edit inpaint tasks for QwenImage Edit. \n",
            " - `QwenImageEditInpaintDenoiseStep` (inpaint) is used when `processed_mask_image` is provided.\n",
            " - `QwenImageEditDenoiseStep` (img2img) is used when `image_latents` is provided.\n",
            " - if `processed_mask_image` or `image_latents` is not provided, step will be skipped.",
        )
        return "".join(doc_lines)
## 2.7 QwenImage-Edit/auto blocks & presets
# Auto preset for QwenImage-Edit: each auto step picks its sub-block from the inputs.
EDIT_AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageEditVLEncoderStep()),
        ("vae_encoder", QwenImageEditAutoVaeEncoderStep()),
        ("input", QwenImageEditAutoInputStep()),
        ("before_denoise", QwenImageEditAutoBeforeDenoiseStep()),
        ("denoise", QwenImageEditAutoDenoiseStep()),
        ("decode", QwenImageAutoDecodeStep()),
    ]
)
class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
    """Auto modular pipeline for QwenImage-Edit (edit and edit-inpaint tasks)."""

    model_name = "qwenimage-edit"
    block_names = EDIT_AUTO_BLOCKS.keys()
    block_classes = EDIT_AUTO_BLOCKS.values()

    def description(self):
        """Describe the supported workflows and the inputs that select them."""
        usage_lines = [
            "Auto Modular pipeline for edit (img2img) and edit inpaint tasks using QwenImage-Edit.\n",
            "- for edit (img2img) generation, you need to provide `image`\n",
            "- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop` \n",
        ]
        return "".join(usage_lines)
# 3. all block presets supported in QwenImage & QwenImage-Edit
# Task name -> ordered preset of pipeline blocks.
ALL_BLOCKS = {
    "text2image": TEXT2IMAGE_BLOCKS,
    "img2img": IMAGE2IMAGE_BLOCKS,
    "edit": EDIT_BLOCKS,
    "edit_inpaint": EDIT_INPAINT_BLOCKS,
    "inpaint": INPAINT_BLOCKS,
    "controlnet": CONTROLNET_BLOCKS,
    "auto": AUTO_BLOCKS,
    "edit_auto": EDIT_AUTO_BLOCKS,
}