| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| from ...utils import logging |
| from ..modular_pipeline import AutoPipelineBlocks, SequentialPipelineBlocks |
| from ..modular_pipeline_utils import InsertableDict, OutputParam |
| from .before_denoise import ( |
| Flux2PrepareGuidanceStep, |
| Flux2PrepareImageLatentsStep, |
| Flux2PrepareLatentsStep, |
| Flux2RoPEInputsStep, |
| Flux2SetTimestepsStep, |
| ) |
| from .decoders import Flux2DecodeStep, Flux2UnpackLatentsStep |
| from .denoise import Flux2DenoiseStep |
| from .encoders import ( |
| Flux2TextEncoderStep, |
| Flux2VaeEncoderStep, |
| ) |
| from .inputs import ( |
| Flux2ProcessImagesInputStep, |
| Flux2TextInputStep, |
| ) |
|
|
|
|
# Module-level logger from the package's logging utility, keyed by this module's name.
logger = logging.get_logger(__name__)
|
|
|
|
| |
class Flux2VaeEncoderSequentialStep(SequentialPipelineBlocks):
    """
    VAE encoder step that preprocesses, encodes, and prepares image latents for Flux2 conditioning.

    Sub-blocks, in execution order:
        preprocess: `Flux2ProcessImagesInputStep`
        encode: `Flux2VaeEncoderStep`

    Components:
        image_processor (`Flux2ImageProcessor`)
        vae (`AutoencoderKLFlux2`)

    Inputs:
        image (`None`, *optional*):
            TODO: Add description.
        height (`None`, *optional*):
            TODO: Add description.
        width (`None`, *optional*):
            TODO: Add description.
        generator (`None`, *optional*):
            TODO: Add description.

    Outputs:
        condition_images (`list`):
            TODO: Add description.
        image_latents (`list`):
            List of latent representations for each reference image
    """

    model_name = "flux2"

    # Instantiated sub-blocks; `block_names` below lines up with them by position.
    block_classes = [
        Flux2ProcessImagesInputStep(),
        Flux2VaeEncoderStep(),
    ]
    block_names = [
        "preprocess",
        "encode",
    ]

    @property
    def description(self) -> str:
        """One-line, human-readable summary of this step."""
        summary = "VAE encoder step that preprocesses, encodes, and prepares image latents for Flux2 conditioning."
        return summary
|
|
|
|
| |
class Flux2AutoVaeEncoderStep(AutoPipelineBlocks):
    """
    VAE encoder step that encodes the image inputs into their latent representations.

    This is an auto pipeline block that works for image conditioning tasks:
        - `Flux2VaeEncoderSequentialStep` is used when `image` is provided.
        - If `image` is not provided, the step is skipped.

    Components:
        image_processor (`Flux2ImageProcessor`)
        vae (`AutoencoderKLFlux2`)

    Inputs:
        image (`None`, *optional*):
            TODO: Add description.
        height (`None`, *optional*):
            TODO: Add description.
        width (`None`, *optional*):
            TODO: Add description.
        generator (`None`, *optional*):
            TODO: Add description.

    Outputs:
        condition_images (`list`):
            TODO: Add description.
        image_latents (`list`):
            List of latent representations for each reference image
    """

    # Single candidate branch, selected by the presence of the `image` input.
    block_classes = [Flux2VaeEncoderSequentialStep]
    block_names = ["img_conditioning"]
    block_trigger_inputs = ["image"]

    @property
    def description(self) -> str:
        """Multi-line, human-readable summary of this auto block and its branch."""
        summary_lines = [
            "VAE encoder step that encodes the image inputs into their latent representations.",
            "This is an auto pipeline block that works for image conditioning tasks.",
            " - `Flux2VaeEncoderSequentialStep` is used when `image` is provided.",
            " - If `image` is not provided, step will be skipped.",
        ]
        return "\n".join(summary_lines)
|
|
|
|
# Ordered name -> block mapping consumed by `Flux2CoreDenoiseStep` (text-to-image path).
Flux2CoreDenoiseBlocks = InsertableDict(
    {
        "input": Flux2TextInputStep(),
        "prepare_latents": Flux2PrepareLatentsStep(),
        "set_timesteps": Flux2SetTimestepsStep(),
        "prepare_guidance": Flux2PrepareGuidanceStep(),
        "prepare_rope_inputs": Flux2RoPEInputsStep(),
        "denoise": Flux2DenoiseStep(),
        "after_denoise": Flux2UnpackLatentsStep(),
    }
)
|
|
|
|
| |
class Flux2CoreDenoiseStep(SequentialPipelineBlocks):
    """
    Core denoise step that performs the denoising process for Flux2-dev.

    The sub-blocks and their names are taken from `Flux2CoreDenoiseBlocks`.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`Flux2Transformer2DModel`)

    Inputs:
        num_images_per_prompt (`None`, *optional*, defaults to 1):
            TODO: Add description.
        prompt_embeds (`Tensor`):
            Pre-generated text embeddings. Can be generated from text_encoder step.
        height (`int`, *optional*):
            TODO: Add description.
        width (`int`, *optional*):
            TODO: Add description.
        latents (`Tensor | NoneType`, *optional*):
            TODO: Add description.
        generator (`None`, *optional*):
            TODO: Add description.
        num_inference_steps (`None`, *optional*, defaults to 50):
            TODO: Add description.
        timesteps (`None`, *optional*):
            TODO: Add description.
        sigmas (`None`, *optional*):
            TODO: Add description.
        guidance_scale (`None`, *optional*, defaults to 4.0):
            TODO: Add description.
        joint_attention_kwargs (`None`, *optional*):
            TODO: Add description.
        image_latents (`Tensor`, *optional*):
            Packed image latents for conditioning. Shape: (B, img_seq_len, C)
        image_latent_ids (`Tensor`, *optional*):
            Position IDs for image latents. Shape: (B, img_seq_len, 4)

    Outputs:
        latents (`Tensor`):
            Denoised latents.
    """

    model_name = "flux2"

    # Views over the shared ordered mapping, so names and blocks stay aligned.
    block_classes = Flux2CoreDenoiseBlocks.values()
    block_names = Flux2CoreDenoiseBlocks.keys()

    @property
    def description(self) -> str:
        """One-line, human-readable summary of this step."""
        summary = "Core denoise step that performs the denoising process for Flux2-dev."
        return summary

    @property
    def outputs(self):
        """Declare the single output of this step: the `latents` template parameter."""
        latents_param = OutputParam.template("latents")
        return [latents_param]
|
|
|
|
# Ordered name -> block mapping consumed by `Flux2ImageConditionedCoreDenoiseStep`.
# Same sequence as `Flux2CoreDenoiseBlocks`, plus `prepare_image_latents` right after `input`.
Flux2ImageConditionedCoreDenoiseBlocks = InsertableDict(
    {
        "input": Flux2TextInputStep(),
        "prepare_image_latents": Flux2PrepareImageLatentsStep(),
        "prepare_latents": Flux2PrepareLatentsStep(),
        "set_timesteps": Flux2SetTimestepsStep(),
        "prepare_guidance": Flux2PrepareGuidanceStep(),
        "prepare_rope_inputs": Flux2RoPEInputsStep(),
        "denoise": Flux2DenoiseStep(),
        "after_denoise": Flux2UnpackLatentsStep(),
    }
)
|
|
|
|
| |
class Flux2ImageConditionedCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Core denoise step that performs the denoising process for Flux2-dev with image conditioning.

    The sub-blocks and their names are taken from `Flux2ImageConditionedCoreDenoiseBlocks`.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`Flux2Transformer2DModel`)

    Inputs:
        num_images_per_prompt (`None`, *optional*, defaults to 1):
            TODO: Add description.
        prompt_embeds (`Tensor`):
            Pre-generated text embeddings. Can be generated from text_encoder step.
        image_latents (`list`, *optional*):
            TODO: Add description.
        height (`int`, *optional*):
            TODO: Add description.
        width (`int`, *optional*):
            TODO: Add description.
        latents (`Tensor | NoneType`, *optional*):
            TODO: Add description.
        generator (`None`, *optional*):
            TODO: Add description.
        num_inference_steps (`None`, *optional*, defaults to 50):
            TODO: Add description.
        timesteps (`None`, *optional*):
            TODO: Add description.
        sigmas (`None`, *optional*):
            TODO: Add description.
        guidance_scale (`None`, *optional*, defaults to 4.0):
            TODO: Add description.
        joint_attention_kwargs (`None`, *optional*):
            TODO: Add description.

    Outputs:
        latents (`Tensor`):
            Denoised latents.
    """

    model_name = "flux2"

    # Views over the shared ordered mapping, so names and blocks stay aligned.
    block_classes = Flux2ImageConditionedCoreDenoiseBlocks.values()
    block_names = Flux2ImageConditionedCoreDenoiseBlocks.keys()

    @property
    def description(self) -> str:
        """One-line, human-readable summary of this step."""
        summary = "Core denoise step that performs the denoising process for Flux2-dev with image conditioning."
        return summary

    @property
    def outputs(self):
        """Declare the single output of this step: the `latents` template parameter."""
        latents_param = OutputParam.template("latents")
        return [latents_param]
|
|
|
|
class Flux2AutoCoreDenoiseStep(AutoPipelineBlocks):
    """
    Auto core denoise step that performs the denoising process for Flux2-dev.

    This is an auto pipeline block that works for text-to-image and image-conditioned generation:
        - `Flux2ImageConditionedCoreDenoiseStep` (`image_conditioned`) is triggered by `image_latents`.
        - `Flux2CoreDenoiseStep` (`text2image`) is used for text-to-image generation.

    NOTE(review): the `None` entry in `block_trigger_inputs` presumably marks `text2image` as the
    fallback branch when `image_latents` is absent; confirm against `AutoPipelineBlocks`.
    """

    model_name = "flux2"

    # The three lists are parallel: branch class, branch name, and the input that selects it.
    block_classes = [Flux2ImageConditionedCoreDenoiseStep, Flux2CoreDenoiseStep]
    block_names = ["image_conditioned", "text2image"]
    block_trigger_inputs = ["image_latents", None]

    @property
    def description(self) -> str:
        """Multi-line, human-readable summary of this auto block and its branches."""
        # Each fragment ends with "\n"; adjacent literals are concatenated verbatim, so
        # without the separators the first sentences run together on one line.
        return (
            "Auto core denoise step that performs the denoising process for Flux2-dev.\n"
            "This is an auto pipeline block that works for text-to-image and image-conditioned generation.\n"
            " - `Flux2CoreDenoiseStep` is used for text-to-image generation.\n"
            " - `Flux2ImageConditionedCoreDenoiseStep` is used for image-conditioned generation.\n"
        )
|
|
|
|
# Ordered name -> block mapping consumed by `Flux2AutoBlocks` (the top-level pipeline).
AUTO_BLOCKS = InsertableDict(
    {
        "text_encoder": Flux2TextEncoderStep(),
        "vae_encoder": Flux2AutoVaeEncoderStep(),
        "denoise": Flux2AutoCoreDenoiseStep(),
        "decode": Flux2DecodeStep(),
    }
)
|
|
|
|
| |
class Flux2AutoBlocks(SequentialPipelineBlocks):
    """
    Auto Modular pipeline for text-to-image and image-conditioned generation using Flux2.

    The sub-blocks and their names are taken from `AUTO_BLOCKS`
    (`text_encoder`, `vae_encoder`, `denoise`, `decode`).

    Supported workflows:
        - `text2image`: requires `prompt`
        - `image_conditioned`: requires `image`, `prompt`

    Components:
        text_encoder (`Mistral3ForConditionalGeneration`)
        tokenizer (`AutoProcessor`)
        image_processor (`Flux2ImageProcessor`)
        vae (`AutoencoderKLFlux2`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`Flux2Transformer2DModel`)

    Inputs:
        prompt (`None`, *optional*):
            TODO: Add description.
        max_sequence_length (`int`, *optional*, defaults to 512):
            TODO: Add description.
        text_encoder_out_layers (`tuple`, *optional*, defaults to (10, 20, 30)):
            TODO: Add description.
        image (`None`, *optional*):
            TODO: Add description.
        height (`None`, *optional*):
            TODO: Add description.
        width (`None`, *optional*):
            TODO: Add description.
        generator (`None`, *optional*):
            TODO: Add description.
        num_images_per_prompt (`None`, *optional*, defaults to 1):
            TODO: Add description.
        image_latents (`list`, *optional*):
            TODO: Add description.
        latents (`Tensor | NoneType`, *optional*):
            TODO: Add description.
        num_inference_steps (`None`, *optional*, defaults to 50):
            TODO: Add description.
        timesteps (`None`, *optional*):
            TODO: Add description.
        sigmas (`None`, *optional*):
            TODO: Add description.
        guidance_scale (`None`, *optional*, defaults to 4.0):
            TODO: Add description.
        joint_attention_kwargs (`None`, *optional*):
            TODO: Add description.
        image_latent_ids (`Tensor`, *optional*):
            Position IDs for image latents. Shape: (B, img_seq_len, 4)
        output_type (`None`, *optional*, defaults to pil):
            TODO: Add description.

    Outputs:
        images (`list`):
            Generated images.
    """

    model_name = "flux2"

    # Views over the shared ordered mapping, so names and blocks stay aligned.
    block_classes = AUTO_BLOCKS.values()
    block_names = AUTO_BLOCKS.keys()

    # Workflow name -> inputs flagged `True` for it (mirrors "Supported workflows" above).
    _workflow_map = {
        "text2image": {
            "prompt": True,
        },
        "image_conditioned": {
            "image": True,
            "prompt": True,
        },
    }

    @property
    def description(self) -> str:
        """One-line, human-readable summary of this pipeline."""
        summary = "Auto Modular pipeline for text-to-image and image-conditioned generation using Flux2."
        return summary

    @property
    def outputs(self):
        """Declare the single output of this pipeline: the `images` template parameter."""
        images_param = OutputParam.template("images")
        return [images_param]
|
|