Add files using upload-large-folder tool

4f4376a verified 2 months ago

5.33 kB

	# Copyright 2025 The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from ...utils import logging
	from ..modular_pipeline import SequentialPipelineBlocks
	from ..modular_pipeline_utils import OutputParam
	from .before_denoise import (
	WanPrepareLatentsStep,
	WanSetTimestepsStep,
	WanTextInputStep,
	)
	from .decoders import WanVaeDecoderStep
	from .denoise import (
	WanDenoiseStep,
	)
	from .encoders import (
	WanTextEncoderStep,
	)


	# Module-level logger, named after this module via `__name__`.
	logger = logging.get_logger(__name__) # pylint: disable=invalid-name


	# ====================
	# 1. DENOISE
	# ====================


	# inputs(text) -> set_timesteps -> prepare_latents -> denoise
	# auto_docstring
	class WanCoreDenoiseStep(SequentialPipelineBlocks):
	    """
	    denoise block that takes encoded conditions and runs the denoising process.

	      Components:
	          transformer (`WanTransformer3DModel`) scheduler (`UniPCMultistepScheduler`) guider (`ClassifierFreeGuidance`)

	      Inputs:
	          num_videos_per_prompt (`None`, optional, defaults to 1):
	              TODO: Add description.
	          prompt_embeds (`Tensor`):
	              Pre-generated text embeddings. Can be generated from text_encoder step.
	          negative_prompt_embeds (`Tensor`, optional):
	              Pre-generated negative text embeddings. Can be generated from text_encoder step.
	          num_inference_steps (`None`, optional, defaults to 50):
	              TODO: Add description.
	          timesteps (`None`, optional):
	              TODO: Add description.
	          sigmas (`None`, optional):
	              TODO: Add description.
	          height (`int`, optional):
	              TODO: Add description.
	          width (`int`, optional):
	              TODO: Add description.
	          num_frames (`int`, optional):
	              TODO: Add description.
	          latents (`Tensor | NoneType`, optional):
	              TODO: Add description.
	          generator (`None`, optional):
	              TODO: Add description.
	          attention_kwargs (`None`, optional):
	              TODO: Add description.

	      Outputs:
	          latents (`Tensor`):
	              Denoised latents.
	    """

	    # NOTE(review): the `# auto_docstring` marker above this class suggests the
	    # docstring is tool-generated -- confirm before hand-editing it further.

	    model_name = "wan"

	    # Sub-blocks in pipeline order:
	    # text inputs -> set timesteps -> prepare latents -> denoise.
	    block_classes = [
	        WanTextInputStep,
	        WanSetTimestepsStep,
	        WanPrepareLatentsStep,
	        WanDenoiseStep,
	    ]
	    # One label per entry of `block_classes` (presumably paired by position).
	    block_names = ["input", "set_timesteps", "prepare_latents", "denoise"]

	    @property
	    def description(self) -> str:
	        """Return the one-line summary of this block."""
	        return "denoise block that takes encoded conditions and runs the denoising process."

	    @property
	    def outputs(self):
	        """Return the declared outputs: a single templated `latents` parameter."""
	        return [OutputParam.template("latents")]


	# ====================
	# 2. BLOCKS (Wan2.1 text2video)
	# ====================


	# auto_docstring
	class WanBlocks(SequentialPipelineBlocks):
	    """
	    Modular pipeline blocks for Wan2.1.

	      Components:
	          text_encoder (`UMT5EncoderModel`) tokenizer (`AutoTokenizer`) guider (`ClassifierFreeGuidance`) transformer
	          (`WanTransformer3DModel`) scheduler (`UniPCMultistepScheduler`) vae (`AutoencoderKLWan`) video_processor
	          (`VideoProcessor`)

	      Inputs:
	          prompt (`None`, optional):
	              TODO: Add description.
	          negative_prompt (`None`, optional):
	              TODO: Add description.
	          max_sequence_length (`None`, optional, defaults to 512):
	              TODO: Add description.
	          num_videos_per_prompt (`None`, optional, defaults to 1):
	              TODO: Add description.
	          num_inference_steps (`None`, optional, defaults to 50):
	              TODO: Add description.
	          timesteps (`None`, optional):
	              TODO: Add description.
	          sigmas (`None`, optional):
	              TODO: Add description.
	          height (`int`, optional):
	              TODO: Add description.
	          width (`int`, optional):
	              TODO: Add description.
	          num_frames (`int`, optional):
	              TODO: Add description.
	          latents (`Tensor | NoneType`, optional):
	              TODO: Add description.
	          generator (`None`, optional):
	              TODO: Add description.
	          attention_kwargs (`None`, optional):
	              TODO: Add description.
	          output_type (`str`, optional, defaults to np):
	              The output type of the decoded videos

	      Outputs:
	          videos (`list`):
	              The generated videos.
	    """

	    # NOTE(review): the `# auto_docstring` marker above this class suggests the
	    # docstring is tool-generated -- confirm before hand-editing it further.

	    model_name = "wan"

	    # Sub-blocks composed by this pipeline: text encoding, the core denoise
	    # sequence, then VAE decoding.
	    block_classes = [
	        WanTextEncoderStep,
	        WanCoreDenoiseStep,
	        WanVaeDecoderStep,
	    ]
	    # One label per entry of `block_classes` (presumably paired by position).
	    block_names = ["text_encoder", "denoise", "decode"]

	    @property
	    def description(self) -> str:
	        """Return the one-line summary of this pipeline."""
	        return "Modular pipeline blocks for Wan2.1."

	    @property
	    def outputs(self):
	        """Return the declared outputs: a single templated `videos` parameter."""
	        return [OutputParam.template("videos")]