| import logging
|
| from enum import Enum
|
| from typing import Any, Callable, Optional, Literal, TypeVar
|
| from typing_extensions import override
|
|
|
| import torch
|
| from pydantic import BaseModel, Field
|
|
|
| from comfy_api.latest import ComfyExtension, io as comfy_io
|
| from comfy_api_nodes.util.validation_utils import (
|
| validate_aspect_ratio_closeness,
|
| validate_image_dimensions,
|
| validate_image_aspect_ratio_range,
|
| get_number_of_images,
|
| )
|
| from comfy_api_nodes.apis.client import (
|
| ApiEndpoint,
|
| HttpMethod,
|
| SynchronousOperation,
|
| PollingOperation,
|
| EmptyRequest,
|
| )
|
| from comfy_api_nodes.apinode_utils import download_url_to_video_output, upload_images_to_comfyapi
|
|
|
|
|
# Proxy routes for the Vidu endpoints. All of them live under one common prefix.
_VIDU_PROXY_ROOT = "/proxy/vidu"

VIDU_TEXT_TO_VIDEO = f"{_VIDU_PROXY_ROOT}/text2video"
VIDU_IMAGE_TO_VIDEO = f"{_VIDU_PROXY_ROOT}/img2video"
VIDU_REFERENCE_VIDEO = f"{_VIDU_PROXY_ROOT}/reference2video"
VIDU_START_END_VIDEO = f"{_VIDU_PROXY_ROOT}/start-end2video"
# Template: the single ``%s`` placeholder is filled with the task id.
VIDU_GET_GENERATION_STATUS = f"{_VIDU_PROXY_ROOT}/tasks/%s/creations"

# Generic response type used by the polling helper's signature.
R = TypeVar("R")
|
|
|
class VideoModelName(str, Enum):
    """Model identifiers offered by the Vidu nodes (string-valued enum)."""

    vidu_q1 = "viduq1"
|
|
|
|
|
class AspectRatio(str, Enum):
    """Output aspect ratios, expressed as ``"width:height"`` strings."""

    # Declaration order is the order the combo widgets list the options in.
    r_16_9 = "16:9"
    r_9_16 = "9:16"
    r_1_1 = "1:1"
|
|
|
|
|
class Resolution(str, Enum):
    """Output resolutions selectable in the Vidu nodes (string-valued enum)."""

    r_1080p = "1080p"
|
|
|
|
|
class MovementAmplitude(str, Enum):
    """Choices for the ``movement_amplitude`` request field."""

    # Declaration order is the order the combo widgets list the options in.
    auto = "auto"
    small = "small"
    medium = "medium"
    large = "large"
|
|
|
|
|
class TaskCreationRequest(BaseModel):
    """Request body used to create a Vidu generation task.

    Every field has a default, so an instance can be built from any subset of
    the fields.
    """

    model: VideoModelName = VideoModelName.vidu_q1
    # Free-text description; limited to 1500 characters.
    prompt: Optional[str] = Field(default=None, max_length=1500)
    # Only the literal value 5 is accepted.
    duration: Optional[Literal[5]] = 5
    # Constrained to the non-negative signed 32-bit range.
    seed: Optional[int] = Field(default=0, ge=0, le=2_147_483_647)
    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
    resolution: Optional[Resolution] = Resolution.r_1080p
    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
    images: Optional[list[str]] = Field(
        default=None,
        description="Base64 encoded string or image URL",
    )
|
|
|
|
|
class TaskStatus(str, Enum):
    """Task states reported in the ``state`` field of Vidu responses."""

    created = "created"
    queueing = "queueing"
    processing = "processing"
    # The two states the polling helper treats as terminal.
    success = "success"
    failed = "failed"
|
|
|
|
|
class TaskCreationResponse(BaseModel):
    """Response returned by the task-creation endpoints."""

    # Required fields: declared without a default.
    task_id: str
    state: TaskStatus
    created_at: str
    code: Optional[int] = Field(default=None, description="Error code")
|
|
|
|
|
class TaskResult(BaseModel):
    """One generated result ("creation") belonging to a task."""

    id: str = Field(
        ...,
        description="Creation id",
    )
    url: str = Field(
        ...,
        description="The URL of the generated results, valid for one hour",
    )
    cover_url: str = Field(
        ...,
        description="The cover URL of the generated results, valid for one hour",
    )
|
|
|
|
|
class TaskStatusResponse(BaseModel):
    """Response of the task-status endpoint: current state plus any results."""

    # Required: declared without a default.
    state: TaskStatus
    err_code: Optional[str] = Field(default=None)
    creations: list[TaskResult] = Field(
        ...,
        description="Generated results",
    )
|
|
|
|
|
async def poll_until_finished(
    auth_kwargs: dict[str, str],
    api_endpoint: ApiEndpoint[Any, R],
    result_url_extractor: Optional[Callable[[R], str]] = None,
    estimated_duration: Optional[int] = None,
    node_id: Optional[str] = None,
) -> R:
    """Run a ``PollingOperation`` against ``api_endpoint`` and return its result.

    The operation is configured so that ``TaskStatus.success`` counts as
    completed and ``TaskStatus.failed`` as failed; the status is read from the
    ``state`` attribute of each polled response.

    Args:
        auth_kwargs: Authentication keyword arguments forwarded to the operation.
        api_endpoint: Endpoint to poll.
        result_url_extractor: Optional callable forwarded to the operation to
            pull a result URL out of a response.
        estimated_duration: Optional duration hint forwarded to the operation.
        node_id: Optional node identifier forwarded to the operation.

    Returns:
        Whatever ``PollingOperation.execute()`` resolves to.
    """

    def _state_of(response) -> str:
        # Responses expose ``state`` as a TaskStatus; the operation is handed
        # the plain string value.
        return response.state.value

    operation = PollingOperation(
        poll_endpoint=api_endpoint,
        auth_kwargs=auth_kwargs,
        status_extractor=_state_of,
        completed_statuses=[TaskStatus.success.value],
        failed_statuses=[TaskStatus.failed.value],
        result_url_extractor=result_url_extractor,
        estimated_duration=estimated_duration,
        node_id=node_id,
        poll_interval=16.0,
        max_poll_attempts=256,
    )
    return await operation.execute()
|
|
|
|
|
def get_video_url_from_response(response) -> Optional[str]:
    """Return the URL of the first creation in ``response``, or ``None``.

    ``None`` is returned when ``response.creations`` is empty or otherwise falsy.
    """
    creations = response.creations
    return creations[0].url if creations else None
|
|
|
|
|
def get_video_from_response(response) -> TaskResult:
    """Return the first creation of a finished Vidu task.

    Args:
        response: Object exposing ``creations``, ``state`` and ``err_code``
            (a ``TaskStatusResponse`` in this module).

    Returns:
        The first entry of ``response.creations``.

    Raises:
        RuntimeError: If ``response.creations`` is empty.
    """
    if not response.creations:
        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
        # This is a failure that is about to be raised, so log it at ERROR
        # level, as execute_task does for a failed task creation.
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    first_creation = response.creations[0]
    logging.info("Vidu task %s succeeded. Video URL: %s", first_creation.id, first_creation.url)
    return first_creation
|
|
|
|
|
async def execute_task(
    vidu_endpoint: str,
    auth_kwargs: Optional[dict[str, str]],
    payload: TaskCreationRequest,
    estimated_duration: int,
    node_id: str,
) -> TaskStatusResponse:
    """Create a Vidu task and poll its status endpoint until it finishes.

    Args:
        vidu_endpoint: Path of the task-creation endpoint to POST to.
        auth_kwargs: Authentication keyword arguments forwarded to both the
            creation request and the poller.
        payload: Task-creation request body.
        estimated_duration: Duration hint forwarded to the poller.
        node_id: Node identifier forwarded to the poller.

    Returns:
        The result of polling the status endpoint, whose response model is
        ``TaskStatusResponse``.

    Raises:
        RuntimeError: If the creation response already reports the ``failed`` state.
    """
    creation_endpoint = ApiEndpoint(
        path=vidu_endpoint,
        method=HttpMethod.POST,
        request_model=TaskCreationRequest,
        response_model=TaskCreationResponse,
    )
    response = await SynchronousOperation(
        endpoint=creation_endpoint,
        request=payload,
        auth_kwargs=auth_kwargs,
    ).execute()

    # A task may be rejected at creation time; there is nothing to poll then.
    if response.state == TaskStatus.failed:
        error_msg = f"Vidu request failed. Code: {response.code}"
        logging.error(error_msg)
        raise RuntimeError(error_msg)

    status_endpoint = ApiEndpoint(
        path=VIDU_GET_GENERATION_STATUS % response.task_id,
        method=HttpMethod.GET,
        request_model=EmptyRequest,
        response_model=TaskStatusResponse,
    )
    return await poll_until_finished(
        auth_kwargs,
        status_endpoint,
        result_url_extractor=get_video_url_from_response,
        estimated_duration=estimated_duration,
        node_id=node_id,
    )
|
|
|
|
|
class ViduTextToVideoNode(comfy_io.ComfyNode):
    """API node that requests a video from a text prompt via ``VIDU_TEXT_TO_VIDEO``."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduTextToVideoNode",
            display_name="Vidu Text To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from text prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Create a text-to-video task, wait for it and wrap the resulting video.

        Raises:
            ValueError: If ``prompt`` is empty.
            RuntimeError: Propagated from ``execute_task`` /
                ``get_video_from_response`` when the task fails or yields no result.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        payload = TaskCreationRequest(
            # TaskCreationRequest declares this field as ``model``; passing it
            # as ``model_name`` would not set it.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
class ViduImageToVideoNode(comfy_io.ComfyNode):
    """API node that requests a video from a start image via ``VIDU_IMAGE_TO_VIDEO``."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduImageToVideoNode",
            display_name="Vidu Image To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from image and optional prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="An image to be used as the start frame of the generated video",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload the start image, create an image-to-video task and wrap the result.

        Raises:
            ValueError: If more than one input image is supplied.
            RuntimeError: Propagated from ``execute_task`` /
                ``get_video_from_response`` when the task fails or yields no result.
        """
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        # NOTE(review): presumably limits the aspect ratio to between 1:4 and
        # 4:1 -- confirm against the helper's definition.
        validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
        payload = TaskCreationRequest(
            # TaskCreationRequest declares this field as ``model``; passing it
            # as ``model_name`` would not set it.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # The request carries the uploaded image references, not the tensor itself.
        payload.images = await upload_images_to_comfyapi(
            image,
            max_images=1,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
class ViduReferenceVideoNode(comfy_io.ComfyNode):
    """API node that requests a video from reference images via ``VIDU_REFERENCE_VIDEO``."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduReferenceVideoNode",
            display_name="Vidu Reference To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from multiple images and prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "images",
                    tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        images: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload the reference images, create a reference-to-video task and wrap the result.

        Raises:
            ValueError: If ``prompt`` is empty or more than 7 images are supplied.
            RuntimeError: Propagated from ``execute_task`` /
                ``get_video_from_response`` when the task fails or yields no result.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        image_count = get_number_of_images(images)
        if image_count > 7:
            raise ValueError("Too many images, maximum allowed is 7.")
        # Every reference image is validated on its own.
        # NOTE(review): assumes iterating ``images`` yields one image per step
        # (batch-first tensor) -- confirm.
        for image in images:
            validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
            validate_image_dimensions(image, min_width=128, min_height=128)
        payload = TaskCreationRequest(
            # TaskCreationRequest declares this field as ``model``; passing it
            # as ``model_name`` would not set it.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            images,
            max_images=7,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
class ViduStartEndToVideoNode(comfy_io.ComfyNode):
    """API node that requests a video from a start and an end frame via ``VIDU_START_END_VIDEO``."""

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, the video output and hidden auth fields."""
        return comfy_io.Schema(
            node_id="ViduStartEndToVideoNode",
            display_name="Vidu Start End To Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from start and end frames and a prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[name.value for name in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "first_frame",
                    tooltip="Start frame",
                ),
                comfy_io.Image.Input(
                    "end_frame",
                    tooltip="End frame",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amplitude.value for amplitude in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: torch.Tensor,
        end_frame: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload both frames, create a start/end-to-video task and wrap the result.

        Raises:
            RuntimeError: Propagated from ``execute_task`` /
                ``get_video_from_response`` when the task fails or yields no result.
        """
        # NOTE(review): presumably requires the two frames' aspect ratios to be
        # within a 0.8-1.25 ratio of each other -- confirm against the helper.
        validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
        payload = TaskCreationRequest(
            # TaskCreationRequest declares this field as ``model``; passing it
            # as ``model_name`` would not set it.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # Frames are uploaded one after another so the list order stays
        # [first_frame, end_frame]; each upload yields a list whose first
        # entry is taken.
        payload.images = [
            (await upload_images_to_comfyapi(frame, max_images=1, mime_type="image/png", auth_kwargs=auth))[0]
            for frame in (first_frame, end_frame)
        ]
        results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
|
|
|
|
class ViduExtension(ComfyExtension):
    """Extension object that exposes the four Vidu node classes."""

    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        """Return the Vidu node classes provided by this extension."""
        node_classes: list[type[comfy_io.ComfyNode]] = [
            ViduTextToVideoNode,
            ViduImageToVideoNode,
            ViduReferenceVideoNode,
            ViduStartEndToVideoNode,
        ]
        return node_classes
|
|
|
async def comfy_entrypoint() -> ViduExtension:
    """Create and return a new ``ViduExtension`` instance."""
    extension = ViduExtension()
    return extension
|
|
|