| | import logging |
| | from enum import Enum |
| | from typing import Any, Callable, Optional, Literal, TypeVar |
| | from typing_extensions import override |
| |
|
| | import torch |
| | from pydantic import BaseModel, Field |
| |
|
| | from comfy_api.latest import ComfyExtension, io as comfy_io |
| | from comfy_api_nodes.util.validation_utils import ( |
| | validate_aspect_ratio_closeness, |
| | validate_image_dimensions, |
| | validate_image_aspect_ratio_range, |
| | get_number_of_images, |
| | ) |
| | from comfy_api_nodes.apis.client import ( |
| | ApiEndpoint, |
| | HttpMethod, |
| | SynchronousOperation, |
| | PollingOperation, |
| | EmptyRequest, |
| | ) |
| | from comfy_api_nodes.apinode_utils import download_url_to_video_output, upload_images_to_comfyapi |
| |
|
| |
|
# Vidu proxy endpoint paths, routed through the Comfy API gateway.
VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
VIDU_IMAGE_TO_VIDEO = "/proxy/vidu/img2video"
VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
# %s is substituted with the task id returned by the creation endpoints.
VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"

# Generic response-model type used by the polling helper below.
R = TypeVar("R")
| |
|
class VideoModelName(str, Enum):
    """Vidu video-generation model identifiers accepted by the API."""

    vidu_q1 = 'viduq1'
| |
|
| |
|
class AspectRatio(str, Enum):
    """Supported output-video aspect ratios."""

    r_16_9 = "16:9"
    r_9_16 = "9:16"
    r_1_1 = "1:1"
| |
|
| |
|
class Resolution(str, Enum):
    """Supported output-video resolutions."""

    r_1080p = "1080p"
| |
|
| |
|
class MovementAmplitude(str, Enum):
    """How much motion the generated video should contain."""

    auto = "auto"
    small = "small"
    medium = "medium"
    large = "large"
| |
|
| |
|
class TaskCreationRequest(BaseModel):
    """Request body shared by all Vidu task-creation endpoints."""

    model: VideoModelName = VideoModelName.vidu_q1
    prompt: Optional[str] = Field(None, max_length=1500)
    # Only 5-second clips are supported by this request model.
    duration: Optional[Literal[5]] = 5
    seed: Optional[int] = Field(0, ge=0, le=2147483647)
    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
    resolution: Optional[Resolution] = Resolution.r_1080p
    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
    images: Optional[list[str]] = Field(None, description="Base64 encoded string or image URL")
| |
|
| |
|
class TaskStatus(str, Enum):
    """Lifecycle states reported by the Vidu task API."""

    created = "created"
    queueing = "queueing"
    processing = "processing"
    success = "success"
    failed = "failed"
| |
|
| |
|
class TaskCreationResponse(BaseModel):
    """Response returned immediately after submitting a creation request."""

    task_id: str = Field(...)
    state: TaskStatus = Field(...)
    created_at: str = Field(...)
    code: Optional[int] = Field(None, description="Error code")
| |
|
| |
|
class TaskResult(BaseModel):
    """A single generated artifact (video + cover image) from a finished task."""

    id: str = Field(..., description="Creation id")
    url: str = Field(..., description="The URL of the generated results, valid for one hour")
    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")
| |
|
| |
|
class TaskStatusResponse(BaseModel):
    """Response returned by the task-status (polling) endpoint."""

    state: TaskStatus = Field(...)
    err_code: Optional[str] = Field(None)
    creations: list[TaskResult] = Field(..., description="Generated results")
| |
|
| |
|
async def poll_until_finished(
    auth_kwargs: dict[str, str],
    api_endpoint: ApiEndpoint[Any, R],
    result_url_extractor: Optional[Callable[[R], str]] = None,
    estimated_duration: Optional[int] = None,
    node_id: Optional[str] = None,
) -> R:
    """Poll ``api_endpoint`` every 16s until the task succeeds or fails.

    Returns the final response once the reported state reaches a terminal
    status; gives up after 256 attempts.
    """
    operation = PollingOperation(
        poll_endpoint=api_endpoint,
        completed_statuses=[TaskStatus.success.value],
        failed_statuses=[TaskStatus.failed.value],
        status_extractor=lambda resp: resp.state.value,
        auth_kwargs=auth_kwargs,
        result_url_extractor=result_url_extractor,
        estimated_duration=estimated_duration,
        node_id=node_id,
        poll_interval=16.0,
        max_poll_attempts=256,
    )
    return await operation.execute()
| |
|
| |
|
def get_video_url_from_response(response) -> Optional[str]:
    """Return the URL of the first creation in ``response``, or None if there are none."""
    creations = response.creations
    if not creations:
        return None
    return creations[0].url
| |
|
| |
|
def get_video_from_response(response) -> "TaskResult":
    """Return the first creation from a finished task response.

    Args:
        response: A ``TaskStatusResponse``-like object with ``creations``,
            ``state`` and ``err_code`` attributes.

    Raises:
        RuntimeError: if the response contains no creations.
    """
    if not response.creations:
        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
        # This is a failure path that raises; log at error level for
        # consistency with execute_task's failure handling.
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    logging.info("Vidu task %s succeeded. Video URL: %s", response.creations[0].id, response.creations[0].url)
    return response.creations[0]
| |
|
| |
|
async def execute_task(
    vidu_endpoint: str,
    auth_kwargs: Optional[dict[str, str]],
    payload: TaskCreationRequest,
    estimated_duration: int,
    node_id: str,
) -> TaskStatusResponse:
    """Create a Vidu task and poll it until it reaches a terminal state.

    Args:
        vidu_endpoint: Creation endpoint path (one of the VIDU_* constants).
        auth_kwargs: Authentication tokens forwarded to the API client.
        payload: Fully-populated task creation request.
        estimated_duration: Rough completion estimate (seconds) used for progress display.
        node_id: Unique id of the calling node, used for progress reporting.

    Raises:
        RuntimeError: if the creation request immediately reports failure.
    """
    # Submit the creation request synchronously; polling happens afterwards.
    response = await SynchronousOperation(
        endpoint=ApiEndpoint(
            path=vidu_endpoint,
            method=HttpMethod.POST,
            request_model=TaskCreationRequest,
            response_model=TaskCreationResponse,
        ),
        request=payload,
        auth_kwargs=auth_kwargs,
    ).execute()
    if response.state == TaskStatus.failed:
        error_msg = f"Vidu request failed. Code: {response.code}"
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    # Poll the per-task status endpoint until success or failure.
    return await poll_until_finished(
        auth_kwargs,
        ApiEndpoint(
            path=VIDU_GET_GENERATION_STATUS % response.task_id,
            method=HttpMethod.GET,
            request_model=EmptyRequest,
            response_model=TaskStatusResponse,
        ),
        result_url_extractor=get_video_url_from_response,
        estimated_duration=estimated_duration,
        node_id=node_id,
    )
| |
|
| |
|
class ViduTextToVideoNode(comfy_io.ComfyNode):
    """Generate a video from a text prompt via the Vidu text2video endpoint."""

    @classmethod
    def define_schema(cls):
        return comfy_io.Schema(
            node_id="ViduTextToVideoNode",
            display_name="Vidu Text To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from text prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[m.value for m in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amp.value for amp in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Submit the text-to-video task and return the downloaded video output."""
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        # Fixed: the request field is "model"; it was previously passed as
        # "model_name", which pydantic silently ignored (extra fields are
        # dropped by default), so the selected model never reached the API.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
| |
|
class ViduImageToVideoNode(comfy_io.ComfyNode):
    """Generate a video from a single start-frame image via the Vidu img2video endpoint."""

    @classmethod
    def define_schema(cls):
        return comfy_io.Schema(
            node_id="ViduImageToVideoNode",
            display_name="Vidu Image To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from image and optional prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[m.value for m in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="An image to be used as the start frame of the generated video",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amp.value for amp in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Validate the input image, upload it, submit the task and return the video."""
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        # API accepts aspect ratios between 1:4 and 4:1.
        validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
        # Fixed: the request field is "model"; it was previously passed as
        # "model_name", which pydantic silently ignored (extra fields are
        # dropped by default), so the selected model never reached the API.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            image,
            max_images=1,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
| |
|
| |
|
class ViduReferenceVideoNode(comfy_io.ComfyNode):
    """Generate a subject-consistent video from up to 7 reference images and a prompt."""

    @classmethod
    def define_schema(cls):
        return comfy_io.Schema(
            node_id="ViduReferenceVideoNode",
            display_name="Vidu Reference To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from multiple images and prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[m.value for m in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "images",
                    tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[ratio.value for ratio in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amp.value for amp in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        images: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Validate and upload the reference images, submit the task and return the video."""
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        image_count = get_number_of_images(images)
        if image_count > 7:
            raise ValueError("Too many images, maximum allowed is 7.")
        # Each reference image must fit the API's aspect-ratio and size limits.
        for image in images:
            validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
            validate_image_dimensions(image, min_width=128, min_height=128)
        # Fixed: the request field is "model"; it was previously passed as
        # "model_name", which pydantic silently ignored (extra fields are
        # dropped by default), so the selected model never reached the API.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            images,
            max_images=7,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
| |
|
| |
|
class ViduStartEndToVideoNode(comfy_io.ComfyNode):
    """Generate a video that interpolates between a start frame and an end frame."""

    @classmethod
    def define_schema(cls):
        return comfy_io.Schema(
            node_id="ViduStartEndToVideoNode",
            display_name="Vidu Start End To Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from start and end frames and a prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[m.value for m in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "first_frame",
                    tooltip="Start frame",
                ),
                comfy_io.Image.Input(
                    "end_frame",
                    tooltip="End frame",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[res.value for res in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[amp.value for amp in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: torch.Tensor,
        end_frame: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Upload the two boundary frames, submit the task and return the video."""
        # The two frames must have similar aspect ratios (within 0.8x-1.25x).
        validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
        # Fixed: the request field is "model"; it was previously passed as
        # "model_name", which pydantic silently ignored (extra fields are
        # dropped by default), so the selected model never reached the API.
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # Upload each frame individually; order matters (start frame, then end frame).
        payload.images = [
            (await upload_images_to_comfyapi(frame, max_images=1, mime_type="image/png", auth_kwargs=auth))[0]
            for frame in (first_frame, end_frame)
        ]
        results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
| |
|
| |
|
class ViduExtension(ComfyExtension):
    """Extension entry point exposing the Vidu API nodes."""

    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        """Return every Vidu node class provided by this extension."""
        nodes: list[type[comfy_io.ComfyNode]] = [
            ViduTextToVideoNode,
            ViduImageToVideoNode,
            ViduReferenceVideoNode,
            ViduStartEndToVideoNode,
        ]
        return nodes
| |
|
async def comfy_entrypoint() -> ViduExtension:
    """Module entry point called by ComfyUI to obtain the Vidu extension."""
    return ViduExtension()
| |
|