| import logging |
| from typing import Optional |
|
|
| import torch |
| from typing_extensions import override |
|
|
| from comfy_api.input import VideoInput |
| from comfy_api.latest import IO, ComfyExtension |
| from comfy_api_nodes.apis import ( |
| MoonvalleyPromptResponse, |
| MoonvalleyTextToVideoInferenceParams, |
| MoonvalleyTextToVideoRequest, |
| MoonvalleyVideoToVideoInferenceParams, |
| MoonvalleyVideoToVideoRequest, |
| ) |
| from comfy_api_nodes.util import ( |
| ApiEndpoint, |
| download_url_to_video_output, |
| poll_op, |
| sync_op, |
| trim_video, |
| upload_images_to_comfyapi, |
| upload_video_to_comfyapi, |
| validate_container_format_is_mp4, |
| validate_image_dimensions, |
| validate_string, |
| ) |
|
|
# Request paths for the Moonvalley routes; all of them sit under "/proxy/moonvalley".
# NOTE(review): API_UPLOADS_ENDPOINT is not referenced in the code visible here.
API_UPLOADS_ENDPOINT = "/proxy/moonvalley/uploads"
# Base path for prompt tasks; get_response() appends "/<task_id>" to it when polling.
API_PROMPTS_ENDPOINT = "/proxy/moonvalley/prompts"
API_VIDEO2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/video-to-video"
API_TXT2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/text-to-video"
API_IMG2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/image-to-video"

# Lower size limits for input images (presumably pixels).
MIN_WIDTH = 300
MIN_HEIGHT = 300

# Upper size limits handed to validate_image_dimensions() for the image-to-video reference image.
MAX_WIDTH = 10000
MAX_HEIGHT = 10000

# Size limits for input videos.
# NOTE(review): not referenced in the code visible here; video-to-video input is
# checked against a fixed list of resolutions instead. Confirm these are still needed.
MIN_VID_WIDTH = 300
MIN_VID_HEIGHT = 300

MAX_VID_WIDTH = 10000
MAX_VID_HEIGHT = 10000

# 1024 ** 3; presumably a byte limit (1 GiB) on video size. TODO confirm, it is not
# referenced in the code visible here.
MAX_VIDEO_SIZE = 1024 * 1024 * 1024

# Maximum length passed to validate_string() for both prompt and negative_prompt.
MOONVALLEY_MAREY_MAX_PROMPT_LENGTH = 5000
|
|
|
|
def is_valid_task_creation_response(response: MoonvalleyPromptResponse) -> bool:
    """Report whether a task-creation response carries a task ID.

    Returns:
        True when ``response.id`` is truthy; False when it is ``None`` or empty.
    """
    task_id = response.id
    return True if task_id else False
|
|
|
|
def validate_task_creation_response(response) -> None:
    """Raise if the task-creation response does not contain a task ID.

    Args:
        response: Response returned by the initial task-creation request.

    Raises:
        RuntimeError: When is_valid_task_creation_response() rejects the
            response. The message is also logged at error level and includes
            the response's ``code``, ``message`` and the response itself.
    """
    if is_valid_task_creation_response(response):
        return

    failure_text = f"Moonvalley Marey API: Initial request failed. Code: {response.code}, Message: {response.message}, Data: {response}"
    logging.error(failure_text)
    raise RuntimeError(failure_text)
|
|
|
|
def validate_video_to_video_input(video: VideoInput) -> VideoInput:
    """
    Check a video against the Moonvalley Video-to-Video requirements.

    The checks run in a fixed order: resolution first, then container format,
    then duration. The first failing check raises.

    Args:
        video: Input video to validate

    Returns:
        The input video, or the result of trimming it when it is longer than
        5 seconds

    Raises:
        ValueError: If the dimensions cannot be read, the resolution is not
            supported, or the video is shorter than 5 seconds. The container
            check (validate_container_format_is_mp4, defined elsewhere) may
            raise as well.
    """
    vid_width, vid_height = _get_video_dimensions(video)
    _validate_video_dimensions(vid_width, vid_height)

    validate_container_format_is_mp4(video)

    checked_video = _validate_and_trim_duration(video)
    return checked_video
|
|
|
|
| def _get_video_dimensions(video: VideoInput) -> tuple[int, int]: |
| """Extracts video dimensions with error handling.""" |
| try: |
| return video.get_dimensions() |
| except Exception as e: |
| logging.error("Error getting dimensions of video: %s", e) |
| raise ValueError(f"Cannot get video dimensions: {e}") from e |
|
|
|
|
| def _validate_video_dimensions(width: int, height: int) -> None: |
| """Validates video dimensions meet Moonvalley V2V requirements.""" |
| supported_resolutions = { |
| (1920, 1080), |
| (1080, 1920), |
| (1152, 1152), |
| (1536, 1152), |
| (1152, 1536), |
| } |
|
|
| if (width, height) not in supported_resolutions: |
| supported_list = ", ".join([f"{w}x{h}" for w, h in sorted(supported_resolutions)]) |
| raise ValueError(f"Resolution {width}x{height} not supported. Supported: {supported_list}") |
|
|
|
|
def _validate_and_trim_duration(video: VideoInput) -> VideoInput:
    """Enforce the minimum duration, then trim the video if it is too long.

    The duration is read once via ``video.get_duration()`` and used for both
    steps.
    """
    clip_duration = video.get_duration()
    _validate_minimum_duration(clip_duration)
    result = _trim_if_too_long(video, clip_duration)
    return result
|
|
|
|
| def _validate_minimum_duration(duration: float) -> None: |
| """Ensures video is at least 5 seconds long.""" |
| if duration < 5: |
| raise ValueError("Input video must be at least 5 seconds long.") |
|
|
|
|
| def _trim_if_too_long(video: VideoInput, duration: float) -> VideoInput: |
| """Trims video to 5 seconds if longer.""" |
| if duration > 5: |
| return trim_video(video, 5) |
| return video |
|
|
|
|
def parse_width_height_from_res(resolution: str) -> dict[str, int]:
    """Translate a resolution combo label into width/height values.

    Args:
        resolution: Label such as ``"16:9 (1920 x 1080)"``, as offered by the
            ``resolution`` combo inputs of the nodes in this file.

    Returns:
        A new dict with ``"width"`` and ``"height"`` keys. Labels that are not
        recognised fall back to 1920x1080.
    """
    res_map = {
        "16:9 (1920 x 1080)": {"width": 1920, "height": 1080},
        "9:16 (1080 x 1920)": {"width": 1080, "height": 1920},
        "1:1 (1152 x 1152)": {"width": 1152, "height": 1152},
        "4:3 (1536 x 1152)": {"width": 1536, "height": 1152},
        "3:4 (1152 x 1536)": {"width": 1152, "height": 1536},
        # The text-to-video node lists this label in its resolution combo;
        # without an entry here that selection silently became 1920x1080.
        "21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
    }
    return res_map.get(resolution, {"width": 1920, "height": 1080})
|
|
|
|
def parse_control_parameter(value):
    """Map a control-type display label to the identifier sent in the request.

    Labels that are not in the table are treated as "Motion Transfer".
    """
    label_to_control = {
        "Motion Transfer": "motion_control",
        "Canny": "canny_control",
        "Pose Transfer": "pose_control",
        "Depth": "depth_control",
    }
    fallback = label_to_control["Motion Transfer"]
    return label_to_control.get(value, fallback)
|
|
|
|
async def get_response(cls: type[IO.ComfyNode], task_id: str) -> MoonvalleyPromptResponse:
    """Poll the prompts endpoint for ``task_id`` and return poll_op's result.

    Polling uses an interval of 16.0 (presumably seconds) and at most 240
    attempts. The status handed to poll_op is the response's ``status`` when
    it is truthy, otherwise ``None``.
    """

    def _extract_status(resp):
        if resp and resp.status:
            return resp.status
        return None

    task_endpoint = ApiEndpoint(path=f"{API_PROMPTS_ENDPOINT}/{task_id}")
    return await poll_op(
        cls,
        task_endpoint,
        response_model=MoonvalleyPromptResponse,
        status_extractor=_extract_status,
        poll_interval=16.0,
        max_poll_attempts=240,
    )
|
|
|
|
class MoonvalleyImg2VideoNode(IO.ComfyNode):
    """API node that generates a video from a reference image and a prompt."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's inputs, its single video output and hidden auth fields."""
        return IO.Schema(
            node_id="MoonvalleyImg2VideoNode",
            display_name="Moonvalley Marey Image to Video",
            category="api node/video/Moonvalley Marey",
            description="Moonvalley Marey Image to Video Node",
            inputs=[
                IO.Image.Input(
                    "image",
                    tooltip="The reference image used to generate the video",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
                    "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
                    "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
                    "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
                    "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
                    "wobbly, weird, low quality, plastic, stock footage, video camera, boring",
                    tooltip="Negative prompt text",
                ),
                IO.Combo.Input(
                    "resolution",
                    options=[
                        "16:9 (1920 x 1080)",
                        "9:16 (1080 x 1920)",
                        "1:1 (1152 x 1152)",
                        "4:3 (1536 x 1152)",
                        "3:4 (1152 x 1536)",
                    ],
                    default="16:9 (1920 x 1080)",
                    tooltip="Resolution of the output video",
                ),
                IO.Float.Input(
                    "prompt_adherence",
                    default=4.5,
                    min=1.0,
                    max=20.0,
                    step=1.0,
                    tooltip="Guidance scale for generation control",
                ),
                IO.Int.Input(
                    "seed",
                    default=9,
                    min=0,
                    max=4294967295,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Random seed value",
                    control_after_generate=True,
                ),
                IO.Int.Input(
                    "steps",
                    default=33,
                    min=1,
                    max=100,
                    step=1,
                    tooltip="Number of denoising steps",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        image: torch.Tensor,
        prompt: str,
        negative_prompt: str,
        resolution: str,
        prompt_adherence: float,
        seed: int,
        steps: int,
    ) -> IO.NodeOutput:
        """Validate the inputs, create an image-to-video task and return the video.

        Args:
            image: Reference image; its dimensions are checked against the
                module's MIN_*/MAX_* image limits.
            prompt: Required prompt text (at least one character).
            negative_prompt: Negative prompt text; may be empty.
            resolution: Combo label translated by parse_width_height_from_res().
            prompt_adherence: Sent to the API as ``guidance_scale``.
            seed: Seed forwarded to the API.
            steps: Step count forwarded to the API.

        Returns:
            Node output wrapping the video downloaded from the finished
            task's ``output_url``.

        Raises:
            RuntimeError: If the task-creation response has no task ID.
                The validation helpers may raise for invalid inputs.
        """
        # Use the module-level limits for both bounds so they stay in one place.
        validate_image_dimensions(
            image, min_width=MIN_WIDTH, min_height=MIN_HEIGHT, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
        )
        validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        width_height = parse_width_height_from_res(resolution)

        inference_params = MoonvalleyTextToVideoInferenceParams(
            negative_prompt=negative_prompt,
            steps=steps,
            seed=seed,
            guidance_scale=prompt_adherence,
            width=width_height["width"],
            height=width_height["height"],
            use_negative_prompts=True,
        )

        # Inputs are validated above, so the upload only happens for a usable request.
        mime_type = "image/png"
        image_url = (await upload_images_to_comfyapi(cls, image, max_images=1, mime_type=mime_type))[0]
        task_creation_response = await sync_op(
            cls,
            endpoint=ApiEndpoint(path=API_IMG2VIDEO_ENDPOINT, method="POST"),
            response_model=MoonvalleyPromptResponse,
            data=MoonvalleyTextToVideoRequest(
                image_url=image_url, prompt_text=prompt, inference_params=inference_params
            ),
        )
        validate_task_creation_response(task_creation_response)
        final_response = await get_response(cls, task_creation_response.id)
        video = await download_url_to_video_output(final_response.output_url)
        return IO.NodeOutput(video)
|
|
|
|
class MoonvalleyVideo2VideoNode(IO.ComfyNode):
    """API node that generates a video from a reference video and a prompt."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's inputs, its single video output and hidden auth fields."""
        return IO.Schema(
            node_id="MoonvalleyVideo2VideoNode",
            display_name="Moonvalley Marey Video to Video",
            category="api node/video/Moonvalley Marey",
            description="",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="Describes the video to generate",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
                    "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
                    "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
                    "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
                    "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
                    "wobbly, weird, low quality, plastic, stock footage, video camera, boring",
                    tooltip="Negative prompt text",
                ),
                IO.Int.Input(
                    "seed",
                    default=9,
                    min=0,
                    max=4294967295,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Random seed value",
                    control_after_generate=False,
                ),
                IO.Video.Input(
                    "video",
                    tooltip="The reference video used to generate the output video. Must be at least 5 seconds long. "
                    "Videos longer than 5s will be automatically trimmed. Only MP4 format supported.",
                ),
                IO.Combo.Input(
                    "control_type",
                    options=["Motion Transfer", "Pose Transfer"],
                    default="Motion Transfer",
                    optional=True,
                ),
                IO.Int.Input(
                    "motion_intensity",
                    default=100,
                    min=0,
                    max=100,
                    step=1,
                    tooltip="Only used if control_type is 'Motion Transfer'",
                    optional=True,
                ),
                IO.Int.Input(
                    "steps",
                    default=33,
                    min=1,
                    max=100,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Number of inference steps",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        negative_prompt: str,
        seed: int,
        video: Optional[VideoInput] = None,
        control_type: str = "Motion Transfer",
        motion_intensity: Optional[int] = 100,
        steps=33,
        prompt_adherence=4.5,
    ) -> IO.NodeOutput:
        """Validate the inputs, create a video-to-video task and return the video.

        Args:
            prompt: Required prompt text (at least one character).
            negative_prompt: Negative prompt text; may be empty.
            seed: Seed forwarded to the API.
            video: Reference video; validated (and trimmed if longer than
                5 seconds) by validate_video_to_video_input().
            control_type: Display label translated by parse_control_parameter().
            motion_intensity: Only forwarded when ``control_type`` is
                "Motion Transfer" and the value is not ``None``.
            steps: Step count forwarded to the API.
            prompt_adherence: Sent to the API as ``guidance_scale``. The schema
                declares no matching input, so the default applies unless a
                caller passes it explicitly.

        Returns:
            Node output wrapping the video downloaded from the finished
            task's ``output_url``.

        Raises:
            ValueError: If the video fails validation.
            RuntimeError: If the task-creation response has no task ID.
        """
        validated_video = validate_video_to_video_input(video)
        # Check the prompt strings before uploading, so an invalid prompt does
        # not cost a full video upload first.
        validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        video_url = await upload_video_to_comfyapi(cls, validated_video)

        # motion_intensity is only meaningful for the "Motion Transfer" control type.
        control_params = {}
        if control_type == "Motion Transfer" and motion_intensity is not None:
            control_params["motion_intensity"] = motion_intensity

        inference_params = MoonvalleyVideoToVideoInferenceParams(
            negative_prompt=negative_prompt,
            seed=seed,
            control_params=control_params,
            steps=steps,
            guidance_scale=prompt_adherence,
        )

        task_creation_response = await sync_op(
            cls,
            endpoint=ApiEndpoint(path=API_VIDEO2VIDEO_ENDPOINT, method="POST"),
            response_model=MoonvalleyPromptResponse,
            data=MoonvalleyVideoToVideoRequest(
                control_type=parse_control_parameter(control_type),
                video_url=video_url,
                prompt_text=prompt,
                inference_params=inference_params,
            ),
        )
        validate_task_creation_response(task_creation_response)
        final_response = await get_response(cls, task_creation_response.id)
        return IO.NodeOutput(await download_url_to_video_output(final_response.output_url))
|
|
|
|
class MoonvalleyTxt2VideoNode(IO.ComfyNode):
    """API node that generates a video from a text prompt."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Declare the node's inputs, its single video output and hidden auth fields."""
        prompt_input = IO.String.Input(
            "prompt",
            multiline=True,
        )
        negative_prompt_input = IO.String.Input(
            "negative_prompt",
            multiline=True,
            default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
            "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
            "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
            "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
            "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
            "wobbly, weird, low quality, plastic, stock footage, video camera, boring",
            tooltip="Negative prompt text",
        )
        resolution_input = IO.Combo.Input(
            "resolution",
            options=[
                "16:9 (1920 x 1080)",
                "9:16 (1080 x 1920)",
                "1:1 (1152 x 1152)",
                "4:3 (1536 x 1152)",
                "3:4 (1152 x 1536)",
                "21:9 (2560 x 1080)",
            ],
            default="16:9 (1920 x 1080)",
            tooltip="Resolution of the output video",
        )
        adherence_input = IO.Float.Input(
            "prompt_adherence",
            default=4.0,
            min=1.0,
            max=20.0,
            step=1.0,
            tooltip="Guidance scale for generation control",
        )
        seed_input = IO.Int.Input(
            "seed",
            default=9,
            min=0,
            max=4294967295,
            step=1,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
            tooltip="Random seed value",
        )
        steps_input = IO.Int.Input(
            "steps",
            default=33,
            min=1,
            max=100,
            step=1,
            tooltip="Inference steps",
        )

        return IO.Schema(
            node_id="MoonvalleyTxt2VideoNode",
            display_name="Moonvalley Marey Text to Video",
            category="api node/video/Moonvalley Marey",
            description="",
            inputs=[
                prompt_input,
                negative_prompt_input,
                resolution_input,
                adherence_input,
                seed_input,
                steps_input,
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        negative_prompt: str,
        resolution: str,
        prompt_adherence: float,
        seed: int,
        steps: int,
    ) -> IO.NodeOutput:
        """Validate the prompts, create a text-to-video task and return the video.

        Args:
            prompt: Required prompt text (at least one character).
            negative_prompt: Negative prompt text; may be empty.
            resolution: Combo label translated by parse_width_height_from_res().
            prompt_adherence: Sent to the API as ``guidance_scale``.
            seed: Seed forwarded to the API.
            steps: Step count forwarded to the API.

        Returns:
            Node output wrapping the video downloaded from the finished
            task's ``output_url``.

        Raises:
            RuntimeError: If the task-creation response has no task ID.
        """
        validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
        dims = parse_width_height_from_res(resolution)

        # The frame count is fixed at 128; the node exposes no input for it.
        params = MoonvalleyTextToVideoInferenceParams(
            negative_prompt=negative_prompt,
            steps=steps,
            seed=seed,
            guidance_scale=prompt_adherence,
            num_frames=128,
            width=dims["width"],
            height=dims["height"],
        )

        create_endpoint = ApiEndpoint(path=API_TXT2VIDEO_ENDPOINT, method="POST")
        payload = MoonvalleyTextToVideoRequest(prompt_text=prompt, inference_params=params)
        created = await sync_op(
            cls,
            endpoint=create_endpoint,
            response_model=MoonvalleyPromptResponse,
            data=payload,
        )
        validate_task_creation_response(created)

        finished = await get_response(cls, created.id)
        video_output = await download_url_to_video_output(finished.output_url)
        return IO.NodeOutput(video_output)
|
|
|
|
class MoonvalleyExtension(ComfyExtension):
    """Extension exposing the three Moonvalley Marey node classes in this file."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return the node classes provided by this extension."""
        nodes: list[type[IO.ComfyNode]] = [
            MoonvalleyImg2VideoNode,
            MoonvalleyTxt2VideoNode,
            MoonvalleyVideo2VideoNode,
        ]
        return nodes
|
|
|
|
async def comfy_entrypoint() -> MoonvalleyExtension:
    """Create and return a new MoonvalleyExtension instance."""
    extension = MoonvalleyExtension()
    return extension
|
|