| import comfy |
| import comfy_extras.nodes_lt as nodes_lt |
| import comfy_extras.nodes_post_processing as post_processing |
| import nodes |
|
|
| from .nodes_registry import comfy_node |
|
|
|
|
def blur_internal(image, blur_radius):
    """Blur ``image`` through ``post_processing.Blur`` when a positive radius is given.

    Args:
        image: Image handed unchanged to ``post_processing.Blur.execute``.
        blur_radius: Blur radius. Any value that is not greater than zero
            disables blurring.

    Returns:
        Element ``0`` of the ``Blur.execute`` result when ``blur_radius`` is
        positive; otherwise the ``image`` object that was passed in.
    """
    if not blur_radius > 0:
        return image

    # Sigma scales linearly with the requested radius.
    blur_sigma = 0.3 * blur_radius
    blur_result = post_processing.Blur.execute(image, blur_radius, blur_sigma)
    return blur_result[0]
|
|
|
|
@comfy_node(name="LTXVAddGuideAdvanced")
class LTXVAddGuideAdvanced:
    """Guide node that resizes and preprocesses an image before calling ``LTXVAddGuide``.

    The target size is derived from the latent's sample shape and the VAE's
    ``downscale_index_formula``. The resized image is then passed through
    ``LTXVPreprocess`` (using ``crf``) and ``blur_internal`` (using
    ``blur_radius``) before being forwarded to ``nodes_lt.LTXVAddGuide``.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input specification; every input of this node is required."""
        frame_idx_spec = (
            "INT",
            {
                "default": 0,
                "min": -9999,
                "max": 9999,
                "tooltip": "Frame index to start the conditioning at. For single-frame images or "
                "videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
                "frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
                "the nearest multiple of 8. Negative values are counted from the end of the video.",
            },
        )
        strength_spec = (
            "FLOAT",
            {
                "default": 1.0,
                "min": 0.0,
                "max": 1.0,
                "tooltip": "Strength of the conditioning. Higher values will make the conditioning more exact.",
            },
        )
        crf_spec = (
            "INT",
            {
                "default": 29,
                "min": 0,
                "max": 51,
                "step": 1,
                "tooltip": "CRF value for the video. Higher values mean more motion, lower values mean higher quality.",
            },
        )
        blur_radius_spec = (
            "INT",
            {
                "default": 0,
                "min": 0,
                "max": 7,
                "step": 1,
                "tooltip": "Blur kernel radius size. Higher values mean more motion, lower values mean higher quality.",
            },
        )
        interpolation_spec = (
            [
                "lanczos",
                "bislerp",
                "nearest",
                "bilinear",
                "bicubic",
                "area",
                "nearest-exact",
            ],
            {"default": "lanczos"},
        )
        crop_spec = (["center", "disabled"], {"default": "disabled"})

        # Insertion order of the keys is kept exactly as before.
        required_inputs = {
            "positive": ("CONDITIONING",),
            "negative": ("CONDITIONING",),
            "vae": ("VAE",),
            "latent": ("LATENT",),
            "image": ("IMAGE",),
            "frame_idx": frame_idx_spec,
            "strength": strength_spec,
            "crf": crf_spec,
            "blur_radius": blur_radius_spec,
            "interpolation": interpolation_spec,
            "crop": crop_spec,
        }
        return {"required": required_inputs}

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    DESCRIPTION = (
        "Adds a conditioning frame or a video at a specific frame index. "
        "This node is used to add a keyframe or a video segment which should appear in the "
        "generated video at a specified index. It resizes the image to the correct size and "
        "applies preprocessing to it."
    )

    def generate(
        self,
        positive,
        negative,
        vae,
        latent,
        image,
        frame_idx,
        strength,
        crf,
        blur_radius,
        interpolation,
        crop,
    ):
        """Resize, preprocess and blur ``image``, then delegate to ``LTXVAddGuide``.

        Args:
            positive: Positive conditioning, forwarded unchanged.
            negative: Negative conditioning, forwarded unchanged.
            vae: VAE whose ``downscale_index_formula`` supplies the width and
                height scale factors.
            latent: Mapping with a ``"samples"`` entry; axes 4 and 3 of its
                shape are scaled to obtain the target width and height.
            image: Guide image(s); presumably channels-last, since the last
                axis is moved to position 1 for resizing. TODO confirm.
            frame_idx: Frame index forwarded to ``LTXVAddGuide``.
            strength: Conditioning strength forwarded to ``LTXVAddGuide``.
            crf: Value passed to ``LTXVPreprocess``.
            blur_radius: Radius passed to ``blur_internal``.
            interpolation: Resampling method name for ``common_upscale``.
            crop: Crop mode for ``common_upscale``.

        Returns:
            Whatever ``nodes_lt.LTXVAddGuide().execute`` returns.
        """
        _, width_scale, height_scale = vae.downscale_index_formula
        samples_shape = latent["samples"].shape
        target_width = samples_shape[4] * width_scale
        target_height = samples_shape[3] * height_scale

        # Resize with axis -1 moved to position 1, then restore the layout and
        # limit the values to the [0, 1] range.
        resized = comfy.utils.common_upscale(
            image.movedim(-1, 1), target_width, target_height, interpolation, crop=crop
        )
        guide_image = resized.movedim(1, -1).clamp(0, 1)

        guide_image = nodes_lt.LTXVPreprocess().execute(guide_image, crf)[0]
        guide_image = blur_internal(guide_image, blur_radius)

        return nodes_lt.LTXVAddGuide().execute(
            positive=positive,
            negative=negative,
            vae=vae,
            latent=latent,
            image=guide_image,
            frame_idx=frame_idx,
            strength=strength,
        )
|
|
|
|
@comfy_node(name="LTXVImgToVideoAdvanced")
class LTXVImgToVideoAdvanced:
    """Image-to-video node that resizes and preprocesses the image before ``LTXVImgToVideo``.

    The image is resized to the requested ``width`` x ``height``, limited to
    the [0, 1] range, passed through ``LTXVPreprocess`` (using ``crf``) and
    ``blur_internal`` (using ``blur_radius``), then forwarded to
    ``nodes_lt.LTXVImgToVideo``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the input specification; every input of this node is required."""
        return {
            "required": {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "vae": ("VAE",),
                "image": ("IMAGE",),
                "width": (
                    "INT",
                    {
                        "default": 768,
                        "min": 64,
                        "max": nodes.MAX_RESOLUTION,
                        "step": 32,
                    },
                ),
                "height": (
                    "INT",
                    {
                        "default": 512,
                        "min": 64,
                        "max": nodes.MAX_RESOLUTION,
                        "step": 32,
                    },
                ),
                "length": (
                    "INT",
                    {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8},
                ),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                "crf": (
                    "INT",
                    {
                        "default": 29,
                        "min": 0,
                        "max": 51,
                        "step": 1,
                        "tooltip": "CRF value for the video. Higher values mean more motion, lower values mean higher quality.",
                    },
                ),
                "blur_radius": (
                    "INT",
                    {
                        "default": 0,
                        "min": 0,
                        "max": 7,
                        "step": 1,
                        "tooltip": "Blur kernel radius size. Higher values mean more motion, lower values mean higher quality.",
                    },
                ),
                "interpolation": (
                    [
                        "lanczos",
                        "bislerp",
                        "nearest",
                        "bilinear",
                        "bicubic",
                        "area",
                        "nearest-exact",
                    ],
                    {"default": "lanczos"},
                ),
                "crop": (["center", "disabled"], {"default": "disabled"}),
                "strength": ("FLOAT", {"default": 0.9, "min": 0, "max": 1}),
            }
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    DESCRIPTION = (
        "Adds a conditioning frame or a video at index 0. "
        "This node is used to add a keyframe or a video segment which should appear in the "
        "generated video at index 0. It resizes the image to the correct size "
        "and applies preprocessing to it."
    )

    def generate(
        self,
        positive,
        negative,
        vae,
        image,
        width,
        height,
        length,
        batch_size,
        crf,
        blur_radius,
        interpolation,
        crop,
        strength,
    ):
        """Resize, clamp, preprocess and blur ``image``, then delegate to ``LTXVImgToVideo``.

        Args:
            positive: Positive conditioning, forwarded unchanged.
            negative: Negative conditioning, forwarded unchanged.
            vae: VAE forwarded to ``LTXVImgToVideo``.
            image: Source image(s); presumably channels-last, since the last
                axis is moved to position 1 for resizing. TODO confirm.
            width: Target width, used for resizing and forwarded.
            height: Target height, used for resizing and forwarded.
            length: Forwarded to ``LTXVImgToVideo``.
            batch_size: Forwarded to ``LTXVImgToVideo``.
            crf: Value passed to ``LTXVPreprocess``.
            blur_radius: Radius passed to ``blur_internal``.
            interpolation: Resampling method name for ``common_upscale``.
            crop: Crop mode for ``common_upscale``.
            strength: Forwarded to ``LTXVImgToVideo``.

        Returns:
            Whatever ``nodes_lt.LTXVImgToVideo().execute`` returns.
        """
        image = (
            comfy.utils.common_upscale(
                image.movedim(-1, 1), width, height, interpolation, crop=crop
            )
            .movedim(1, -1)
            # Resampling kernels such as bicubic can overshoot the input range.
            # Clamp back to [0, 1], as LTXVAddGuideAdvanced does after its own
            # resize, so later stages never see out-of-range pixel values.
            .clamp(0, 1)
        )
        image = nodes_lt.LTXVPreprocess().execute(image, crf)[0]
        image = blur_internal(image, blur_radius)
        return nodes_lt.LTXVImgToVideo().execute(
            positive=positive,
            negative=negative,
            vae=vae,
            image=image,
            width=width,
            height=height,
            length=length,
            batch_size=batch_size,
            strength=strength,
        )
|
|
|
|
@comfy_node(name="LTXVAddGuideAdvancedAttention")
class LTXVAddGuideAdvancedAttention:
    """Extended keyframe guide node with per-guide attention strength control.

    Same preprocessing as LTXVAddGuideAdvanced (CRF, blur, interpolation, crop),
    plus attention_strength and attention_mask inputs to control how strongly
    this guide's conditioning influences generation via self-attention.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the input specification: required inputs plus an optional ``attention_mask``."""
        return {
            "required": {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "vae": ("VAE",),
                "latent": ("LATENT",),
                "image": ("IMAGE",),
                "frame_idx": (
                    "INT",
                    {
                        "default": 0,
                        "min": -9999,
                        "max": 9999,
                        "tooltip": (
                            "Frame index to start the conditioning at. "
                            "Negative values are counted from the end of the video."
                        ),
                    },
                ),
                "strength": (
                    "FLOAT",
                    {
                        "default": 1.0,
                        "min": 0.0,
                        "max": 1.0,
                        "tooltip": "Strength of the conditioning. Higher values make it more exact.",
                    },
                ),
                "crf": (
                    "INT",
                    {
                        "default": 29,
                        "min": 0,
                        "max": 51,
                        "step": 1,
                        "tooltip": "CRF value. Higher = more motion, lower = higher quality.",
                    },
                ),
                "blur_radius": (
                    "INT",
                    {
                        "default": 0,
                        "min": 0,
                        "max": 7,
                        "step": 1,
                        "tooltip": "Blur kernel radius. Higher = more motion.",
                    },
                ),
                "interpolation": (
                    [
                        "lanczos",
                        "bislerp",
                        "nearest",
                        "bilinear",
                        "bicubic",
                        "area",
                        "nearest-exact",
                    ],
                    {"default": "lanczos"},
                ),
                "crop": (["center", "disabled"], {"default": "disabled"}),
                "attention_strength": (
                    "FLOAT",
                    {
                        "default": 1.0,
                        "min": 0.0,
                        "max": 1.0,
                        "step": 0.01,
                        "tooltip": (
                            "Controls how strongly this guide influences generation via "
                            "self-attention. 1.0 = full conditioning, 0.0 = ignore."
                        ),
                    },
                ),
            },
            "optional": {
                "attention_mask": (
                    "MASK",
                    {
                        "tooltip": (
                            "Optional pixel-space spatial mask. Shape (F, H, W) or (H, W). "
                            "Values in [0, 1]. Controls per-region conditioning influence. "
                            "Multiplied by attention_strength."
                        ),
                    },
                ),
            },
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    DESCRIPTION = (
        "Adds a conditioning frame/video at a specific frame index with per-guide "
        "attention strength control. Same preprocessing as LTXVAddGuideAdvanced, "
        "plus attention_strength and optional spatial attention_mask."
    )

    def generate(
        self,
        positive,
        negative,
        vae,
        latent,
        image,
        frame_idx,
        strength,
        crf,
        blur_radius,
        interpolation,
        crop,
        attention_strength=1.0,
        attention_mask=None,
    ):
        """Add a preprocessed guide to the latent and record its attention settings.

        Args:
            positive: Positive conditioning to extend.
            negative: Negative conditioning to extend.
            vae: VAE whose ``downscale_index_formula`` supplies the scale
                factors and which is passed to ``LTXVAddGuide.encode``.
            latent: Mapping with a ``"samples"`` entry whose shape unpacks
                into five axes, the last three used as length, height, width.
            image: Guide image(s); ``len(image)`` is used as the guide length.
            frame_idx: Requested start frame, resolved through
                ``LTXVAddGuide.get_latent_index``.
            strength: Conditioning strength passed to ``append_keyframe``.
            crf: Value passed to ``LTXVPreprocess``.
            blur_radius: Radius passed to ``blur_internal``.
            interpolation: Resampling method name for ``common_upscale``.
            crop: Crop mode for ``common_upscale``.
            attention_strength: Per-guide attention strength recorded on both
                conditionings.
            attention_mask: Optional mask; passed through ``normalize_mask``
                before being recorded.

        Returns:
            A ``(positive, negative, latent)`` tuple where ``latent`` is a
            dict with ``"samples"`` and ``"noise_mask"`` entries.

        Raises:
            AssertionError: If the encoded guide would extend past the end of
                the latent sequence.
        """
        # Imported here rather than at module level; presumably to avoid an
        # import cycle or an eager dependency. TODO confirm.
        from .iclora_attention import append_guide_attention_entry, normalize_mask

        # Collect latent geometry and the VAE scale factors.
        scale_factors = vae.downscale_index_formula
        _, width_scale_factor, height_scale_factor = scale_factors
        latent_image = latent["samples"]
        noise_mask = nodes_lt.get_noise_mask(latent)
        _, _, latent_length, latent_height, latent_width = latent_image.shape

        # Same image preprocessing as LTXVAddGuideAdvanced: resize to the
        # pixel-space size implied by the latent, clamp, preprocess, blur.
        width = latent_width * width_scale_factor
        height = latent_height * height_scale_factor
        image = (
            comfy.utils.common_upscale(
                image.movedim(-1, 1), width, height, interpolation, crop=crop
            )
            .movedim(1, -1)
            .clamp(0, 1)
        )
        image = nodes_lt.LTXVPreprocess().execute(image, crf)[0]
        image = blur_internal(image, blur_radius)

        # Only the second element of the encode result is used below;
        # presumably it is the encoded guide latent. TODO confirm.
        _, t = nodes_lt.LTXVAddGuide.encode(
            vae, latent_width, latent_height, image, scale_factors
        )

        # Resolve the requested frame index to a frame/latent index pair.
        frame_idx, latent_idx = nodes_lt.LTXVAddGuide.get_latent_index(
            positive, latent_length, len(image), frame_idx, scale_factors
        )
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type and message are
        # unchanged.
        if latent_idx + t.shape[2] > latent_length:
            raise AssertionError(
                "Conditioning frames exceed the length of the latent sequence."
            )

        # Attach the guide to both conditionings, the latent and the noise mask.
        positive, negative, latent_image, noise_mask = (
            nodes_lt.LTXVAddGuide.append_keyframe(
                positive,
                negative,
                frame_idx,
                latent_image,
                noise_mask,
                t,
                strength,
                scale_factors,
            )
        )

        # Record per-guide attention data. The count is the product of axes
        # 2-4 of `t`; the "pre_filter" name suggests it is the token count
        # before any later filtering. TODO confirm.
        pre_filter_count = t.shape[2] * t.shape[3] * t.shape[4]
        guide_latent_shape = list(t.shape[2:])
        norm_mask = normalize_mask(attention_mask)
        positive = append_guide_attention_entry(
            positive,
            pre_filter_count,
            guide_latent_shape,
            attention_strength=attention_strength,
            attention_mask=norm_mask,
        )
        negative = append_guide_attention_entry(
            negative,
            pre_filter_count,
            guide_latent_shape,
            attention_strength=attention_strength,
            attention_mask=norm_mask,
        )

        return (
            positive,
            negative,
            {"samples": latent_image, "noise_mask": noise_mask},
        )
|
|