aliensmn committed on
Commit
cf812a0
·
verified ·
1 Parent(s): 63d5317

Mirror from https://github.com/kijai/ComfyUI-WanVideoWrapper

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +16 -35
  2. .github/FUNDING.yml +1 -0
  3. .github/workflows/publish.yml +25 -0
  4. .gitignore +13 -0
  5. ATI/motion.py +42 -0
  6. ATI/motion_patch.py +142 -0
  7. ATI/nodes.py +329 -0
  8. HuMo/audio_proj.py +87 -0
  9. HuMo/nodes.py +287 -0
  10. HuMo/whisper_config.json +50 -0
  11. LICENSE +201 -0
  12. MTV/data/mean.npy +3 -0
  13. MTV/data/std.npy +3 -0
  14. MTV/draw_pose.py +142 -0
  15. MTV/motion4d/__init__.py +1 -0
  16. MTV/motion4d/vqvae.py +329 -0
  17. MTV/mtv.py +193 -0
  18. MTV/nlf.py +0 -0
  19. MTV/nodes.py +242 -0
  20. __init__.py +113 -0
  21. cache_methods/cache_methods.py +158 -0
  22. cache_methods/nodes_cache.py +140 -0
  23. configs/T5_tokenizer/special_tokens_map.json +308 -0
  24. configs/T5_tokenizer/spiece.model +3 -0
  25. configs/T5_tokenizer/tokenizer.json +3 -0
  26. configs/T5_tokenizer/tokenizer_config.json +2748 -0
  27. configs/transformer_config_i2v.json +14 -0
  28. context_windows/context.py +258 -0
  29. controlnet/nodes.py +173 -0
  30. controlnet/wan_controlnet.py +281 -0
  31. custom_linear.py +115 -0
  32. diffsynth/vram_management/LICENSE +201 -0
  33. diffsynth/vram_management/__init__.py +1 -0
  34. diffsynth/vram_management/layers.py +103 -0
  35. diffsynth/vram_management/utils.py +51 -0
  36. echoshot/echoshot.py +104 -0
  37. enhance_a_video/LICENSE +562 -0
  38. enhance_a_video/__init__.py +0 -0
  39. enhance_a_video/enhance.py +55 -0
  40. enhance_a_video/globals.py +36 -0
  41. example_workflows/example_inputs/MTV_crafter_example_pose.mp4 +3 -0
  42. example_workflows/example_inputs/env.png +3 -0
  43. example_workflows/example_inputs/human.png +3 -0
  44. example_workflows/example_inputs/jeep.mp4 +3 -0
  45. example_workflows/example_inputs/thing.png +0 -0
  46. example_workflows/example_inputs/wolf_interpolated.mp4 +3 -0
  47. example_workflows/example_inputs/woman.jpg +3 -0
  48. example_workflows/example_inputs/woman.wav +3 -0
  49. example_workflows/wanvideo2_2_I2V_A14B_example_WIP.json +2074 -0
  50. example_workflows/wanvideo_14B_pusa_I2V_example_01.json +1326 -0
.gitattributes CHANGED
@@ -1,35 +1,16 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
3
+ MTV/data/mean.npy filter=lfs diff=lfs merge=lfs -text
4
+ MTV/data/std.npy filter=lfs diff=lfs merge=lfs -text
5
+ configs/T5_tokenizer/spiece.model filter=lfs diff=lfs merge=lfs -text
6
+ configs/T5_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
7
+ example_workflows/example_inputs/MTV_crafter_example_pose.mp4 filter=lfs diff=lfs merge=lfs -text
8
+ example_workflows/example_inputs/env.png filter=lfs diff=lfs merge=lfs -text
9
+ example_workflows/example_inputs/human.png filter=lfs diff=lfs merge=lfs -text
10
+ example_workflows/example_inputs/jeep.mp4 filter=lfs diff=lfs merge=lfs -text
11
+ example_workflows/example_inputs/wolf_interpolated.mp4 filter=lfs diff=lfs merge=lfs -text
12
+ example_workflows/example_inputs/woman.jpg filter=lfs diff=lfs merge=lfs -text
13
+ example_workflows/example_inputs/woman.wav filter=lfs diff=lfs merge=lfs -text
14
+ fantasyportrait/models/face_det.onnx filter=lfs diff=lfs merge=lfs -text
15
+ fantasyportrait/models/face_landmark.onnx filter=lfs diff=lfs merge=lfs -text
16
+ multitalk/encoded_silence.safetensors filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/FUNDING.yml ADDED
@@ -0,0 +1 @@
 
 
1
+ github: [kijai]
.github/workflows/publish.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish to Comfy registry
2
+ on:
3
+ workflow_dispatch:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - "pyproject.toml"
9
+
10
+ permissions:
11
+ issues: write
12
+
13
+ jobs:
14
+ publish-node:
15
+ name: Publish Custom Node to registry
16
+ runs-on: ubuntu-latest
17
+ if: ${{ github.repository_owner == 'kijai' }}
18
+ steps:
19
+ - name: Check out code
20
+ uses: actions/checkout@v4
21
+ - name: Publish Custom Node
22
+ uses: Comfy-Org/publish-node-action@v1
23
+ with:
24
+ ## Add your own personal access token to your Github Repository secrets and reference it here.
25
+ personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ output/
2
+ *__pycache__/
3
+ samples*/
4
+ runs/
5
+ checkpoints/
6
+ master_ip
7
+ logs/
8
+ *.DS_Store
9
+ .idea
10
+ tools/
11
+ .vscode/
12
+ convert_*
13
+ *.pt
ATI/motion.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, List, Optional, Tuple, Union
16
+ import numpy as np
17
+ import torch
18
+
19
def process_tracks(tracks_np: np.ndarray, frame_size: Tuple[int, int], quant_multi: int = 8, **kwargs):
    """Normalize raw point tracks into the model's packed track format.

    Args:
        tracks_np: float array of tracked points, layout (N, 121, 1, 3)
            or already time-major (121, N, 1, 3); last dim = (x, y, visible).
        frame_size: (W, H) of the source frames in pixels.
        quant_multi: unused here; kept for interface compatibility.

    Returns:
        Tensor of shape (81, N, 4) with rows (t, x, y, visible):
        coordinates centered and scaled by the short edge into roughly
        [-1, 1], and the 121-sample (24 fps) sequence resampled to 81
        samples (16 fps).
    """
    tracks = torch.from_numpy(tracks_np).float()

    # Accept (N, 121, ...) input by moving the time axis first.
    if tracks.shape[1] == 121:
        tracks = torch.permute(tracks, (1, 0, 2, 3))

    coords, visibles = tracks[..., :2], tracks[..., 2:3]

    # Center on the frame, then scale by the short edge so the short
    # axis spans [-1, 1].
    short_edge = min(*frame_size)
    coords = coords - torch.tensor([*frame_size]).type_as(coords) / 2
    coords = coords / short_edge * 2

    # Map visibility {0, 1} -> {-1, 1}.
    visibles = visibles * 2 - 1

    # Per-frame time coordinate in [-1, 1], broadcast across all points.
    trange = torch.linspace(-1, 1, coords.shape[0]).view(-1, 1, 1, 1).expand(*visibles.shape)

    packed = torch.cat([trange, coords, visibles], dim=-1).view(121, -1, 4)
    head = packed[:1]
    tail = packed[1:]  # 121 => 120 | 1
    # 24 fps -> 16 fps: duplicate to 240 virtual samples, keep every third.
    tail = torch.repeat_interleave(tail, 2, dim=0)[1::3]  # 120 => 240 => 80
    return torch.cat([head, tail], dim=0)
ATI/motion_patch.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List, Optional, Tuple, Union
16
+ import torch
17
+
18
+
19
+ # Refer to https://github.com/Angtian/VoGE/blob/main/VoGE/Utils.py
20
def ind_sel(target: torch.Tensor, ind: torch.Tensor, dim: int = 1):
    """
    :param target: [... (can be k or 1), n > M, ...]
    :param ind: [... (k), M]
    :param dim: dim to apply index on
    :return: sel_target [... (k), M, ...]
    """
    assert (
        len(ind.shape) > dim
    ), "Index must have the target dim, but get dim: %d, ind shape: %s" % (dim, str(ind.shape))

    # Broadcast the leading (batch-like) axes of `target` against `ind`:
    # every size-1 axis before `dim` expands to the matching `ind` size.
    lead = [ind.shape[k] if target.shape[k] == 1 else -1 for k in range(dim)]
    target = target.expand(*lead, *([-1] * (len(target.shape) - dim)))

    # Append singleton axes to `ind` and expand them over the trailing
    # feature axes of `target`, as torch.gather requires a full-rank index.
    index = ind
    trailing = len(target.shape) - (dim + 1)
    if trailing > 0:
        for _ in range(trailing):
            index = index.unsqueeze(-1)
        index = index.expand(*(-1,) * (dim + 1), *target.shape[dim + 1:])

    return torch.gather(target, dim=dim, index=index)
49
+
50
+
51
def merge_final(vert_attr: torch.Tensor, weight: torch.Tensor, vert_assign: torch.Tensor):
    """
    :param vert_attr: [n, d] or [b, n, d] color or feature of each vertex
    :param weight: [b(optional), w, h, M] weight of selected vertices
    :param vert_assign: [b(optional), w, h, M] selective index
    :return: weighted sum over the M selected vertices, [b(optional), w, h, d]
    """
    target_dim = len(vert_assign.shape) - 1
    if len(vert_attr.shape) == 2:
        # Unbatched attributes: prepend singleton axes so ind_sel can
        # broadcast [n, d] against the [.., w, h, M] index tensor.
        assert vert_attr.shape[0] > vert_assign.max()
        shaped = vert_attr.reshape([1] * target_dim + list(vert_attr.shape))
    else:
        # Batched attributes: keep the batch axis, pad singletons between.
        assert vert_attr.shape[1] > vert_assign.max()
        shaped = vert_attr.reshape(
            [vert_attr.shape[0]] + [1] * (target_dim - 1) + list(vert_attr.shape[1:])
        )
    sel_attr = ind_sel(shaped, vert_assign.type(torch.long), dim=target_dim)

    # Weight each selected vertex attribute, then sum over M.
    return torch.sum(sel_attr * weight.unsqueeze(-1), dim=-2)
81
+
82
+
83
def patch_motion(
    tracks: torch.FloatTensor,  # (B, T, N, 4)
    vid: torch.FloatTensor,  # (C, T, H, W)
    temperature: float = 220.0,
    vae_divide: tuple = (4, 16),
    topk: int = 2,
):
    """Inject point-track motion guidance into a latent video tensor.

    For every latent frame after the first, each track point spreads the
    feature sampled at its first-frame position to nearby spatial
    locations, weighted by exp(-dist^2 * temperature) and gated by the
    point's visibility. The result is blended with the original latent
    and returned with `vae_divide[0]` confidence-mask channels prepended.

    Args:
        tracks: (B, T, N, 4) rows of (t, x, y, visible); coordinates are
            assumed already normalized by the short edge as produced by
            process_tracks -- TODO confirm against caller.
        vid: (C, T, H, W) latent; channels [vae_divide[0]:] are features.
        temperature: sharpness of the distance weighting.
        vae_divide: (mask_channels, feature_channels) split of C.
        topk: number of strongest tracks blended per spatial location.

    Returns:
        (vae_divide[0] + feature_channels, T, H, W) tensor: expanded
        weight mask concatenated with the motion-patched features.
    """
    with torch.no_grad():
        _, T, H, W = vid.shape
        N = tracks.shape[2]
        _, tracks, visible = torch.split(
            tracks, [1, 2, 1], dim=-1
        )  # (B, T, N, 2) | (B, T, N, 1)
        # Re-express both axes in short-edge units and clamp into frame.
        tracks_n = tracks / torch.tensor([W / min(H, W), H / min(H, W)], device=tracks.device)
        tracks_n = tracks_n.clamp(-1, 1)
        visible = visible.clamp(0, 1)

        # Spatial grid in the same short-edge-normalized coordinates.
        xx = torch.linspace(-W / min(H, W), W / min(H, W), W)
        yy = torch.linspace(-H / min(H, W), H / min(H, W), H)

        grid = torch.stack(torch.meshgrid(yy, xx, indexing="ij")[::-1], dim=-1).to(
            tracks.device
        )

        tracks_pad = tracks[:, 1:]
        visible_pad = visible[:, 1:]

        # Collapse each group of 4 track samples onto one latent frame
        # (the view requires 4*(T-1) samples after the first), visibility-weighted.
        visible_align = visible_pad.view(T - 1, 4, *visible_pad.shape[2:]).sum(1)
        tracks_align = (tracks_pad * visible_pad).view(T - 1, 4, *tracks_pad.shape[2:]).sum(
            1
        ) / (visible_align + 1e-5)
        # Squared distance from every grid cell to every aligned track point.
        dist_ = (
            (tracks_align[:, None, None] - grid[None, :, :, None]).pow(2).sum(-1)
        )  # T, H, W, N
        weight = torch.exp(-dist_ * temperature) * visible_align.clamp(0, 1).view(
            T - 1, 1, 1, N
        )
        # Keep only the topk most influential tracks per location.
        vert_weight, vert_index = torch.topk(
            weight, k=min(topk, weight.shape[-1]), dim=-1
        )

        grid_mode = "bilinear"
        # Sample each track's feature from the FIRST frame's feature channels.
        point_feature = torch.nn.functional.grid_sample(
            vid[vae_divide[0]:].permute(1, 0, 2, 3)[:1],
            tracks_n[:, :1].type(vid.dtype),
            mode=grid_mode,
            padding_mode="zeros",
            align_corners=False,
        )
        point_feature = point_feature.squeeze(0).squeeze(1).permute(1, 0)  # N, C=16

        out_feature = merge_final(point_feature, vert_weight, vert_index).permute(3, 0, 1, 2)  # T - 1, H, W, C => C, T - 1, H, W
        out_weight = vert_weight.sum(-1)  # T - 1, H, W

        # out feature -> already soft weighted
        mix_feature = out_feature + vid[vae_divide[0]:, 1:] * (1 - out_weight.clamp(0, 1))

        # Re-attach the untouched first frame; build the matching mask.
        out_feature_full = torch.cat([vid[vae_divide[0]:, :1], mix_feature], dim=1)  # C, T, H, W
        out_mask_full = torch.cat([torch.ones_like(out_weight[:1]), out_weight], dim=0)  # T, H, W
        return torch.cat([out_mask_full[None].expand(vae_divide[0], -1, -1, -1), out_feature_full], dim=0)
ATI/nodes.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from .motion import process_tracks
3
+ import numpy as np
4
+ from typing import List, Tuple
5
+ import torch
6
+ FIXED_LENGTH = 121
7
def pad_pts(tr):
    """Convert a list of {x, y} dicts to a (FIXED_LENGTH, 1, 3) float32 array.

    Each point becomes (x, y, 1), the trailing 1 marking it as a real
    sample. Shorter tracks are zero-padded, longer ones truncated.
    """
    pts = np.asarray([(p['x'], p['y'], 1) for p in tr], dtype=np.float32)
    count = pts.shape[0]
    if count >= FIXED_LENGTH:
        pts = pts[:FIXED_LENGTH]
    else:
        filler = np.zeros((FIXED_LENGTH - count, 3), dtype=np.float32)
        pts = np.concatenate((pts, filler), axis=0)
    return pts.reshape(FIXED_LENGTH, 1, 3)
17
+
18
def age_to_bgr(ratio: float) -> Tuple[int, int, int]:
    """
    Map ratio∈[0,1] through: 0→blue, 1/3→green, 2/3→yellow, 1→red.

    NOTE: despite the function name, the tuple is returned in (R, G, B)
    order — the previous docstring claimed (B, G, R), which contradicted
    the code. The caller draws on RGB frames, so (R, G, B) is what it needs.
    """
    if ratio <= 1/3:
        # blue→green
        t = ratio / (1/3)
        b = int(255 * (1 - t))
        g = int(255 * t)
        r = 0
    elif ratio <= 2/3:
        # green→yellow
        t = (ratio - 1/3) / (1/3)
        b = 0
        g = 255
        r = int(255 * t)
    else:
        # yellow→red
        t = (ratio - 2/3) / (1/3)
        b = 0
        g = int(255 * (1 - t))
        r = 255
    return (r, g, b)
42
+
43
def paint_point_track(
    frames: np.ndarray,
    point_tracks: np.ndarray,
    visibles: np.ndarray,
    min_radius: int = 1,
    max_radius: int = 6,
    max_retain: int = 50
) -> np.ndarray:
    """
    Draws every past point of each track on each frame, with radius and color
    interpolated by the point's age (old→small to new→large).

    Args:
        frames: [F, H, W, 3] uint8 RGB
        point_tracks:[N, F, 2] float32 – (x,y) in pixel coords
        visibles: [N, F] bool – visibility mask
        min_radius: radius for the very first point (oldest)
        max_radius: radius for the current point (newest)
        max_retain: points older than this many frames are not drawn

    Returns:
        video: [F, H, W, 3] uint8 RGB
    """
    import cv2  # local import: OpenCV is only needed for visualization
    num_points, num_frames = point_tracks.shape[:2]
    H, W = frames.shape[1:3]

    video = frames.copy()

    for t in range(num_frames):
        # start from the original frame
        frame = video[t].copy()

        for i in range(num_points):
            # draw every past step tau = 0..t
            for tau in range(t + 1):
                if not visibles[i, tau]:
                    continue

                age = t - tau
                if age > max_retain:
                    continue

                # sub-pixel offset + clamp to frame bounds
                x, y = point_tracks[i, tau] + 0.5
                xi = int(np.clip(x, 0, W - 1))
                yi = int(np.clip(y, 0, H - 1))

                # age-ratio in [0,1]: 1 = newest, 0 = oldest retained.
                # FIX: guard max_retain == 0, which previously raised
                # ZeroDivisionError (the old guard checked num_frames
                # instead of the divisor). When max_retain == 0, only
                # age == 0 survives the filter above, so ratio is 1.0.
                if max_retain > 0:
                    ratio = 1 - float(age) / max_retain
                else:
                    ratio = 1.0

                # interpolated radius
                radius = int(round(min_radius + (max_radius - min_radius) * ratio))

                color_rgb = age_to_bgr(ratio)

                # filled circle
                cv2.circle(frame, (xi, yi), radius, color_rgb, thickness=-1)

        video[t] = frame

    return video
107
+
108
def parse_json_tracks(tracks):
    """Parse track input (a JSON string or a list of JSON strings) into a
    list of tracks, each being a list of {x, y} point dicts.

    Single-quoted pseudo-JSON is tolerated by swapping quotes before
    parsing. Returns an empty list if the JSON cannot be decoded.
    """
    parsed_tracks = []
    try:
        if isinstance(tracks, str):
            # A single JSON string holding one or more tracks.
            parsed_tracks.extend(json.loads(tracks.replace("'", '"')))
        else:
            # A list of JSON strings, one parsed object per entry.
            parsed_tracks.extend(json.loads(s.replace("'", '"')) for s in tracks)

        head = parsed_tracks[0] if parsed_tracks else None
        if isinstance(head, dict) and 'x' in head:
            # A single flat track was given; wrap it as a list of tracks.
            parsed_tracks = [parsed_tracks]
        elif isinstance(head, list) and head and isinstance(head[0], dict) and 'x' in head[0]:
            # Already a list of tracks, nothing to do.
            pass
        else:
            # Unexpected format
            print(f"Warning: Unexpected track format: {type(parsed_tracks[0])}")

    except json.JSONDecodeError as e:
        print(f"Error parsing tracks JSON: {e}")
        parsed_tracks = []

    return parsed_tracks
137
+
138
class WanVideoATITracks:
    """ComfyUI node: attach ATI point-track conditioning to a WanVideo
    wrapper model via its transformer_options.

    The JSON `tracks` input is parsed, padded to the fixed 121-sample
    length, normalized to the given frame size, and stored on a clone of
    the model for downstream sampling to read.
    """
    @classmethod
    def INPUT_TYPES(s):
        # NOTE(review): height max 29048 looks like a typo for 2048
        # (width uses 2048) — confirm against upstream before changing.
        return {"required": {
            "model": ("WANVIDEOMODEL", ),
            "tracks": ("STRING",),
            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width of the image to encode"}),
            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height of the image to encode"}),
            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1}),
            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1}),
            },
        }

    RETURN_TYPES = ("WANVIDEOMODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION = "patchmodel"
    CATEGORY = "WanVideoWrapper"

    def patchmodel(self, model, tracks, width, height, temperature, topk, start_percent, end_percent):
        """Clone `model`, store processed ATI tracks and settings in its
        transformer_options, and return the patched clone."""
        tracks_data = parse_json_tracks(tracks)
        # Pad/truncate every track, then stack to (N, 121, 1, 3).
        arrs = []
        for track in tracks_data:
            pts = pad_pts(track)
            arrs.append(pts)

        tracks_np = np.stack(arrs, axis=0)

        # Normalize coordinates and resample 121 -> 81 samples.
        processed_tracks = process_tracks(tracks_np, (width, height))

        patcher = model.clone()
        patcher.model_options["transformer_options"]["ati_tracks"] = processed_tracks.unsqueeze(0)
        patcher.model_options["transformer_options"]["ati_temperature"] = temperature
        patcher.model_options["transformer_options"]["ati_topk"] = topk
        patcher.model_options["transformer_options"]["ati_start_percent"] = start_percent
        patcher.model_options["transformer_options"]["ati_end_percent"] = end_percent

        return (patcher,)
177
+
178
class WanVideoATITracksVisualize:
    """ComfyUI node: overlay ATI point tracks onto a batch of frames for
    visual inspection, size/colour-coding each drawn point by its age."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "images": ("IMAGE",),
            "tracks": ("STRING",),
            "min_radius": ("INT", {"default": 1, "min": 0, "max": 100, "step": 1, "tooltip": "radius for the very first point (oldest)"}),
            "max_radius": ("INT", {"default": 6, "min": 0, "max": 100, "step": 1, "tooltip": "radius for the current point (newest)"}),
            "max_retain": ("INT", {"default": 50, "min": 0, "max": 100, "step": 1, "tooltip": "Maximum number of points to retain"}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("images",)
    FUNCTION = "patchmodel"
    CATEGORY = "WanVideoWrapper"

    def patchmodel(self, images, tracks, min_radius, max_radius, max_retain):
        """Parse the JSON tracks, resample them to 81 frames (matching the
        duplicate-then-stride scheme of process_tracks), and draw them on
        the images with paint_point_track."""
        tracks_data = parse_json_tracks(tracks)
        arrs = []
        for track in tracks_data:
            pts = pad_pts(track)
            arrs.append(pts)

        tracks_np = np.stack(arrs, axis=0)
        # 121 samples -> duplicate to 242 -> every third -> 81 frames.
        track = np.repeat(tracks_np, 2, axis=1)[:, ::3]
        points = track[:, :, 0, :2].astype(np.float32)   # (N, F, 2) pixel coords
        visibles = track[:, :, 0, 2].astype(np.float32)  # (N, F) visibility flags

        # Tile the image batch when shorter than the track length, then
        # trim both cases to exactly the track length.
        if images.shape[0] < points.shape[1]:
            repeat_count = (points.shape[1] + images.shape[0] - 1) // images.shape[0]
            images = images.repeat(repeat_count, 1, 1, 1)
            images = images[:points.shape[1]]
        elif images.shape[0] > points.shape[1]:
            images = images[:points.shape[1]]

        video_viz = paint_point_track(images.cpu().numpy(), points, visibles, min_radius, max_radius, max_retain)
        video_viz = torch.from_numpy(video_viz).float()

        return (video_viz,)
218
+
219
+ from comfy import utils
220
+ import types
221
+ from .motion_patch import patch_motion
222
+
223
class WanConcatCondPatch:
    """Descriptor that replaces a model's `concat_cond` method with
    `modified_concat_cond`, closing over the ATI track tensor and settings.

    Installed via ModelPatcher.add_object_patch; `__get__` fires when the
    patched attribute is accessed and returns a method bound to the model
    with the stored parameters pre-applied.
    """
    def __init__(self, tracks, temperature, topk):
        self.tracks = tracks          # processed ATI tracks (batch dim prepended by caller)
        self.temperature = temperature
        self.topk = topk

    def __get__(self, obj, objtype=None):
        # Create bound method with stored parameters
        def wrapped_concat_cond(self_module, *args, **kwargs):
            return modified_concat_cond(self_module, self.tracks, self.temperature, self.topk, *args, **kwargs)
        return types.MethodType(wrapped_concat_cond, obj)
234
+
235
def modified_concat_cond(self, tracks, temperature, topk, **kwargs):
    """Replacement for the Wan model's concat_cond that applies ATI motion
    patching (patch_motion) to the image/mask conditioning latent.

    Mirrors the stock concat_cond flow (zero-image fallback, latent
    upscale + process_latent_in, mask preparation), then runs patch_motion
    over the concatenated (mask, image) tensor — NOTE(review): assumed to
    track comfy's upstream wan concat_cond; verify on comfy updates.

    Args:
        self: the model object the method is bound to (reads
            .diffusion_model, .process_latent_in, .image_to_video).
        tracks: processed ATI tracks tensor (with batch dim).
        temperature, topk: forwarded to patch_motion.
        **kwargs: conditioning kwargs (noise, device, concat_latent_image,
            concat_mask / denoise_mask).

    Returns:
        Patched conditioning latent (batch dim restored), or None when the
        model takes no extra channels, or the plain image when the early
        exit applies.
    """
    noise = kwargs.get("noise", None)
    # Extra conditioning channels the transformer expects beyond the noise.
    extra_channels = self.diffusion_model.patch_embedding.weight.shape[1] - noise.shape[1]
    if extra_channels == 0:
        return None

    image = kwargs.get("concat_latent_image", None)
    device = kwargs["device"]

    if image is None:
        # No conditioning image: feed zeros of the expected channel count.
        shape_image = list(noise.shape)
        shape_image[1] = extra_channels
        image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device)
    else:
        # Match spatial size, map into latent space 16 channels at a time,
        # and match the noise batch size.
        image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
        for i in range(0, image.shape[1], 16):
            image[:, i: i + 16] = self.process_latent_in(image[:, i: i + 16])
        image = utils.resize_to_batch_size(image, noise.shape[0])

    if not self.image_to_video or extra_channels == image.shape[1]:
        return image

    # Reserve 4 channels for the mask built below.
    if image.shape[1] > (extra_channels - 4):
        image = image[:, :(extra_channels - 4)]

    mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
    if mask is None:
        mask = torch.zeros_like(noise)[:, :4]
    else:
        if mask.shape[1] != 4:
            mask = torch.mean(mask, dim=1, keepdim=True)
        mask = 1.0 - mask  # flip mask polarity
        mask = utils.common_upscale(mask.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
        if mask.shape[-3] < noise.shape[-3]:
            # Zero-pad missing frames at the end of the time axis.
            mask = torch.nn.functional.pad(mask, (0, 0, 0, 0, 0, noise.shape[-3] - mask.shape[-3]), mode='constant', value=0)
        if mask.shape[1] == 1:
            mask = mask.repeat(1, 4, 1, 1, 1)
        mask = utils.resize_to_batch_size(mask, noise.shape[0])

    # ATI motion patching is applied to the first batch element only.
    image_cond = torch.cat((mask, image), dim=1)
    image_cond_ati = patch_motion(tracks.to(image_cond.device, image_cond.dtype), image_cond[0],
                                  temperature=temperature, topk=topk)

    return image_cond_ati.unsqueeze(0)
279
+
280
class WanVideoATI_comfy:
    """ComfyUI node: enable ATI trajectory conditioning on a native comfy
    Wan MODEL by object-patching its concat_cond with WanConcatCondPatch."""

    @classmethod
    def INPUT_TYPES(s):
        # NOTE(review): height max 29048 looks like a typo for 2048
        # (width uses 2048) — it only bounds the UI widget; confirm.
        return {"required": {
            "model": ("MODEL", ),
            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width of the image to encode"}),
            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height of the image to encode"}),
            "tracks": ("STRING",),
            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1}),
            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1}),
            },
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model", )
    FUNCTION = "patchcond"
    CATEGORY = "WanVideoWrapper"

    def patchcond(self, model, tracks, width, height, temperature, topk):
        """Parse and normalize the JSON tracks, then return a clone of
        `model` whose concat_cond is patched to inject them."""
        tracks_data = parse_json_tracks(tracks)
        arrs = []
        for track in tracks_data:
            pts = pad_pts(track)
            arrs.append(pts)

        tracks_np = np.stack(arrs, axis=0)

        processed_tracks = process_tracks(tracks_np, (width, height))

        model_clone = model.clone()
        # The descriptor is bound to the original model object so the
        # wrapped method reads the real module attributes.
        model_clone.add_object_patch(
            "concat_cond",
            WanConcatCondPatch(
                processed_tracks.unsqueeze(0), temperature, topk
            ).__get__(model.model, model.model.__class__)
        )

        return (model_clone,)
319
+
320
# Registration tables consumed by ComfyUI at import time: node id -> class,
# and node id -> human-readable display name shown in the node picker.
NODE_CLASS_MAPPINGS = {
    "WanVideoATITracks": WanVideoATITracks,
    "WanVideoATITracksVisualize": WanVideoATITracksVisualize,
    "WanVideoATI_comfy": WanVideoATI_comfy,
    }
NODE_DISPLAY_NAME_MAPPINGS = {
    "WanVideoATITracks": "WanVideo ATI Tracks",
    "WanVideoATITracksVisualize": "WanVideo ATI Tracks Visualize",
    "WanVideoATI_comfy": "WanVideo ATI Comfy",
    }
HuMo/audio_proj.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from einops import rearrange
3
+ from torch import nn
4
+ from einops import rearrange
5
+
6
class WanRMSNorm(nn.Module):
    """Root-mean-square normalization over the last axis (no mean
    subtraction, no bias), with a learnable per-channel scale."""

    def __init__(self, dim, eps=1e-5):
        super().__init__()
        self.dim = dim
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        r"""
        Args:
            x(Tensor): Shape [B, L, C]
        """
        # Normalize in fp32 for numerical stability, cast back, then scale.
        normalized = self._norm(x.float()).type_as(x)
        return normalized * self.weight

    def _norm(self, x):
        mean_sq = x.pow(2).mean(dim=-1, keepdim=True)
        return x * torch.rsqrt(mean_sq + self.eps)
23
+
24
+
25
class DummyAdapterLayer(nn.Module):
    """Transparent wrapper that forwards every call to the wrapped layer.

    Adds one extra submodule level (stable parameter key names) without
    changing computation.
    """
    def __init__(self, layer):
        super().__init__()
        self.layer = layer

    def forward(self, *args, **kwargs):
        return self.layer(*args, **kwargs)
32
+
33
+
34
class AudioProjModel(nn.Module):
    """Project windowed audio embeddings into context tokens.

    Each window of `seq_len` audio frames — `blocks` feature blocks of
    `channels` dims apiece — is flattened and passed through a 3-layer MLP
    producing `context_tokens` tokens of `output_dim` each.
    """
    def __init__(
        self,
        seq_len=5,
        blocks=13,  # add a new parameter blocks
        channels=768,  # add a new parameter channels
        intermediate_dim=512,
        output_dim=1536,
        context_tokens=16,
    ):
        super().__init__()

        self.seq_len = seq_len
        self.blocks = blocks
        self.channels = channels
        self.input_dim = seq_len * blocks * channels  # update input_dim to be the product of blocks and channels.
        self.intermediate_dim = intermediate_dim
        self.context_tokens = context_tokens
        self.output_dim = output_dim

        # define multiple linear layers (DummyAdapterLayer keeps stable
        # submodule names without changing computation)
        self.audio_proj_glob_1 = DummyAdapterLayer(nn.Linear(self.input_dim, intermediate_dim))
        self.audio_proj_glob_2 = DummyAdapterLayer(nn.Linear(intermediate_dim, intermediate_dim))
        self.audio_proj_glob_3 = DummyAdapterLayer(nn.Linear(intermediate_dim, context_tokens * output_dim))

        self.audio_proj_glob_norm = DummyAdapterLayer(nn.LayerNorm(output_dim))

        self.initialize_weights()

    def initialize_weights(self):
        """Xavier-init every Linear weight; zero its bias."""
        # Initialize transformer layers:
        def _basic_init(module):
            if isinstance(module, nn.Linear):
                torch.nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

        self.apply(_basic_init)

    def forward(self, audio_embeds):
        """Map (batch, frames, window, blocks, channels) audio embeddings
        to context tokens of shape (batch, frames, context_tokens, output_dim)."""
        video_length = audio_embeds.shape[1]
        # Fold batch and frame axes so each window becomes one MLP sample.
        audio_embeds = rearrange(audio_embeds, "bz f w b c -> (bz f) w b c")
        batch_size, window_size, blocks, channels = audio_embeds.shape
        audio_embeds = audio_embeds.view(batch_size, window_size * blocks * channels)

        audio_embeds = torch.relu(self.audio_proj_glob_1(audio_embeds))
        audio_embeds = torch.relu(self.audio_proj_glob_2(audio_embeds))

        context_tokens = self.audio_proj_glob_3(audio_embeds).reshape(batch_size, self.context_tokens, self.output_dim)

        context_tokens = self.audio_proj_glob_norm(context_tokens)
        # Unfold back to (batch, frames, tokens, dim).
        context_tokens = rearrange(context_tokens, "(bz f) m c -> bz f m c", f=video_length)

        return context_tokens
HuMo/nodes.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import folder_paths
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import os
5
+ import json
6
+ import torchaudio
7
+
8
+ from comfy.utils import load_torch_file, common_upscale
9
+ import comfy.model_management as mm
10
+
11
+ from accelerate import init_empty_weights
12
+ from ..utils import set_module_tensor_to_device, log
13
+ from ..nodes import WanVideoEncodeLatentBatch
14
+
15
+ script_directory = os.path.dirname(os.path.abspath(__file__))
16
+ device = mm.get_torch_device()
17
+ offload_device = mm.unet_offload_device()
18
+
19
def linear_interpolation_fps(features, input_fps, output_fps, output_len=None):
    """Linearly resample a [B, T, C] feature sequence from input_fps to output_fps.

    When output_len is not given it is derived from the duration implied by
    input_fps. Returns a tensor of shape [B, output_len, C].
    """
    channels_first = features.transpose(1, 2)  # [B, C, T], layout F.interpolate expects
    if output_len is None:
        duration_s = channels_first.shape[2] / float(input_fps)
        output_len = int(duration_s * output_fps)
    resampled = F.interpolate(channels_first, size=output_len, mode='linear', align_corners=True)
    return resampled.transpose(1, 2)
26
+
27
def get_audio_emb_window(audio_emb, frame_num, frame0_idx, audio_shift=2):
    """Slice per-frame audio embeddings into one fixed-size window per latent frame.

    audio_emb: (T, B, C) tensor of per-video-frame audio features.
    frame_num: number of pixel frames covered; one window is built per latent
        frame, i.e. 1 + (frame_num - 1) // 4 windows in total.
    frame0_idx: index into audio_emb of the first pixel frame of this chunk.
    audio_shift: extra context frames taken on each side of the later windows.

    Returns (windows, next_idx): windows is (num_latent_frames, 4 + 2*audio_shift, B, C);
    next_idx (= ed - audio_shift from the last window) can serve as frame0_idx
    for the next chunk. Indices outside audio_emb are filled with zeros.
    """
    # Zero padding rows used for indices that fall outside audio_emb.
    zero_audio_embed = torch.zeros((audio_emb.shape[1], audio_emb.shape[2]), dtype=audio_emb.dtype, device=audio_emb.device)
    zero_audio_embed_3 = torch.zeros((3, audio_emb.shape[1], audio_emb.shape[2]), dtype=audio_emb.dtype, device=audio_emb.device)
    iter_ = 1 + (frame_num - 1) // 4  # latent frame count (4x temporal compression)
    audio_emb_wind = []
    for lt_i in range(iter_):
        if lt_i == 0:
            # First latent frame: a 5-frame window centered on frame0_idx,
            # front-padded with 3 zero rows so all windows share one size.
            st = frame0_idx + lt_i - 2
            ed = frame0_idx + lt_i + 3
            wind_feat = torch.stack([
                audio_emb[i] if (0 <= i < audio_emb.shape[0]) else zero_audio_embed
                for i in range(st, ed)
            ], dim=0)
            wind_feat = torch.cat((zero_audio_embed_3, wind_feat), dim=0)
        else:
            # Later latent frames: the 4 underlying pixel frames plus
            # audio_shift context frames on each side.
            st = frame0_idx + 1 + 4 * (lt_i - 1) - audio_shift
            ed = frame0_idx + 1 + 4 * lt_i + audio_shift
            wind_feat = torch.stack([
                audio_emb[i] if (0 <= i < audio_emb.shape[0]) else zero_audio_embed
                for i in range(st, ed)
            ], dim=0)
        audio_emb_wind.append(wind_feat)
    audio_emb_wind = torch.stack(audio_emb_wind, dim=0)

    # NOTE: `ed` intentionally carries over from the final loop iteration.
    return audio_emb_wind, ed - audio_shift
52
+
53
class WhisperModelLoader:
    """ComfyUI node: load a Whisper encoder (large-v3 layout) from
    'ComfyUI/models/audio_encoders' for audio feature extraction.

    Returns a WHISPERMODEL dict with keys 'feature_extractor', 'model'
    (decoder stripped) and 'dtype'.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (folder_paths.get_filename_list("audio_encoders"), {"tooltip": "These models are loaded from the 'ComfyUI/models/audio_encoders' folder",}),
                "base_precision": (["fp32", "bf16", "fp16"], {"default": "fp16"}),
                "load_device": (["main_device", "offload_device"], {"default": "main_device", "tooltip": "Initial device to load the model to, NOT recommended with the larger models unless you have 48GB+ VRAM"}),
            },
        }

    RETURN_TYPES = ("WHISPERMODEL",)
    RETURN_NAMES = ("whisper_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, model, base_precision, load_device):
        """Build the encoder-only WhisperModel plus its feature extractor.

        The model skeleton is created on the meta device and weights are then
        materialized tensor-by-tensor onto the offload device in base_dtype.
        """
        from transformers import WhisperConfig, WhisperModel, WhisperFeatureExtractor

        # NOTE: the fp8 keys are unreachable via INPUT_TYPES (only
        # fp32/bf16/fp16 are offered); kept for parity with other loaders.
        base_dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "fp8_e4m3fn_fast": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[base_precision]

        if load_device == "offload_device":
            transformer_load_device = offload_device
        else:
            transformer_load_device = device

        config_path = os.path.join(script_directory, "whisper_config.json")
        # Use a context manager so the config file handle is always closed
        # (the previous open() call leaked it).
        with open(config_path) as config_file:
            whisper_config = WhisperConfig(**json.load(config_file))

        with init_empty_weights():
            whisper = WhisperModel(whisper_config).eval()
        whisper.decoder = None  # we only need the encoder

        feature_extractor_config = {
            "chunk_length": 30,
            "feature_extractor_type": "WhisperFeatureExtractor",
            "feature_size": 128,
            "hop_length": 160,
            "n_fft": 400,
            "n_samples": 480000,
            "nb_max_frames": 3000,
            "padding_side": "right",
            "padding_value": 0.0,
            "processor_class": "WhisperProcessor",
            "return_attention_mask": False,
            "sampling_rate": 16000
        }

        feature_extractor = WhisperFeatureExtractor(**feature_extractor_config)

        model_path = folder_paths.get_full_path_or_raise("audio_encoders", model)
        sd = load_torch_file(model_path, device=transformer_load_device, safe_load=True)

        # Materialize each (meta) parameter from the checkpoint; checkpoint
        # keys carry a "model." prefix relative to the module names.
        for name, _ in whisper.named_parameters():
            key = "model." + name
            value = sd[key]
            set_module_tensor_to_device(whisper, name, device=offload_device, dtype=base_dtype, value=value)

        whisper_model = {
            "feature_extractor": feature_extractor,
            "model": whisper,
            "dtype": base_dtype,
        }

        return (whisper_model,)
118
+
119
class HuMoEmbeds:
    """ComfyUI node: build the WANVIDIMAGE_EMBEDS dict for HuMo generation.

    Encodes optional audio (via a Whisper encoder) into per-frame embeddings
    and optional reference images (via the VAE) into latent conditioning,
    then packs them with the target latent shape and audio-CFG settings.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "num_frames": ("INT", {"default": 81, "min": -1, "max": 10000, "step": 1, "tooltip": "The total frame count to generate."}),
            "width": ("INT", {"default": 832, "min": 64, "max": 4096, "step": 16}),
            "height": ("INT", {"default": 480, "min": 64, "max": 4096, "step": 16}),
            "audio_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "Strength of the audio conditioning"}),
            "audio_cfg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "When not 1.0, an extra model pass without audio conditioning is done: slower inference but more motion is allowed"}),
            "audio_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to start applying audio conditioning"}),
            "audio_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to stop applying audio conditioning"})
            },
            "optional" : {
                "whisper_model": ("WHISPERMODEL",),
                "vae": ("WANVAE", ),
                "reference_images": ("IMAGE", {"tooltip": "reference images for the humo model"}),
                "audio": ("AUDIO",),
                "tiled_vae": ("BOOLEAN", {"default": False, "tooltip": "Use tiled VAE encoding for reduced memory use"}),
            }
        }

    RETURN_TYPES = ("WANVIDIMAGE_EMBEDS", )
    RETURN_NAMES = ("image_embeds", )
    FUNCTION = "process"
    CATEGORY = "WanVideoWrapper"

    def process(self, num_frames, width, height, audio_scale, audio_cfg_scale, audio_start_percent, audio_end_percent, whisper_model=None, vae=None, reference_images=None, audio=None, tiled_vae=False):
        # --- input validation: fail fast with clear messages ---
        # The VAE is used unconditionally below (zero-latent encoding), not
        # only when reference images are provided.
        if vae is None:
            raise ValueError("VAE is required")
        if whisper_model is None and audio is not None:
            raise ValueError("Whisper model is required when audio is provided")
        # With no audio there is nothing to derive the length from.
        if audio is None and num_frames == -1:
            raise ValueError("num_frames must be set explicitly when no audio is provided")

        sampling_rate = 16000  # Whisper expects 16 kHz mono input

        if audio is not None:
            # Only unpack the whisper model inside the audio branch:
            # whisper_model may legitimately be None when there is no audio
            # (previously this raised TypeError on the subscript).
            model = whisper_model["model"]
            feature_extractor = whisper_model["feature_extractor"]
            dtype = whisper_model["dtype"]

            audio_input = audio["waveform"][0]  # (channels, samples)
            sample_rate = audio["sample_rate"]

            if sample_rate != sampling_rate:
                audio_input = torchaudio.functional.resample(audio_input, sample_rate, sampling_rate)
            # Downmix stereo to mono; check the channel axis (dim 0), the
            # previous shape[1] test looked at the sample count instead.
            if audio_input.shape[0] == 2:
                audio_input = audio_input.mean(dim=0, keepdim=False)
            else:
                audio_input = audio_input[0]

            model.to(device)
            audio_len = len(audio_input) // 640  # 640 samples per video frame at 25 fps

            # Log-mel feature extraction in 30 s chunks (750 video frames).
            audio_features = []
            window = 750 * 640
            for i in range(0, len(audio_input), window):
                audio_feature = feature_extractor(audio_input[i:i+window], sampling_rate=sampling_rate, return_tensors="pt").input_features
                audio_features.append(audio_feature)
            audio_features = torch.cat(audio_features, dim=-1).to(device, dtype)

            # Encode in 3000-mel-frame windows, keeping all hidden states.
            window = 3000
            audio_prompts = []
            for i in range(0, audio_features.shape[-1], window):
                audio_prompt = model.encoder(audio_features[:,:,i:i+window], output_hidden_states=True).hidden_states
                audio_prompt = torch.stack(audio_prompt, dim=2)
                audio_prompts.append(audio_prompt)

            model.to(offload_device)

            audio_prompts = torch.cat(audio_prompts, dim=1)
            audio_prompts = audio_prompts[:, :audio_len * 2]

            # Average groups of encoder layers and resample 50 Hz -> 25 fps.
            feat0 = linear_interpolation_fps(audio_prompts[:, :, 0: 8].mean(dim=2), 50, 25)
            feat1 = linear_interpolation_fps(audio_prompts[:, :, 8: 16].mean(dim=2), 50, 25)
            feat2 = linear_interpolation_fps(audio_prompts[:, :, 16: 24].mean(dim=2), 50, 25)
            feat3 = linear_interpolation_fps(audio_prompts[:, :, 24: 32].mean(dim=2), 50, 25)
            feat4 = linear_interpolation_fps(audio_prompts[:, :, 32], 50, 25)
            audio_emb = torch.stack([feat0, feat1, feat2, feat3, feat4], dim=2)[0]  # [T, 5, 1280]
        else:
            # No audio: zero embeddings so downstream code sees a uniform shape.
            audio_emb = torch.zeros(num_frames, 5, 1280, device=device)
            audio_len = num_frames

        # Clamp the frame count to 4k+1 (latent temporal compression is 4x).
        pixel_frame_num = num_frames if num_frames != -1 else audio_len
        pixel_frame_num = 4 * ((pixel_frame_num - 1) // 4) + 1
        latent_frame_num = (pixel_frame_num - 1) // 4 + 1

        log.info(f"HuMo set to generate {pixel_frame_num} frames")

        #audio_emb, _ = get_audio_emb_window(audio_emb, pixel_frame_num, frame0_idx=0)

        num_refs = 0
        if reference_images is not None:
            # Resize references to the target resolution if needed, then
            # VAE-encode them into per-image latents.
            if reference_images.shape[1] != height or reference_images.shape[2] != width:
                reference_images_in = common_upscale(reference_images.movedim(-1, 1), width, height, "lanczos", "disabled").movedim(1, -1)
            else:
                reference_images_in = reference_images
            samples, = WanVideoEncodeLatentBatch.encode(self, vae, reference_images_in, tiled_vae, None, None, None, None)
            samples = samples["samples"].transpose(0, 2).squeeze(0)
            num_refs = samples.shape[1]

        # Encode all-zero frames to get the "empty" latent conditioning.
        vae.to(device)
        zero_frames = torch.zeros(1, 3, pixel_frame_num + 4 * num_refs, height, width, device=device, dtype=vae.dtype)
        zero_latents = vae.encode(zero_frames, device=device, tiled=tiled_vae)[0].to(offload_device)

        vae.to(offload_device)
        mm.soft_empty_cache()

        target_shape = (16, latent_frame_num + num_refs, height // 8, width // 8)

        # Mask channels: 1 marks fixed (reference) latent frames, 0 generated ones.
        mask = torch.ones(4, target_shape[1], target_shape[2], target_shape[3], device=offload_device, dtype=vae.dtype)
        if reference_images is not None:
            mask[:, :-num_refs] = 0
            # Zero latents for the generated span, reference latents appended.
            image_cond = torch.cat([zero_latents[:, :(target_shape[1] - num_refs)], samples], dim=1)
        else:
            image_cond = zero_latents
            mask = torch.zeros_like(mask)
        image_cond = torch.cat([mask, image_cond], dim=0)
        # Negative conditioning uses the zero latents (no references).
        image_cond_neg = torch.cat([mask, zero_latents], dim=0)

        embeds = {
            "humo_audio_emb": audio_emb,
            "humo_audio_emb_neg": torch.zeros_like(audio_emb, dtype=audio_emb.dtype, device=audio_emb.device),
            "humo_image_cond": image_cond,
            "humo_image_cond_neg": image_cond_neg,
            "humo_reference_count": num_refs,
            "target_shape": target_shape,
            "num_frames": pixel_frame_num,
            "humo_audio_scale": audio_scale,
            "humo_audio_cfg_scale": audio_cfg_scale,
            "humo_start_percent": audio_start_percent,
            "humo_end_percent": audio_end_percent,
        }

        return (embeds, )
255
+
256
class WanVideoCombineEmbeds:
    """ComfyUI node: merge two WANVIDIMAGE_EMBEDS dicts into one.

    On key collisions the entry from embeds_2 wins.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "embeds_1": ("WANVIDIMAGE_EMBEDS",),
            "embeds_2": ("WANVIDIMAGE_EMBEDS",),
            }
        }

    RETURN_TYPES = ("WANVIDIMAGE_EMBEDS",)
    RETURN_NAMES = ("image_embeds",)
    FUNCTION = "add"
    CATEGORY = "WanVideoWrapper"
    EXPERIMENTAL = True

    def add(self, embeds_1, embeds_2):
        # Copy-then-update: neither input dict is mutated, embeds_2 overrides.
        merged = dict(embeds_1)
        merged.update(embeds_2)
        return (merged,)
275
+
276
+
277
# Node registration tables picked up by ComfyUI when this module is imported:
# internal node id -> implementing class.
NODE_CLASS_MAPPINGS = {
    "WhisperModelLoader": WhisperModelLoader,
    "HuMoEmbeds": HuMoEmbeds,
    "WanVideoCombineEmbeds": WanVideoCombineEmbeds,
}

# Internal node id -> human-readable name shown in the ComfyUI node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "WhisperModelLoader": "Whisper Model Loader",
    "HuMoEmbeds": "HuMo Embeds",
    "WanVideoCombineEmbeds": "WanVideo Combine Embeds",
}
HuMo/whisper_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-large-v3",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1280,
17
+ "decoder_attention_heads": 20,
18
+ "decoder_ffn_dim": 5120,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 32,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 20,
24
+ "encoder_ffn_dim": 5120,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 32,
27
+ "eos_token_id": 50257,
28
+ "init_std": 0.02,
29
+ "is_encoder_decoder": true,
30
+ "mask_feature_length": 10,
31
+ "mask_feature_min_masks": 0,
32
+ "mask_feature_prob": 0.0,
33
+ "mask_time_length": 10,
34
+ "mask_time_min_masks": 2,
35
+ "mask_time_prob": 0.05,
36
+ "max_length": 448,
37
+ "max_source_positions": 1500,
38
+ "max_target_positions": 448,
39
+ "median_filter_width": 7,
40
+ "model_type": "whisper",
41
+ "num_hidden_layers": 32,
42
+ "num_mel_bins": 128,
43
+ "pad_token_id": 50256,
44
+ "scale_embedding": false,
45
+ "torch_dtype": "float16",
46
+ "transformers_version": "4.36.0.dev0",
47
+ "use_cache": true,
48
+ "use_weighted_layer_sum": false,
49
+ "vocab_size": 51866
50
+ }
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
MTV/data/mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ababeaabf5ac096ce7c7714ada14aa1de8355c0016de25695be611d51285141
3
+ size 416
MTV/data/std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:650e46902a0878e6947be401e4e1995e54a8fd407f2be3ded0dda62bda99a9b3
3
+ size 416
MTV/draw_pose.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import math
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ from torchvision import transforms
7
+
8
+
9
def intrinsic_matrix_from_field_of_view(imshape, fov_degrees: float = 55):
    """Build a 3x3 pinhole intrinsic matrix from an image shape and a FOV.

    The focal length is chosen so that fov_degrees spans the larger image
    side; the principal point sits at the image center. 55 degrees is the
    nlf default.
    """
    shape_arr = np.asarray(imshape)
    half_fov = np.deg2rad(fov_degrees) / 2
    focal = np.max(shape_arr) / (2 * np.tan(half_fov))
    cx = shape_arr[1] / 2  # principal point x (width / 2)
    cy = shape_arr[0] / 2  # principal point y (height / 2)
    return np.array([
        [focal, 0, cx],
        [0, focal, cy],
        [0, 0, 1],
    ])
20
+
21
+
22
def p3d_to_p2d(point_3d, height, width):
    """Project camera-space 3D points (n, m, 3) into pixel coordinates.

    Returns an (n, m, 3) array: channels 0/1 are pixel x/y after the
    perspective divide, channel 2 keeps the original depth.
    """
    K = intrinsic_matrix_from_field_of_view((height, width))[None, None]  # (1, 1, 3, 3)
    projected = (K @ point_3d[..., None]).squeeze(-1)  # (n, m, 3)
    # Perspective divide for x/y only; the depth channel is left untouched.
    projected[:, :, :2] = projected[:, :, :2] / projected[:, :, 2:3]
    return projected
30
+
31
+
32
def get_pose_images(smpl_data, offset):
    """Render each SMPL joint set in smpl_data to a PIL pose image.

    smpl_data: iterable of per-frame joint arrays; numpy arrays are used
        as-is, anything else is converted via .numpy()
        (assumes CPU torch tensors — TODO confirm with callers).
    offset: (height, width) of the output canvases.
    Returns a list of PIL.Image objects, one per entry.
    """
    pose_images = []
    for data in smpl_data:
        if isinstance(data, np.ndarray):
            joints3d = data
        else:
            joints3d = data.numpy()
        canvas = np.zeros(shape=(offset[0], offset[1], 3), dtype=np.uint8)
        # Project 3D joints to pixel space, then draw points and limbs.
        joints3d = p3d_to_p2d(joints3d, offset[0], offset[1])
        canvas = draw_3d_points(canvas, joints3d[0], stickwidth=int(offset[1]/350))
        pose_images.append(Image.fromarray(canvas))
    return pose_images
44
+
45
+
46
def get_control_conditions(poses, h, w):
    """Render a sequence of poses into a control-video tensor.

    poses: iterable of (n, m, 3) camera-space joint arrays.
    Returns a float tensor of shape (T, h, w, 3) with values in [0, 1].
    A pose that fails to render contributes a blank (black) frame.
    """
    # Normalization is currently disabled (see the commented line below);
    # the transform is kept so it can be re-enabled easily.
    video_transforms = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True)
    control_images = []
    for idx, pose in enumerate(poses):
        canvas = np.zeros(shape=(h, w, 3), dtype=np.uint8)
        try:
            joints3d = p3d_to_p2d(pose, h, w)
            canvas = draw_3d_points(
                canvas,
                joints3d[0],
                stickwidth=int(h / 350),
            )
            resized_canvas = cv2.resize(canvas, (w, h))
            # Image.fromarray(resized_canvas).save(f'tmp/{idx}_pose.jpg')
            control_images.append(resized_canvas)
        except Exception as e:
            print("wrong:", e)
            # Fall back to the blank canvas. This must stay an ndarray:
            # appending a PIL Image here (as before) made np.array() below
            # build an object array, which breaks torch.from_numpy.
            control_images.append(canvas)
    control_pixel_values = np.array(control_images)
    control_pixel_values = torch.from_numpy(control_pixel_values).contiguous() / 255.
    print("control_pixel_values.shape", control_pixel_values.shape)
    #control_pixel_values = video_transforms(control_pixel_values)
    return control_pixel_values
69
+
70
+
71
def draw_3d_points(canvas, points, stickwidth=2, r=2, draw_line=True):
    """Draw projected skeleton joints and limbs onto a BGR canvas.

    canvas: (H, W, 3) uint8 image, modified in place and returned.
    points: (num_joints, >=2) array of pixel coordinates (x, y, ...).
    stickwidth: half-thickness of limb ellipses; r: joint circle radius.
    draw_line: when False, only the joint circles are drawn.
    """
    # Per-joint circle colors (index modulo 17 at draw time).
    colors = [
        [255, 0, 0], # 0
        [0, 255, 0], # 1
        [0, 0, 255], # 2
        [255, 0, 255], # 3
        [255, 255, 0], # 4
        [85, 255, 0], # 5
        [0, 75, 255], # 6
        [0, 255, 85], # 7
        [0, 255, 170], # 8
        [170, 0, 255], # 9
        [85, 0, 255], # 10
        [0, 85, 255], # 11
        [0, 255, 255], # 12
        [85, 0, 255], # 13
        [170, 0, 255], # 14
        [255, 0, 255], # 15
        [255, 0, 170], # 16
        [255, 0, 85], # 17
    ]
    # Joint-index pairs forming the limbs (SMPL-style joint ordering —
    # TODO confirm exact skeleton convention against the pose source).
    connetions = [
        [15,12],[12, 16],[16, 18],[18, 20],[20, 22],
        [12,17],[17,19],[19,21],
        [21,23],[12,9],[9,6],
        [6,3],[3,0],[0,1],
        [1,4],[4,7],[7,10],[0,2],[2,5],[5,8],[8,11]
    ]
    # Per-limb fill colors (index modulo 17 at draw time).
    connection_colors = [
        [255, 0, 0], # 0
        [0, 255, 0], # 1
        [0, 0, 255], # 2
        [255, 255, 0], # 3
        [255, 0, 255], # 4
        [0, 255, 0], # 5
        [0, 85, 255], # 6
        [255, 175, 0], # 7
        [0, 0, 255], # 8
        [255, 85, 0], # 9
        [0, 255, 85], # 10
        [255, 0, 255], # 11
        [255, 0, 0], # 12
        [0, 175, 255], # 13
        [255, 255, 0], # 14
        [0, 0, 255], # 15
        [0, 255, 0], # 16
    ]

    # draw point
    for i in range(len(points)):
        x,y = points[i][0:2]
        x,y = int(x),int(y)
        # Joints 13 and 14 are intentionally not drawn.
        if i==13 or i == 14:
            continue
        cv2.circle(canvas, (x, y), r, colors[i%17], thickness=-1)

    # draw line
    if draw_line:
        for i in range(len(connetions)):
            point1_idx,point2_idx = connetions[i][0:2]
            point1 = points[point1_idx]
            point2 = points[point2_idx]
            # Each limb is rendered as a filled ellipse centered on the
            # segment midpoint, oriented along the segment.
            Y = [point2[0],point1[0]]
            X = [point2[1],point1[1]]
            mX = int(np.mean(X))
            mY = int(np.mean(Y))
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((mY, mX), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(canvas, polygon, connection_colors[i%17])

    return canvas
MTV/motion4d/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .vqvae import SMPL_VQVAE, VectorQuantizer, Encoder, Decoder
MTV/motion4d/vqvae.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+
6
+
7
class Encoder(nn.Module):
    """Spatio-temporal encoder mapping SMPL joint sequences to latent features.

    Input/output layout is (batch, channels, frames, joints); two ResBlock
    stages with interleaved temporal/joint down-sampling widen channels from
    ``in_channels`` to ``out_channels``.
    """

    def __init__(
        self,
        in_channels=3,
        mid_channels=[128, 512],
        out_channels=3072,
        downsample_time=[1, 1],
        downsample_joint=[1, 1],
        num_attention_heads=8,
        attention_head_dim=64,
        dim=3072,
    ):
        super(Encoder, self).__init__()

        # Attribute names must stay stable so checkpoints keep loading.
        self.conv_in = nn.Conv2d(in_channels, mid_channels[0], kernel_size=3, stride=1, padding=1)
        self.resnet1 = nn.ModuleList(ResBlock(mid_channels[0], mid_channels[0]) for _ in range(3))
        self.downsample1 = Downsample(mid_channels[0], mid_channels[0], downsample_time[0], downsample_joint[0])
        self.resnet2 = ResBlock(mid_channels[0], mid_channels[1])
        self.resnet3 = nn.ModuleList(ResBlock(mid_channels[1], mid_channels[1]) for _ in range(3))
        self.downsample2 = Downsample(mid_channels[1], mid_channels[1], downsample_time[1], downsample_joint[1])
        self.conv_out = nn.Conv2d(mid_channels[-1], out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Encode ``x`` of shape (B, C, F, J) to (B, out_channels, F', J')."""
        x = self.conv_in(x)
        for block in self.resnet1:
            x = block(x)
        x = self.downsample1(x)

        x = self.resnet2(x)
        for block in self.resnet3:
            x = block(x)
        x = self.downsample2(x)

        return self.conv_out(x)
43
+
44
+
45
+
46
class VectorQuantizer(nn.Module):
    """Nearest-neighbour codebook quantizer with straight-through gradients.

    Holds ``nb_code`` codes of dimension ``code_dim``. ``forward`` maps each
    feature vector of a (B, C, F, J) tensor to its closest code and returns the
    straight-through-estimated result plus a commitment loss.
    """

    def __init__(self, nb_code, code_dim):
        super().__init__()
        self.nb_code = nb_code
        self.code_dim = code_dim
        self.mu = 0.99  # EMA decay factor (not used in the inference path here)
        self.reset_codebook()
        self.reset_count = 0
        # NOTE(review): plain tensor, not a registered buffer -- it will not
        # follow .to(device) or appear in state_dict; confirm this is intended.
        self.usage = torch.zeros((self.nb_code, 1))

    def reset_codebook(self):
        """(Re)initialize the codebook buffer and bookkeeping state."""
        self.init = False
        self.code_sum = None
        self.code_count = None
        # Fix: previously forced .cuda() on the buffer, which crashed on
        # CPU-only machines at construction time. Create it on CPU; nn.Module's
        # .to()/load_state_dict relocate registered buffers as needed.
        self.register_buffer('codebook', torch.zeros(self.nb_code, self.code_dim))

    def _tile(self, x):
        """Repeat (and jitter) rows of ``x`` until there are >= nb_code rows."""
        nb_code_x, code_dim = x.shape
        if nb_code_x < self.nb_code:
            n_repeats = (self.nb_code + nb_code_x - 1) // nb_code_x
            std = 0.01 / np.sqrt(code_dim)
            out = x.repeat(n_repeats, 1)
            out = out + torch.randn_like(out) * std
        else:
            out = x
        return out

    def preprocess(self, x):
        # [bs, c, f, j] -> [bs * f * j, c]
        x = x.permute(0, 2, 3, 1).contiguous()
        x = x.view(-1, x.shape[-1])
        return x

    def quantize(self, x):
        """Return the index of the nearest code for each row of ``x``.

        Uses the expanded form ||x||^2 - 2 x.k + ||k||^2 of the squared
        Euclidean distance against the whole codebook at once.
        """
        k_w = self.codebook.t()
        distance = torch.sum(x ** 2, dim=-1, keepdim=True) - 2 * torch.matmul(x, k_w) + torch.sum(k_w ** 2, dim=0, keepdim=True)
        _, code_idx = torch.min(distance, dim=-1)
        return code_idx

    def dequantize(self, code_idx):
        """Look up code vectors for ``code_idx``: [N] -> [N, code_dim]."""
        x = F.embedding(code_idx, self.codebook)
        return x

    def forward(self, x, return_vq=False):
        """Quantize ``x`` (B, C, F, J).

        Returns (quantized, commit_loss). With ``return_vq=True`` the quantized
        tensor is flattened to (B, F*J, C); otherwise it is reshaped back to
        (B, C, F, J).
        """
        bs, c, f, j = x.shape

        x = self.preprocess(x)
        assert x.shape[-1] == self.code_dim

        # Quantize and dequantize through the bottleneck.
        code_idx = self.quantize(x)
        x_d = self.dequantize(code_idx)

        # Commitment loss pulls encoder outputs toward their codes.
        commit_loss = F.mse_loss(x, x_d.detach())

        # Straight-through estimator: forward uses x_d, gradient flows to x.
        x_d = x + (x_d - x).detach()

        if return_vq:
            return x_d.view(bs, f*j, c).contiguous(), commit_loss

        x_d = x_d.view(bs, f, j, c).permute(0, 3, 1, 2).contiguous()
        return x_d, commit_loss
117
+
118
+
119
+
120
+
121
class Decoder(nn.Module):
    """Mirror of ``Encoder``: maps latent (B, in_channels, F, J) features back
    to joint space, narrowing channels through two ResBlock stages with
    interleaved up-sampling.
    """

    def __init__(
        self,
        in_channels=3072,
        mid_channels=[512, 128],
        out_channels=3,
        upsample_rate=None,
        frame_upsample_rate=[1.0, 1.0],
        joint_upsample_rate=[1.0, 1.0],
        dim=128,
        attention_head_dim=64,
        num_attention_heads=8,
    ):
        super(Decoder, self).__init__()

        # Attribute names must stay stable so checkpoints keep loading.
        self.conv_in = nn.Conv2d(in_channels, mid_channels[0], kernel_size=3, stride=1, padding=1)
        self.resnet1 = nn.ModuleList(ResBlock(mid_channels[0], mid_channels[0]) for _ in range(3))
        self.upsample1 = Upsample(mid_channels[0], mid_channels[0], frame_upsample_rate=frame_upsample_rate[0], joint_upsample_rate=joint_upsample_rate[0])
        self.resnet2 = ResBlock(mid_channels[0], mid_channels[1])
        self.resnet3 = nn.ModuleList(ResBlock(mid_channels[1], mid_channels[1]) for _ in range(3))
        self.upsample2 = Upsample(mid_channels[1], mid_channels[1], frame_upsample_rate=frame_upsample_rate[1], joint_upsample_rate=joint_upsample_rate[1])
        self.conv_out = nn.Conv2d(mid_channels[-1], out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Decode latent ``x`` (B, C, F, J) to (B, out_channels, F', J')."""
        x = self.conv_in(x)
        for block in self.resnet1:
            x = block(x)
        x = self.upsample1(x)

        x = self.resnet2(x)
        for block in self.resnet3:
            x = block(x)
        x = self.upsample2(x)

        return self.conv_out(x)
158
+
159
+
160
class Upsample(nn.Module):
    # Up-samples (batch, channels, frames, joints) features via F.interpolate,
    # then applies a joint-axis Conv1d. Despite its name, `upsampler` does not
    # change resolution (kernel_size=3, stride=1, padding=1).
    def __init__(
        self,
        in_channels,
        out_channels,
        upsample_rate=None,
        frame_upsample_rate=None,
        joint_upsample_rate=None,
    ):
        super(Upsample, self).__init__()

        self.upsampler = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.upsample_rate = upsample_rate
        self.frame_upsample_rate = frame_upsample_rate
        self.joint_upsample_rate = joint_upsample_rate
        # NOTE(review): duplicate assignment of self.upsample_rate (harmless).
        self.upsample_rate = upsample_rate

    def forward(self, inputs):
        # Odd frame count (>1): keep the first frame untouched, interpolate the rest.
        if inputs.shape[2] > 1 and inputs.shape[2] % 2 == 1:
            # split first frame
            x_first, x_rest = inputs[:, :, 0], inputs[:, :, 1:]

            if self.upsample_rate is not None:
                x_first = F.interpolate(x_first, scale_factor=self.upsample_rate)
                x_rest = F.interpolate(x_rest, scale_factor=self.upsample_rate)
            else:
                # NOTE(review): x_first is 3D (B, C, J) here, so its bilinear
                # interpolation line was disabled; only x_rest is resized.
                x_rest = F.interpolate(x_rest, scale_factor=(self.frame_upsample_rate, self.joint_upsample_rate), mode="bilinear", align_corners=True)
            # Restore the frame axis on the kept first frame before concatenating.
            x_first = x_first[:, :, None, :]
            inputs = torch.cat([x_first, x_rest], dim=2)
        elif inputs.shape[2] > 1:
            # Even frame count: interpolate the whole tensor.
            if self.upsample_rate is not None:
                inputs = F.interpolate(inputs, scale_factor=self.upsample_rate)
            else:
                inputs = F.interpolate(inputs, scale_factor=(self.frame_upsample_rate, self.joint_upsample_rate), mode="bilinear", align_corners=True)
        else:
            # Single frame: drop the frame axis and interpolate joints only.
            inputs = inputs.squeeze(2)
            if self.upsample_rate is not None:
                inputs = F.interpolate(inputs, scale_factor=self.upsample_rate)
            else:
                # NOTE(review): a tuple scale_factor with mode="linear" (1D)
                # looks inconsistent -- TODO confirm this branch is exercised.
                inputs = F.interpolate(inputs, scale_factor=(self.frame_upsample_rate, self.joint_upsample_rate), mode="linear", align_corners=True)
            # NOTE(review): five indices on a 3D tensor would raise IndexError;
            # this single-frame path appears unreachable/untested -- verify.
            inputs = inputs[:, :, None, :, :]

        # Joint-axis convolution: (B, C, T, J) -> (B*T, C, J) -> conv -> back.
        b, c, t, j = inputs.shape
        inputs = inputs.permute(0, 2, 1, 3).reshape(b * t, c, j)
        inputs = self.upsampler(inputs)
        inputs = inputs.reshape(b, t, *inputs.shape[1:]).permute(0, 2, 1, 3)

        return inputs
211
+
212
+
213
class Downsample(nn.Module):
    """Down-sample (B, C, F, J): frames via average pooling, joints via a
    strided Conv1d."""

    def __init__(
        self,
        in_channels,
        out_channels,
        frame_downsample_rate,
        joint_downsample_rate
    ):
        super(Downsample, self).__init__()

        self.frame_downsample_rate = frame_downsample_rate
        self.joint_downsample_rate = joint_downsample_rate
        self.joint_downsample = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=self.joint_downsample_rate, padding=1)

    def forward(self, x):
        """Pool frames (if rate > 1), then down-sample the joint axis."""
        if self.frame_downsample_rate > 1:
            # (B, C, F, J) -> (B*J, C, F) so pooling runs over frames.
            batch_size, channels, frames, joints = x.shape
            x = x.permute(0, 3, 1, 2).reshape(batch_size * joints, channels, frames)
            if x.shape[-1] % 2 == 1:
                # Odd frame count: keep the first frame untouched, pool the rest.
                x_first, x_rest = x[..., 0], x[..., 1:]
                if x_rest.shape[-1] > 0:
                    x_rest = F.avg_pool1d(x_rest, kernel_size=self.frame_downsample_rate, stride=self.frame_downsample_rate)
                x = torch.cat([x_first[..., None], x_rest], dim=-1)
            else:
                # Fix: this branch previously hard-coded kernel_size=2/stride=2
                # while the odd-frame branch used self.frame_downsample_rate;
                # use the configured rate consistently (identical behavior for
                # the shipped rate of 2).
                x = F.avg_pool1d(x, kernel_size=self.frame_downsample_rate, stride=self.frame_downsample_rate)
            # (B*J, C, F') -> (B, C, F', J)
            x = x.reshape(batch_size, joints, channels, x.shape[-1]).permute(0, 2, 3, 1)

        # Joint down-sampling: (B, C, F, J) -> (B*F, C, J) -> strided conv -> back.
        batch_size, channels, frames, joints = x.shape
        x = x.permute(0, 2, 1, 3).reshape(batch_size * frames, channels, joints)
        x = self.joint_downsample(x)
        x = x.reshape(batch_size, frames, x.shape[1], x.shape[2]).permute(0, 2, 1, 3)
        return x
257
+
258
+
259
+
260
class ResBlock(nn.Module):
    """GroupNorm-SiLU-Conv residual block whose 3x3 conv dilation grows as the
    channel width shrinks relative to ``max_channels``."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 group_num=32,
                 max_channels=512):
        super(ResBlock, self).__init__()
        # Narrower blocks get a larger dilation; never below 1.
        skip = max(1, max_channels // out_channels - 1)
        layers = [
            nn.GroupNorm(group_num, in_channels, eps=1e-06, affine=True),
            nn.SiLU(),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=skip, dilation=skip),
            nn.GroupNorm(group_num, out_channels, eps=1e-06, affine=True),
            nn.SiLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1, padding=0),
        ]
        self.block = nn.Sequential(*layers)
        # 1x1 projection only when channel counts differ, otherwise identity.
        if in_channels != out_channels:
            self.conv_short = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        else:
            self.conv_short = nn.Identity()

    def forward(self, x):
        """Residual forward; projects the skip path only on a shape mismatch."""
        residual = self.block(x)
        if residual.shape != x.shape:
            x = self.conv_short(x)
        return x + residual
284
+
285
+
286
+
287
class SMPL_VQVAE(nn.Module):
    """Motion VQ-VAE tying together an Encoder, a VectorQuantizer and a Decoder."""

    def __init__(self, encoder, decoder, vq):
        super(SMPL_VQVAE, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.vq = vq

    def to(self, device):
        """Move all submodules to ``device`` and remember it."""
        self.encoder = self.encoder.to(device)
        self.decoder = self.decoder.to(device)
        self.vq = self.vq.to(device)
        self.device = device
        return self

    def encdec_slice_frames(self, x, frame_batch_size, encdec, return_vq):
        """Run ``encdec`` over the frame axis of ``x`` in chunks.

        The first chunk absorbs the remainder frames so every later chunk has
        exactly ``frame_batch_size`` frames. When ``encdec`` is the encoder and
        a quantizer exists, the concatenated result is quantized and
        (tokens, loss) is returned; otherwise (frames, None, None).
        """
        num_frames = x.shape[2]
        remainder = num_frames % frame_batch_size
        chunks = []

        for i in range(num_frames // frame_batch_size):
            begin = frame_batch_size * i + (0 if i == 0 else remainder)
            end = frame_batch_size * (i + 1) + remainder
            chunks.append(encdec(x[:, :, begin:end]))

        if encdec == self.encoder and self.vq is not None:
            quantized, loss = self.vq(torch.cat(chunks, dim=2), return_vq=return_vq)
            return quantized, loss
        return torch.cat(chunks, dim=2), None, None

    def forward(self, x, return_vq=False):
        """Encode (+quantize) a pose tensor; decode back unless ``return_vq``."""
        x = x.permute(0, 3, 1, 2)
        x, loss = self.encdec_slice_frames(x, frame_batch_size=8, encdec=self.encoder, return_vq=return_vq)

        if return_vq:
            return x, loss
        x, _, _ = self.encdec_slice_frames(x, frame_batch_size=2, encdec=self.decoder, return_vq=return_vq)
        return x.permute(0, 2, 3, 1), loss
MTV/mtv.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from typing import Union, Tuple
4
+
5
+
6
def get_1d_rotary_pos_embed(
    dim: int,
    pos: Union[np.ndarray, int],
    theta: float = 10000.0,
    use_real=False,
    linear_factor=1.0,
    ntk_factor=1.0,
    repeat_interleave_real=True,
    freqs_dtype=torch.float32,  # torch.float32, torch.float64 (flux)
):
    """Precompute 1D rotary (RoPE) frequencies.

    Args:
        dim: Embedding dimension; must be even.
        pos: Position indices ([S] numpy array) or a scalar length S.
        theta: Base for the frequency geometric progression.
        use_real: Return (cos, sin) tensors instead of complex exponentials.
        linear_factor: Context-extrapolation scaling applied to all frequencies.
        ntk_factor: NTK-aware scaling applied to ``theta``.
        repeat_interleave_real: With ``use_real``, interleave each frequency
            with itself (vs. concatenating two half-copies) to reach ``dim``.
        freqs_dtype: dtype used for the frequency computation.

    Returns:
        (cos, sin) each [S, dim] when ``use_real``; otherwise a complex
        tensor of shape [S, dim/2].
    """
    assert dim % 2 == 0

    # Accept either a scalar length or an explicit numpy index array.
    if isinstance(pos, int):
        pos = torch.arange(pos)
    if isinstance(pos, np.ndarray):
        pos = torch.from_numpy(pos)  # type: ignore # [S]

    scaled_theta = theta * ntk_factor
    exponents = torch.arange(0, dim, 2, dtype=freqs_dtype, device=pos.device)[: (dim // 2)] / dim
    freqs = 1.0 / (scaled_theta ** exponents) / linear_factor  # [D/2]
    freqs = torch.outer(pos, freqs)  # type: ignore # [S, D/2]

    if not use_real:
        # Complex exponentials e^{i*freq}: complex64, [S, D/2]
        return torch.polar(torch.ones_like(freqs), freqs)

    if repeat_interleave_real:
        # Each frequency interleaved with itself -> [S, D]
        return (
            freqs.cos().repeat_interleave(2, dim=1).float(),
            freqs.sin().repeat_interleave(2, dim=1).float(),
        )
    # Two half-copies concatenated -> [S, D]
    return (
        torch.cat([freqs.cos(), freqs.cos()], dim=-1).float(),
        torch.cat([freqs.sin(), freqs.sin()], dim=-1).float(),
    )
67
+
68
+
69
def get_3d_rotary_pos_embed(
    embed_dim, crops_coords, grid_size, temporal_size, theta: int = 10000, use_real: bool = True
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """RoPE for video tokens with a 3D (time, height, width) structure.

    Args:
        embed_dim: Head embedding size; split 1/4 temporal, 3/8 height,
            3/8 width.
        crops_coords: ((top, left), (bottom, right)) crop coordinates.
        grid_size: Spatial grid (height, width).
        temporal_size: Number of temporal positions.
        theta: Frequency base.
        use_real: Must be True; only (cos, sin) output is supported.

    Returns:
        (cos, sin), each of shape
        (temporal_size * grid_size[0] * grid_size[1], embed_dim).
    """
    if use_real is not True:
        raise ValueError(" `use_real = False` is not currently supported for get_3d_rotary_pos_embed")
    start, stop = crops_coords
    grid_size_h, grid_size_w = grid_size
    grid_h = np.linspace(start[0], stop[0], grid_size_h, endpoint=False, dtype=np.float32)
    grid_w = np.linspace(start[1], stop[1], grid_size_w, endpoint=False, dtype=np.float32)
    grid_t = np.linspace(0, temporal_size, temporal_size, endpoint=False, dtype=np.float32)

    # Per-axis share of the embedding dimension.
    dim_t = embed_dim // 4
    dim_h = embed_dim // 8 * 3
    dim_w = embed_dim // 8 * 3

    # 1D tables per axis, each as (cos, sin).
    t_cos, t_sin = get_1d_rotary_pos_embed(dim_t, grid_t, use_real=True)
    h_cos, h_sin = get_1d_rotary_pos_embed(dim_h, grid_h, use_real=True)
    w_cos, w_sin = get_1d_rotary_pos_embed(dim_w, grid_w, use_real=True)

    def broadcast_concat(freqs_t, freqs_h, freqs_w):
        # Broadcast each axis table over the other two axes, then flatten.
        freqs_t = freqs_t[:, None, None, :].expand(-1, grid_size_h, grid_size_w, -1)
        freqs_h = freqs_h[None, :, None, :].expand(temporal_size, -1, grid_size_w, -1)
        freqs_w = freqs_w[None, None, :, :].expand(temporal_size, grid_size_h, -1, -1)
        merged = torch.cat([freqs_t, freqs_h, freqs_w], dim=-1)
        return merged.view(temporal_size * grid_size_h * grid_size_w, -1)

    cos = broadcast_concat(t_cos, h_cos, w_cos)
    sin = broadcast_concat(t_sin, h_sin, w_sin)
    return cos, sin
135
+
136
+
137
def get_3d_motion_spatial_embed(
    embed_dim: int, num_joints: int, joints_mean: np.ndarray, joints_std: np.ndarray, theta: float = 10000.0
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """Rotary embedding over mean 3D joint positions.

    Each of the x/y/z columns of ``joints_mean`` is mean-centered and drives
    one third of ``embed_dim``. ``num_joints`` and ``joints_std`` are accepted
    for signature compatibility but not used here.

    Returns:
        (cos, sin), each of shape (len(joints_mean), embed_dim).
    """
    assert embed_dim % 2 == 0 and embed_dim % 3 == 0

    def _axis_rope(dim, pos, freqs_dtype=torch.float32):
        # Standard 1D RoPE table with interleaved duplication to width `dim`.
        if isinstance(pos, np.ndarray):
            pos = torch.from_numpy(pos)
        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=freqs_dtype, device=pos.device)[: (dim // 2)] / dim))
        angles = torch.outer(pos, inv_freq)  # [S, D/2]
        return (
            angles.cos().repeat_interleave(2, dim=1).float(),
            angles.sin().repeat_interleave(2, dim=1).float(),
        )

    axis_dim = embed_dim // 3
    per_axis = []
    for axis in range(3):
        coords = joints_mean[:, axis]
        # Center each coordinate axis around its own mean.
        per_axis.append(_axis_rope(axis_dim, coords - coords.mean()))

    freqs_cos = torch.cat([c for c, _ in per_axis], dim=-1)
    freqs_sin = torch.cat([s for _, s in per_axis], dim=-1)

    return freqs_cos, freqs_sin
170
+
171
def prepare_motion_embeddings(num_frames, num_joints, joints_mean, joints_std, theta=10000, device='cuda'):
    """Build the combined temporal (44-dim) + spatial (84-dim) RoPE tables.

    Returns:
        (cos, sin), each of shape (num_frames * num_joints, 128), on ``device``.
    """
    # Temporal table, broadcast over joints.
    t_cos, t_sin = get_1d_rotary_pos_embed(44, num_frames, theta, use_real=True)
    t_cos = t_cos[:, None, :].expand(-1, num_joints, -1).reshape(num_frames * num_joints, -1)
    t_sin = t_sin[:, None, :].expand(-1, num_joints, -1).reshape(num_frames * num_joints, -1)

    # Spatial table, broadcast over frames.
    s_cos, s_sin = get_3d_motion_spatial_embed(84, num_joints, joints_mean, joints_std, theta)
    s_cos = s_cos[None, :, :].expand(num_frames, -1, -1).reshape(num_frames * num_joints, -1)
    s_sin = s_sin[None, :, :].expand(num_frames, -1, -1).reshape(num_frames * num_joints, -1)

    motion_embed_cos = torch.cat([t_cos, s_cos], dim=-1).to(device=device)
    motion_embed_sin = torch.cat([t_sin, s_sin], dim=-1).to(device=device)
    return motion_embed_cos, motion_embed_sin
181
+
182
def apply_rotary_emb(x, freqs_cis):
    """Apply rotary position embedding to ``x`` given ``freqs_cis=(cos, sin)``.

    ``x`` has shape (B, H, S, D); cos/sin are [S, D] and are broadcast over
    the leading batch/head axes.
    """
    cos, sin = freqs_cis  # [S, D]
    cos = cos[None, None].to(x.device)
    sin = sin[None, None].to(x.device)

    # Pairwise rotation: interleaved (a, b) pairs become (-b, a).
    real, imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1)  # [B, S, H, D//2]
    rotated = torch.stack([-imag, real], dim=-1).flatten(3)

    return (x.float() * cos + rotated.float() * sin).to(x.dtype)
MTV/nlf.py ADDED
File without changes
MTV/nodes.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gc
4
+ from ..utils import log, dict_to_device
5
+ import numpy as np
6
+ from accelerate import init_empty_weights
7
+ from accelerate.utils import set_module_tensor_to_device
8
+
9
+ import comfy.model_management as mm
10
+ from comfy.utils import load_torch_file
11
+ import folder_paths
12
+
13
+ script_directory = os.path.dirname(os.path.abspath(__file__))
14
+ device = mm.get_torch_device()
15
+ offload_device = mm.unet_offload_device()
16
+
17
+ local_model_path = os.path.join(folder_paths.models_dir, "nlf", "nlf_l_multi_0.3.2.torchscript")
18
+
19
+ from .motion4d import SMPL_VQVAE, VectorQuantizer, Encoder, Decoder
20
+ from .mtv import prepare_motion_embeddings
21
+
22
class DownloadAndLoadNLFModel:
    """ComfyUI node: downloads (if missing) and loads the NLF TorchScript model."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "url": (
                    [
                        "https://github.com/isarandi/nlf/releases/download/v0.3.2/nlf_l_multi_0.3.2.torchscript"
                    ],
                )
            },
        }

    RETURN_TYPES = ("NLFMODEL",)
    RETURN_NAMES = ("nlf_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, url):
        """Fetch the TorchScript file to the local models dir and jit-load it.

        Raises on a failed download (the old code only printed the status code
        and then attempted to load a file that was never written). Streams the
        response instead of buffering the whole checkpoint in memory.
        """
        if not os.path.exists(local_model_path):
            log.info(f"Downloading NLF model to: {local_model_path}")
            import requests
            os.makedirs(os.path.dirname(local_model_path), exist_ok=True)
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()
                with open(local_model_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        f.write(chunk)

        model = torch.jit.load(local_model_path).eval()

        return (model,)
56
+
57
class LoadNLFModel:
    """ComfyUI node: loads an NLF TorchScript model from an explicit path."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "path": ("STRING", {"default": local_model_path}),
            },
        }

    RETURN_TYPES = ("NLFMODEL",)
    RETURN_NAMES = ("nlf_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, path):
        """Jit-load the TorchScript module at ``path`` in eval mode."""
        return (torch.jit.load(path).eval(),)
75
+
76
class LoadVQVAE:
    """ComfyUI node: builds the MTV-Crafter motion VQ-VAE and loads its weights."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model_name": (folder_paths.get_filename_list("vae"), {"tooltip": "These models are loaded from 'ComfyUI/models/vae'"}),
            },
        }

    RETURN_TYPES = ("VQVAE",)
    RETURN_NAMES = ("vqvae", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, model_name):
        """Instantiate encoder/quantizer/decoder, then strictly load the checkpoint."""
        state_dict = load_torch_file(folder_paths.get_full_path("vae", model_name), safe_load=True)

        # Fixed architecture matching the published MTV-Crafter checkpoint.
        motion_encoder = Encoder(
            in_channels=3,
            mid_channels=[128, 512],
            out_channels=3072,
            downsample_time=[2, 2],
            downsample_joint=[1, 1],
        )
        motion_decoder = Decoder(
            in_channels=3072,
            mid_channels=[512, 128],
            out_channels=3,
            upsample_rate=2.0,
            frame_upsample_rate=[2.0, 2.0],
            joint_upsample_rate=[1.0, 1.0],
        )
        motion_quant = VectorQuantizer(nb_code=8192, code_dim=3072)

        vqvae = SMPL_VQVAE(motion_encoder, motion_decoder, motion_quant).to(device)
        vqvae.load_state_dict(state_dict, strict=True)

        return (vqvae,)
116
+
117
class MTVCrafterEncodePoses:
    """ComfyUI node: normalizes NLF joint predictions and encodes them with the
    motion VQ-VAE, returning motion tokens and a reconstructed pose preview."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "vqvae": ("VQVAE", {"tooltip": "VQVAE model"}),
                "poses": ("NLFPRED", {"tooltip": "Input poses for the model"}),
            },
        }

    RETURN_TYPES = ("MTVCRAFTERMOTION", "NLFPRED")
    RETURN_NAMES = ("mtvcrafter_motion", "pose_results")
    FUNCTION = "encode"
    CATEGORY = "WanVideoWrapper"

    def encode(self, vqvae, poses):
        """Return ({tokens, mean, std}, de-normalized reconstructed joints).

        Fix: removed the stray debug print of norm_poses and the dead
        commented-out pickle-loading scaffold.
        """
        # Dataset-level joint statistics; shape (24, 3) per the checked-in files.
        global_mean = np.load(os.path.join(script_directory, "data", "mean.npy"))
        global_std = np.load(os.path.join(script_directory, "data", "std.npy"))

        # One (J, 3) array per frame; index 0 takes the first detected person.
        smpl_poses = np.array([pose[0].cpu().numpy() for pose in poses['joints3d_nonparam'][0]])

        norm_poses = torch.tensor((smpl_poses - global_mean) / global_std).unsqueeze(0)

        vqvae.to(device)
        # Quantized motion tokens used for conditioning downstream.
        motion_tokens, vq_loss = vqvae(norm_poses.to(device), return_vq=True)
        # Full encode/decode round-trip, de-normalized for visualization.
        recon_motion = vqvae(norm_poses.to(device))[0][0].to(dtype=torch.float32).cpu().detach() * global_std + global_mean
        vqvae.to(offload_device)

        poses_dict = {
            'mtv_motion_tokens': motion_tokens,
            'global_mean': global_mean,
            'global_std': global_std
        }

        return poses_dict, recon_motion
167
+
168
+
169
class NLFPredict:
    """ComfyUI node: runs the NLF TorchScript model on a batch of images."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("NLFMODEL",),
            "images": ("IMAGE", {"tooltip": "Input images for the model"}),
            },
        }

    RETURN_TYPES = ("NLFPRED", )
    RETURN_NAMES = ("pose_results",)
    FUNCTION = "predict"
    CATEGORY = "WanVideoWrapper"

    def predict(self, model, images):
        """Return a dict of pose outputs (currently only 'joints3d_nonparam')."""
        model.to(device)
        # NHWC -> NCHW for the TorchScript model.
        pred = model.detect_smpl_batched(images.permute(0, 3, 1, 2).to(device))
        model.to(offload_device)

        pred = dict_to_device(pred, offload_device)

        # Keep only the keys downstream nodes consume; absent keys become None.
        pose_results = {key: [pred.get(key)] for key in ('joints3d_nonparam',)}

        return (pose_results,)
202
+
203
class DrawNLFPoses:
    """ComfyUI node: renders NLF pose predictions onto control images."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "poses": ("NLFPRED", {"tooltip": "Input poses for the model"}),
            "width": ("INT", {"default": 512}),
            "height": ("INT", {"default": 512}),
            },
        }

    RETURN_TYPES = ("IMAGE", )
    RETURN_NAMES = ("image",)
    FUNCTION = "predict"
    CATEGORY = "WanVideoWrapper"

    def predict(self, poses, width, height):
        """Draw each frame's joints onto a (height, width) canvas batch.

        Fix: removed stray debug ``print(type(poses))`` that spammed stdout.
        """
        from .draw_pose import get_control_conditions
        # Accept either the raw prediction dict or an already-unwrapped list.
        if isinstance(poses, dict):
            pose_input = poses['joints3d_nonparam'][0] if 'joints3d_nonparam' in poses else poses
        else:
            pose_input = poses
        control_conditions = get_control_conditions(pose_input, height, width)

        return (control_conditions,)
228
+
229
# Registration tables consumed by ComfyUI: node key -> class, and key -> UI label.
NODE_CLASS_MAPPINGS = {
    "DownloadAndLoadNLFModel": DownloadAndLoadNLFModel,
    "NLFPredict": NLFPredict,
    "DrawNLFPoses": DrawNLFPoses,
    "LoadVQVAE": LoadVQVAE,
    "MTVCrafterEncodePoses": MTVCrafterEncodePoses
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "DownloadAndLoadNLFModel": "(Download)Load NLF Model",
    "NLFPredict": "NLF Predict",
    "DrawNLFPoses": "Draw NLF Poses",
    "LoadVQVAE": "Load VQVAE",
    "MTVCrafterEncodePoses": "MTV Crafter Encode Poses"
}
__init__.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""WanVideoWrapper package init: aggregate node mappings from all submodules.

Required submodules are imported directly (a failure there should fail the
whole package import); optional feature modules are loaded best-effort so a
broken optional dependency does not take down the entire node pack.
"""
import importlib
import logging

try:
    from .utils import check_duplicate_nodes, log
    duplicate_dirs = check_duplicate_nodes()
    if duplicate_dirs:
        warning_msg = f"WARNING: Found {len(duplicate_dirs)} other WanVideoWrapper directories:\n"
        for dir_path in duplicate_dirs:
            warning_msg += f" - {dir_path}\n"
        log.warning(warning_msg + "Please remove duplicates to avoid possible conflicts.")
except Exception:
    # Best-effort check only. Fall back to a stdlib logger so the optional
    # imports below can still warn even if .utils itself failed to load
    # (previously `log` could be undefined here, turning any optional-import
    # failure into a NameError).
    log = logging.getLogger(__name__)

from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
from .recammaster.nodes import NODE_CLASS_MAPPINGS as RECAM_MASTER_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as RECAM_MASTER_NODE_DISPLAY_NAME_MAPPINGS
from .skyreels.nodes import NODE_CLASS_MAPPINGS as SKYREELS_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as SKYREELS_NODE_DISPLAY_NAME_MAPPINGS
from .fantasytalking.nodes import NODE_CLASS_MAPPINGS as FANTASYTALKING_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as FANTASYTALKING_NODE_DISPLAY_NAME_MAPPINGS
from .nodes_sampler import NODE_CLASS_MAPPINGS as SAMPLER_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as SAMPLER_NODE_DISPLAY_NAME_MAPPINGS
from .fun_camera.nodes import NODE_CLASS_MAPPINGS as FUN_CAMERA_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as FUN_CAMERA_NODE_DISPLAY_NAME_MAPPINGS
from .uni3c.nodes import NODE_CLASS_MAPPINGS as UNI3C_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as UNI3C_NODE_DISPLAY_NAME_MAPPINGS
from .controlnet.nodes import NODE_CLASS_MAPPINGS as CONTROLNET_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as CONTROLNET_NODE_DISPLAY_NAME_MAPPINGS
from .ATI.nodes import NODE_CLASS_MAPPINGS as ATI_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as ATI_NODE_DISPLAY_NAME_MAPPINGS
from .multitalk.nodes import NODE_CLASS_MAPPINGS as MULTITALK_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as MULTITALK_NODE_DISPLAY_NAME_MAPPINGS
from .nodes_model_loading import NODE_CLASS_MAPPINGS as MODEL_LOADING_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as MODEL_LOADING_NODE_DISPLAY_NAME_MAPPINGS
from .nodes_utility import NODE_CLASS_MAPPINGS as UTILITY_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as UTILITY_NODE_DISPLAY_NAME_MAPPINGS
from .cache_methods.nodes_cache import NODE_CLASS_MAPPINGS as NODE_CACHE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as NODE_CACHE_DISPLAY_NAME_MAPPINGS
from .nodes_deprecated import NODE_CLASS_MAPPINGS as DEPRECATED_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS
from .s2v.nodes import NODE_CLASS_MAPPINGS as S2V_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as S2V_NODE_DISPLAY_NAME_MAPPINGS


def _load_optional_nodes(module_name, label):
    """Import an optional node module relative to this package.

    Returns its (class_mappings, display_name_mappings); on any failure a
    warning is logged and two empty dicts are returned so the rest of the
    pack still loads.
    """
    try:
        mod = importlib.import_module(module_name, __package__)
        return mod.NODE_CLASS_MAPPINGS, mod.NODE_DISPLAY_NAME_MAPPINGS
    except Exception as e:
        log.warning(f"WanVideoWrapper WARNING: {label} nodes not available due to error in importing them: {e}")
        return {}, {}


QWEN_NODE_CLASS_MAPPINGS, QWEN_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".qwen.qwen", "Qwen")
FANTASYPORTRAIT_NODE_CLASS_MAPPINGS, FANTASYPORTRAIT_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".fantasyportrait.nodes", "FantasyPortrait")
UNIANIMATE_NODE_CLASS_MAPPINGS, UNIANIMATE_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".unianimate.nodes", "UniAnimate")
MTV_NODE_CLASS_MAPPINGS, MTV_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".MTV.nodes", "MTV")
HUMO_NODE_CLASS_MAPPINGS, HUMO_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".HuMo.nodes", "HuMo")
LYNX_NODE_CLASS_MAPPINGS, LYNX_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".lynx.nodes", "Lynx")

# Merge every submodule's mappings in the same order as before, so later
# entries keep overriding earlier ones on key collisions.
for _class_map, _display_map in (
    (RECAM_MASTER_NODE_CLASS_MAPPINGS, RECAM_MASTER_NODE_DISPLAY_NAME_MAPPINGS),
    (UNIANIMATE_NODE_CLASS_MAPPINGS, UNIANIMATE_NODE_DISPLAY_NAME_MAPPINGS),
    (SKYREELS_NODE_CLASS_MAPPINGS, SKYREELS_NODE_DISPLAY_NAME_MAPPINGS),
    (FANTASYTALKING_NODE_CLASS_MAPPINGS, FANTASYTALKING_NODE_DISPLAY_NAME_MAPPINGS),
    (FANTASYPORTRAIT_NODE_CLASS_MAPPINGS, FANTASYPORTRAIT_NODE_DISPLAY_NAME_MAPPINGS),
    (FUN_CAMERA_NODE_CLASS_MAPPINGS, FUN_CAMERA_NODE_DISPLAY_NAME_MAPPINGS),
    (UNI3C_NODE_CLASS_MAPPINGS, UNI3C_NODE_DISPLAY_NAME_MAPPINGS),
    (CONTROLNET_NODE_CLASS_MAPPINGS, CONTROLNET_NODE_DISPLAY_NAME_MAPPINGS),
    (ATI_NODE_CLASS_MAPPINGS, ATI_NODE_DISPLAY_NAME_MAPPINGS),
    (MULTITALK_NODE_CLASS_MAPPINGS, MULTITALK_NODE_DISPLAY_NAME_MAPPINGS),
    (MODEL_LOADING_NODE_CLASS_MAPPINGS, MODEL_LOADING_NODE_DISPLAY_NAME_MAPPINGS),
    (UTILITY_NODE_CLASS_MAPPINGS, UTILITY_NODE_DISPLAY_NAME_MAPPINGS),
    (NODE_CACHE_CLASS_MAPPINGS, NODE_CACHE_DISPLAY_NAME_MAPPINGS),
    (DEPRECATED_NODE_CLASS_MAPPINGS, DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS),
    (QWEN_NODE_CLASS_MAPPINGS, QWEN_NODE_DISPLAY_NAME_MAPPINGS),
    (MTV_NODE_CLASS_MAPPINGS, MTV_NODE_DISPLAY_NAME_MAPPINGS),
    (S2V_NODE_CLASS_MAPPINGS, S2V_NODE_DISPLAY_NAME_MAPPINGS),
    (HUMO_NODE_CLASS_MAPPINGS, HUMO_NODE_DISPLAY_NAME_MAPPINGS),
    (SAMPLER_NODE_CLASS_MAPPINGS, SAMPLER_NODE_DISPLAY_NAME_MAPPINGS),
    (LYNX_NODE_CLASS_MAPPINGS, LYNX_NODE_DISPLAY_NAME_MAPPINGS),
):
    NODE_CLASS_MAPPINGS.update(_class_map)
    NODE_DISPLAY_NAME_MAPPINGS.update(_display_map)

__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
cache_methods/cache_methods.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..utils import log
2
+ import torch
3
+
4
def set_transformer_cache_method(transformer, timesteps, cache_args=None):
    """Configure one of the supported step-caching methods on a transformer.

    Args:
        transformer: model to patch; cache settings are stored as attributes.
        timesteps: sampling timesteps, used to resolve end_step == -1.
        cache_args: dict produced by one of the cache nodes (TeaCache,
            MagCache or EasyCache). When None the transformer is returned
            unchanged (previously this crashed on subscripting None).

    Returns:
        The same transformer instance, configured in place.
    """
    if cache_args is None:
        return transformer

    # end_step == -1 means "run until the last sampling step".
    end_step = len(timesteps) - 1 if cache_args["end_step"] == -1 else cache_args["end_step"]
    cache_type = cache_args["cache_type"]
    transformer.cache_device = cache_args["cache_device"]

    if cache_type == "TeaCache":
        log.info(f"TeaCache: Using cache device: {transformer.cache_device}")
        transformer.teacache_state.clear_all()
        transformer.enable_teacache = True
        transformer.rel_l1_thresh = cache_args["rel_l1_thresh"]
        transformer.teacache_start_step = cache_args["start_step"]
        transformer.teacache_end_step = end_step
        transformer.teacache_use_coefficients = cache_args["use_coefficients"]
        transformer.teacache_mode = cache_args["mode"]
    elif cache_type == "MagCache":
        log.info(f"MagCache: Using cache device: {transformer.cache_device}")
        transformer.magcache_state.clear_all()
        transformer.enable_magcache = True
        transformer.magcache_start_step = cache_args["start_step"]
        transformer.magcache_end_step = end_step
        transformer.magcache_thresh = cache_args["magcache_thresh"]
        transformer.magcache_K = cache_args["magcache_K"]
    elif cache_type == "EasyCache":
        log.info(f"EasyCache: Using cache device: {transformer.cache_device}")
        transformer.easycache_state.clear_all()
        transformer.enable_easycache = True
        transformer.easycache_start_step = cache_args["start_step"]
        transformer.easycache_end_step = end_step
        transformer.easycache_thresh = cache_args["easycache_thresh"]
    return transformer
31
+
32
class TeaCacheState:
    """Per-prediction bookkeeping for TeaCache step skipping."""

    def __init__(self, cache_device='cpu'):
        self.cache_device = cache_device
        self.clear_all()

    def new_prediction(self, cache_device='cpu'):
        """Create new prediction state and return its ID"""
        self.cache_device = cache_device
        pred_id = self._next_pred_id
        self._next_pred_id = pred_id + 1
        self.states[pred_id] = {
            'previous_residual': None,
            'accumulated_rel_l1_distance': 0,
            'previous_modulated_input': None,
            'skipped_steps': [],
        }
        return pred_id

    def update(self, pred_id, **kwargs):
        """Update state for specific prediction"""
        if pred_id not in self.states:
            return None
        self.states[pred_id].update(kwargs)

    def get(self, pred_id):
        """Return the state dict for *pred_id*, or an empty dict if unknown."""
        return self.states.get(pred_id, {})

    def clear_all(self):
        """Drop every stored prediction and restart ID numbering."""
        self.states = {}
        self._next_pred_id = 0
64
+
65
class MagCacheState:
    """Per-prediction bookkeeping for MagCache step skipping."""

    def __init__(self, cache_device='cpu'):
        self.cache_device = cache_device
        self.clear_all()

    def new_prediction(self, cache_device='cpu'):
        """Create new prediction state and return its ID"""
        self.cache_device = cache_device
        pred_id = self._next_pred_id
        self._next_pred_id = pred_id + 1
        self.states[pred_id] = {
            'residual_cache': None,
            'accumulated_ratio': 1.0,
            'accumulated_steps': 0,
            'accumulated_err': 0,
            'skipped_steps': [],
        }
        return pred_id

    def update(self, pred_id, **kwargs):
        """Update state for specific prediction"""
        if pred_id not in self.states:
            return None
        self.states[pred_id].update(kwargs)

    def get(self, pred_id):
        """Return the state dict for *pred_id*, or an empty dict if unknown."""
        return self.states.get(pred_id, {})

    def clear_all(self):
        """Drop every stored prediction and restart ID numbering."""
        self.states = {}
        self._next_pred_id = 0
98
+
99
class EasyCacheState:
    """Per-prediction bookkeeping for EasyCache step skipping."""

    def __init__(self, cache_device='cpu'):
        self.cache_device = cache_device
        self.clear_all()

    def new_prediction(self, cache_device='cpu'):
        """Create a new prediction state and return its ID."""
        self.cache_device = cache_device
        pred_id = self._next_pred_id
        self._next_pred_id = pred_id + 1
        self.states[pred_id] = {
            'previous_raw_input': None,
            'previous_raw_output': None,
            'cache': None,
            'accumulated_error': 0.0,
            'skipped_steps': [],
        }
        return pred_id

    def update(self, pred_id, **kwargs):
        """Update state for a specific prediction."""
        if pred_id not in self.states:
            return None
        self.states[pred_id].update(kwargs)

    def get(self, pred_id):
        """Return the state dict for *pred_id*, or an empty dict if unknown."""
        return self.states.get(pred_id, {})

    def clear_all(self):
        """Drop every stored prediction and restart ID numbering."""
        self.states = {}
        self._next_pred_id = 0
132
+
133
def relative_l1_distance(last_tensor, current_tensor):
    """Mean absolute difference between the tensors, normalized by the mean
    magnitude of *last_tensor*; returned as float32 on current_tensor's device."""
    previous = last_tensor.to(current_tensor.device)
    numerator = (previous - current_tensor).abs().mean()
    denominator = last_tensor.abs().mean()
    ratio = numerator / denominator
    return ratio.to(torch.float32).to(current_tensor.device)
138
+
139
def cache_report(transformer, cache_args):
    """Log how many steps each prediction skipped, then reset all cache state.

    Args:
        transformer: model carrying the teacache/magcache/easycache state
            objects populated during sampling.
        cache_args: dict with a "cache_type" key selecting which state to
            report. Unknown cache types are skipped instead of crashing
            (previously `states` was None and `.items()` raised).
    """
    cache_type = cache_args["cache_type"]
    containers = {
        "TeaCache": transformer.teacache_state,
        "MagCache": transformer.magcache_state,
        "EasyCache": transformer.easycache_state,
    }
    container = containers.get(cache_type)
    if container is not None:
        # pred_id 0/1 correspond to the cond/uncond passes by convention.
        state_names = {
            0: "conditional",
            1: "unconditional"
        }
        for pred_id, state in container.states.items():
            name = state_names.get(pred_id, f"prediction_{pred_id}")
            if 'skipped_steps' in state:
                log.info(f"{cache_type} skipped: {len(state['skipped_steps'])} {name} steps: {state['skipped_steps']}")
    # Always reset every cache state so nothing leaks into the next run.
    transformer.teacache_state.clear_all()
    transformer.magcache_state.clear_all()
    transformer.easycache_state.clear_all()
cache_methods/nodes_cache.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from comfy import model_management as mm
2
+
3
class WanVideoTeaCache:
    """Node producing TeaCache configuration for the WanVideo sampler."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "rel_l1_thresh": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.001,
                                            "tooltip": "Higher values will make TeaCache more aggressive, faster, but may cause artifacts. Good value range for 1.3B: 0.05 - 0.08, for other models 0.15-0.30"}),
                "start_step": ("INT", {"default": 1, "min": 0, "max": 9999, "step": 1, "tooltip": "Start percentage of the steps to apply TeaCache"}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 9999, "step": 1, "tooltip": "End steps to apply TeaCache"}),
                "cache_device": (["main_device", "offload_device"], {"default": "offload_device", "tooltip": "Device to cache to"}),
                "use_coefficients": ("BOOLEAN", {"default": True, "tooltip": "Use calculated coefficients for more accuracy. When enabled the rel_l1_thresh should be about 10 times higher than without"}),
            },
            "optional": {
                "mode": (["e", "e0"], {"default": "e", "tooltip": "Choice between using e (time embeds, default) or e0 (modulated time embeds)"}),
            },
        }
    RETURN_TYPES = ("CACHEARGS",)
    RETURN_NAMES = ("cache_args",)
    FUNCTION = "process"
    CATEGORY = "WanVideoWrapper"
    DESCRIPTION = """
Patch WanVideo model to use TeaCache. Speeds up inference by caching the output and
applying it instead of doing the step.  Best results are achieved by choosing the
appropriate coefficients for the model. Early steps should never be skipped, with too
aggressive values this can happen and the motion suffers. Starting later can help with that too.
When NOT using coefficients, the threshold value should be
about 10 times smaller than the value used with coefficients.

Official recommended values https://github.com/ali-vilab/TeaCache/tree/main/TeaCache4Wan2.1:


<pre style='font-family:monospace'>
+-------------------+--------+---------+--------+
|       Model       |  Low   | Medium  |  High  |
+-------------------+--------+---------+--------+
| Wan2.1 t2v 1.3B   |  0.05  |  0.07   |  0.08  |
| Wan2.1 t2v 14B    |  0.14  |  0.15   |  0.20  |
| Wan2.1 i2v 480P   |  0.13  |  0.19   |  0.26  |
| Wan2.1 i2v 720P   |  0.18  |  0.20   |  0.30  |
+-------------------+--------+---------+--------+
</pre>
"""

    def process(self, rel_l1_thresh, start_step, end_step, cache_device, use_coefficients, mode="e"):
        """Resolve the cache device and bundle the TeaCache settings into cache_args."""
        if cache_device == "main_device":
            cache_device = mm.get_torch_device()
        else:
            cache_device = mm.unet_offload_device()
        cache_args = {
            "cache_type": "TeaCache",
            "rel_l1_thresh": rel_l1_thresh,
            "start_step": start_step,
            "end_step": end_step,
            "cache_device": cache_device,
            "use_coefficients": use_coefficients,
            "mode": mode,
        }
        return (cache_args,)
61
+
62
class WanVideoMagCache:
    """Node producing MagCache configuration for the WanVideo sampler."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "magcache_thresh": ("FLOAT", {"default": 0.02, "min": 0.0, "max": 0.3, "step": 0.001, "tooltip": "How strongly to cache the output of diffusion model. This value must be non-negative."}),
                "magcache_K": ("INT", {"default": 4, "min": 0, "max": 6, "step": 1, "tooltip": "The maximum skip steps of MagCache."}),
                "start_step": ("INT", {"default": 1, "min": 0, "max": 9999, "step": 1, "tooltip": "Step to start applying MagCache"}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 9999, "step": 1, "tooltip": "Step to end applying MagCache"}),
                "cache_device": (["main_device", "offload_device"], {"default": "offload_device", "tooltip": "Device to cache to"}),
            },
        }
    RETURN_TYPES = ("CACHEARGS",)
    RETURN_NAMES = ("cache_args",)
    FUNCTION = "setargs"
    CATEGORY = "WanVideoWrapper"
    EXPERIMENTAL = True
    DESCRIPTION = "MagCache for WanVideoWrapper, source https://github.com/Zehong-Ma/MagCache"

    def setargs(self, magcache_thresh, magcache_K, start_step, end_step, cache_device):
        """Resolve the cache device and bundle the MagCache settings into cache_args."""
        if cache_device == "main_device":
            cache_device = mm.get_torch_device()
        else:
            cache_device = mm.unet_offload_device()

        cache_args = {
            "cache_type": "MagCache",
            "magcache_thresh": magcache_thresh,
            "magcache_K": magcache_K,
            "start_step": start_step,
            "end_step": end_step,
            "cache_device": cache_device,
        }
        return (cache_args,)
96
+
97
class WanVideoEasyCache:
    """Node producing EasyCache configuration for the WanVideo sampler."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "easycache_thresh": ("FLOAT", {"default": 0.015, "min": 0.0, "max": 1.0, "step": 0.001, "tooltip": "How strongly to cache the output of diffusion model. This value must be non-negative."}),
                "start_step": ("INT", {"default": 10, "min": 0, "max": 9999, "step": 1, "tooltip": "Step to start applying EasyCache"}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 9999, "step": 1, "tooltip": "Step to end applying EasyCache"}),
                "cache_device": (["main_device", "offload_device"], {"default": "offload_device", "tooltip": "Device to cache to"}),
            },
        }
    RETURN_TYPES = ("CACHEARGS",)
    RETURN_NAMES = ("cache_args",)
    FUNCTION = "setargs"
    CATEGORY = "WanVideoWrapper"
    EXPERIMENTAL = True
    DESCRIPTION = "EasyCache for WanVideoWrapper, source https://github.com/H-EmbodVis/EasyCache"

    def setargs(self, easycache_thresh, start_step, end_step, cache_device):
        """Resolve the cache device and bundle the EasyCache settings into cache_args."""
        resolved_device = mm.get_torch_device() if cache_device == "main_device" else mm.unet_offload_device()
        return ({
            "cache_type": "EasyCache",
            "easycache_thresh": easycache_thresh,
            "start_step": start_step,
            "end_step": end_step,
            "cache_device": resolved_device,
        },)
129
+
130
+
131
# (node class, display title) keyed by registration name.
_CACHE_NODE_ENTRIES = {
    "WanVideoTeaCache": (WanVideoTeaCache, "WanVideo TeaCache"),
    "WanVideoMagCache": (WanVideoMagCache, "WanVideo MagCache"),
    "WanVideoEasyCache": (WanVideoEasyCache, "WanVideo EasyCache"),
}

NODE_CLASS_MAPPINGS = {key: cls for key, (cls, _) in _CACHE_NODE_ENTRIES.items()}
NODE_DISPLAY_NAME_MAPPINGS = {key: title for key, (_, title) in _CACHE_NODE_ENTRIES.items()}
configs/T5_tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>",
103
+ "<extra_id_100>",
104
+ "<extra_id_101>",
105
+ "<extra_id_102>",
106
+ "<extra_id_103>",
107
+ "<extra_id_104>",
108
+ "<extra_id_105>",
109
+ "<extra_id_106>",
110
+ "<extra_id_107>",
111
+ "<extra_id_108>",
112
+ "<extra_id_109>",
113
+ "<extra_id_110>",
114
+ "<extra_id_111>",
115
+ "<extra_id_112>",
116
+ "<extra_id_113>",
117
+ "<extra_id_114>",
118
+ "<extra_id_115>",
119
+ "<extra_id_116>",
120
+ "<extra_id_117>",
121
+ "<extra_id_118>",
122
+ "<extra_id_119>",
123
+ "<extra_id_120>",
124
+ "<extra_id_121>",
125
+ "<extra_id_122>",
126
+ "<extra_id_123>",
127
+ "<extra_id_124>",
128
+ "<extra_id_125>",
129
+ "<extra_id_126>",
130
+ "<extra_id_127>",
131
+ "<extra_id_128>",
132
+ "<extra_id_129>",
133
+ "<extra_id_130>",
134
+ "<extra_id_131>",
135
+ "<extra_id_132>",
136
+ "<extra_id_133>",
137
+ "<extra_id_134>",
138
+ "<extra_id_135>",
139
+ "<extra_id_136>",
140
+ "<extra_id_137>",
141
+ "<extra_id_138>",
142
+ "<extra_id_139>",
143
+ "<extra_id_140>",
144
+ "<extra_id_141>",
145
+ "<extra_id_142>",
146
+ "<extra_id_143>",
147
+ "<extra_id_144>",
148
+ "<extra_id_145>",
149
+ "<extra_id_146>",
150
+ "<extra_id_147>",
151
+ "<extra_id_148>",
152
+ "<extra_id_149>",
153
+ "<extra_id_150>",
154
+ "<extra_id_151>",
155
+ "<extra_id_152>",
156
+ "<extra_id_153>",
157
+ "<extra_id_154>",
158
+ "<extra_id_155>",
159
+ "<extra_id_156>",
160
+ "<extra_id_157>",
161
+ "<extra_id_158>",
162
+ "<extra_id_159>",
163
+ "<extra_id_160>",
164
+ "<extra_id_161>",
165
+ "<extra_id_162>",
166
+ "<extra_id_163>",
167
+ "<extra_id_164>",
168
+ "<extra_id_165>",
169
+ "<extra_id_166>",
170
+ "<extra_id_167>",
171
+ "<extra_id_168>",
172
+ "<extra_id_169>",
173
+ "<extra_id_170>",
174
+ "<extra_id_171>",
175
+ "<extra_id_172>",
176
+ "<extra_id_173>",
177
+ "<extra_id_174>",
178
+ "<extra_id_175>",
179
+ "<extra_id_176>",
180
+ "<extra_id_177>",
181
+ "<extra_id_178>",
182
+ "<extra_id_179>",
183
+ "<extra_id_180>",
184
+ "<extra_id_181>",
185
+ "<extra_id_182>",
186
+ "<extra_id_183>",
187
+ "<extra_id_184>",
188
+ "<extra_id_185>",
189
+ "<extra_id_186>",
190
+ "<extra_id_187>",
191
+ "<extra_id_188>",
192
+ "<extra_id_189>",
193
+ "<extra_id_190>",
194
+ "<extra_id_191>",
195
+ "<extra_id_192>",
196
+ "<extra_id_193>",
197
+ "<extra_id_194>",
198
+ "<extra_id_195>",
199
+ "<extra_id_196>",
200
+ "<extra_id_197>",
201
+ "<extra_id_198>",
202
+ "<extra_id_199>",
203
+ "<extra_id_200>",
204
+ "<extra_id_201>",
205
+ "<extra_id_202>",
206
+ "<extra_id_203>",
207
+ "<extra_id_204>",
208
+ "<extra_id_205>",
209
+ "<extra_id_206>",
210
+ "<extra_id_207>",
211
+ "<extra_id_208>",
212
+ "<extra_id_209>",
213
+ "<extra_id_210>",
214
+ "<extra_id_211>",
215
+ "<extra_id_212>",
216
+ "<extra_id_213>",
217
+ "<extra_id_214>",
218
+ "<extra_id_215>",
219
+ "<extra_id_216>",
220
+ "<extra_id_217>",
221
+ "<extra_id_218>",
222
+ "<extra_id_219>",
223
+ "<extra_id_220>",
224
+ "<extra_id_221>",
225
+ "<extra_id_222>",
226
+ "<extra_id_223>",
227
+ "<extra_id_224>",
228
+ "<extra_id_225>",
229
+ "<extra_id_226>",
230
+ "<extra_id_227>",
231
+ "<extra_id_228>",
232
+ "<extra_id_229>",
233
+ "<extra_id_230>",
234
+ "<extra_id_231>",
235
+ "<extra_id_232>",
236
+ "<extra_id_233>",
237
+ "<extra_id_234>",
238
+ "<extra_id_235>",
239
+ "<extra_id_236>",
240
+ "<extra_id_237>",
241
+ "<extra_id_238>",
242
+ "<extra_id_239>",
243
+ "<extra_id_240>",
244
+ "<extra_id_241>",
245
+ "<extra_id_242>",
246
+ "<extra_id_243>",
247
+ "<extra_id_244>",
248
+ "<extra_id_245>",
249
+ "<extra_id_246>",
250
+ "<extra_id_247>",
251
+ "<extra_id_248>",
252
+ "<extra_id_249>",
253
+ "<extra_id_250>",
254
+ "<extra_id_251>",
255
+ "<extra_id_252>",
256
+ "<extra_id_253>",
257
+ "<extra_id_254>",
258
+ "<extra_id_255>",
259
+ "<extra_id_256>",
260
+ "<extra_id_257>",
261
+ "<extra_id_258>",
262
+ "<extra_id_259>",
263
+ "<extra_id_260>",
264
+ "<extra_id_261>",
265
+ "<extra_id_262>",
266
+ "<extra_id_263>",
267
+ "<extra_id_264>",
268
+ "<extra_id_265>",
269
+ "<extra_id_266>",
270
+ "<extra_id_267>",
271
+ "<extra_id_268>",
272
+ "<extra_id_269>",
273
+ "<extra_id_270>",
274
+ "<extra_id_271>",
275
+ "<extra_id_272>",
276
+ "<extra_id_273>",
277
+ "<extra_id_274>",
278
+ "<extra_id_275>",
279
+ "<extra_id_276>",
280
+ "<extra_id_277>",
281
+ "<extra_id_278>",
282
+ "<extra_id_279>",
283
+ "<extra_id_280>",
284
+ "<extra_id_281>",
285
+ "<extra_id_282>",
286
+ "<extra_id_283>",
287
+ "<extra_id_284>",
288
+ "<extra_id_285>",
289
+ "<extra_id_286>",
290
+ "<extra_id_287>",
291
+ "<extra_id_288>",
292
+ "<extra_id_289>",
293
+ "<extra_id_290>",
294
+ "<extra_id_291>",
295
+ "<extra_id_292>",
296
+ "<extra_id_293>",
297
+ "<extra_id_294>",
298
+ "<extra_id_295>",
299
+ "<extra_id_296>",
300
+ "<extra_id_297>",
301
+ "<extra_id_298>",
302
+ "<extra_id_299>"
303
+ ],
304
+ "bos_token": "<s>",
305
+ "eos_token": "</s>",
306
+ "pad_token": "<pad>",
307
+ "unk_token": "<unk>"
308
+ }
configs/T5_tokenizer/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3909a67b780650b35cf529ac782ad2b6b26e6d1f849d3fbb6a872905f452458
3
+ size 4548313
configs/T5_tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e197b4d3dbd71da14b4eb255f4fa91c9c1f2068b20a2de2472967ca3d22602b
3
+ size 16837417
configs/T5_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,2748 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "256000": {
36
+ "content": "<extra_id_299>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "256001": {
44
+ "content": "<extra_id_298>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "256002": {
52
+ "content": "<extra_id_297>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "256003": {
60
+ "content": "<extra_id_296>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "256004": {
68
+ "content": "<extra_id_295>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "256005": {
76
+ "content": "<extra_id_294>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "256006": {
84
+ "content": "<extra_id_293>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "256007": {
92
+ "content": "<extra_id_292>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "256008": {
100
+ "content": "<extra_id_291>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "256009": {
108
+ "content": "<extra_id_290>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "256010": {
116
+ "content": "<extra_id_289>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "256011": {
124
+ "content": "<extra_id_288>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "256012": {
132
+ "content": "<extra_id_287>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "256013": {
140
+ "content": "<extra_id_286>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "256014": {
148
+ "content": "<extra_id_285>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "256015": {
156
+ "content": "<extra_id_284>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "256016": {
164
+ "content": "<extra_id_283>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "256017": {
172
+ "content": "<extra_id_282>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "256018": {
180
+ "content": "<extra_id_281>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "256019": {
188
+ "content": "<extra_id_280>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "256020": {
196
+ "content": "<extra_id_279>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "256021": {
204
+ "content": "<extra_id_278>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "256022": {
212
+ "content": "<extra_id_277>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "256023": {
220
+ "content": "<extra_id_276>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "256024": {
228
+ "content": "<extra_id_275>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "256025": {
236
+ "content": "<extra_id_274>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "256026": {
244
+ "content": "<extra_id_273>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "256027": {
252
+ "content": "<extra_id_272>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "256028": {
260
+ "content": "<extra_id_271>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "256029": {
268
+ "content": "<extra_id_270>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "256030": {
276
+ "content": "<extra_id_269>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "256031": {
284
+ "content": "<extra_id_268>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "256032": {
292
+ "content": "<extra_id_267>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "256033": {
300
+ "content": "<extra_id_266>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "256034": {
308
+ "content": "<extra_id_265>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "256035": {
316
+ "content": "<extra_id_264>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "256036": {
324
+ "content": "<extra_id_263>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "256037": {
332
+ "content": "<extra_id_262>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "256038": {
340
+ "content": "<extra_id_261>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "256039": {
348
+ "content": "<extra_id_260>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "256040": {
356
+ "content": "<extra_id_259>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "256041": {
364
+ "content": "<extra_id_258>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "256042": {
372
+ "content": "<extra_id_257>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "256043": {
380
+ "content": "<extra_id_256>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "256044": {
388
+ "content": "<extra_id_255>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "256045": {
396
+ "content": "<extra_id_254>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "256046": {
404
+ "content": "<extra_id_253>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "256047": {
412
+ "content": "<extra_id_252>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "256048": {
420
+ "content": "<extra_id_251>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "256049": {
428
+ "content": "<extra_id_250>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "256050": {
436
+ "content": "<extra_id_249>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "256051": {
444
+ "content": "<extra_id_248>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "256052": {
452
+ "content": "<extra_id_247>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "256053": {
460
+ "content": "<extra_id_246>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "256054": {
468
+ "content": "<extra_id_245>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "256055": {
476
+ "content": "<extra_id_244>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "256056": {
484
+ "content": "<extra_id_243>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "256057": {
492
+ "content": "<extra_id_242>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "256058": {
500
+ "content": "<extra_id_241>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "256059": {
508
+ "content": "<extra_id_240>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "256060": {
516
+ "content": "<extra_id_239>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "256061": {
524
+ "content": "<extra_id_238>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "256062": {
532
+ "content": "<extra_id_237>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "256063": {
540
+ "content": "<extra_id_236>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "256064": {
548
+ "content": "<extra_id_235>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "256065": {
556
+ "content": "<extra_id_234>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "256066": {
564
+ "content": "<extra_id_233>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "256067": {
572
+ "content": "<extra_id_232>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "256068": {
580
+ "content": "<extra_id_231>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "256069": {
588
+ "content": "<extra_id_230>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "256070": {
596
+ "content": "<extra_id_229>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "256071": {
604
+ "content": "<extra_id_228>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "256072": {
612
+ "content": "<extra_id_227>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "256073": {
620
+ "content": "<extra_id_226>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "256074": {
628
+ "content": "<extra_id_225>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "256075": {
636
+ "content": "<extra_id_224>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "256076": {
644
+ "content": "<extra_id_223>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "256077": {
652
+ "content": "<extra_id_222>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "256078": {
660
+ "content": "<extra_id_221>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "256079": {
668
+ "content": "<extra_id_220>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "256080": {
676
+ "content": "<extra_id_219>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "256081": {
684
+ "content": "<extra_id_218>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "256082": {
692
+ "content": "<extra_id_217>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "256083": {
700
+ "content": "<extra_id_216>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "256084": {
708
+ "content": "<extra_id_215>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "256085": {
716
+ "content": "<extra_id_214>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "256086": {
724
+ "content": "<extra_id_213>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "256087": {
732
+ "content": "<extra_id_212>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "256088": {
740
+ "content": "<extra_id_211>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "256089": {
748
+ "content": "<extra_id_210>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "256090": {
756
+ "content": "<extra_id_209>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "256091": {
764
+ "content": "<extra_id_208>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "256092": {
772
+ "content": "<extra_id_207>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "256093": {
780
+ "content": "<extra_id_206>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "256094": {
788
+ "content": "<extra_id_205>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "256095": {
796
+ "content": "<extra_id_204>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "256096": {
804
+ "content": "<extra_id_203>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "256097": {
812
+ "content": "<extra_id_202>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "256098": {
820
+ "content": "<extra_id_201>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "256099": {
828
+ "content": "<extra_id_200>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "256100": {
836
+ "content": "<extra_id_199>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "256101": {
844
+ "content": "<extra_id_198>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "256102": {
852
+ "content": "<extra_id_197>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "256103": {
860
+ "content": "<extra_id_196>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "256104": {
868
+ "content": "<extra_id_195>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "256105": {
876
+ "content": "<extra_id_194>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "256106": {
884
+ "content": "<extra_id_193>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "256107": {
892
+ "content": "<extra_id_192>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "256108": {
900
+ "content": "<extra_id_191>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "256109": {
908
+ "content": "<extra_id_190>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "256110": {
916
+ "content": "<extra_id_189>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "256111": {
924
+ "content": "<extra_id_188>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "256112": {
932
+ "content": "<extra_id_187>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "256113": {
940
+ "content": "<extra_id_186>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "256114": {
948
+ "content": "<extra_id_185>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "256115": {
956
+ "content": "<extra_id_184>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "256116": {
964
+ "content": "<extra_id_183>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "256117": {
972
+ "content": "<extra_id_182>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "256118": {
980
+ "content": "<extra_id_181>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "256119": {
988
+ "content": "<extra_id_180>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "256120": {
996
+ "content": "<extra_id_179>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "256121": {
1004
+ "content": "<extra_id_178>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "256122": {
1012
+ "content": "<extra_id_177>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "256123": {
1020
+ "content": "<extra_id_176>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "256124": {
1028
+ "content": "<extra_id_175>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "256125": {
1036
+ "content": "<extra_id_174>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "256126": {
1044
+ "content": "<extra_id_173>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "256127": {
1052
+ "content": "<extra_id_172>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "256128": {
1060
+ "content": "<extra_id_171>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "256129": {
1068
+ "content": "<extra_id_170>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "256130": {
1076
+ "content": "<extra_id_169>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "256131": {
1084
+ "content": "<extra_id_168>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "256132": {
1092
+ "content": "<extra_id_167>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "256133": {
1100
+ "content": "<extra_id_166>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "256134": {
1108
+ "content": "<extra_id_165>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "256135": {
1116
+ "content": "<extra_id_164>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "256136": {
1124
+ "content": "<extra_id_163>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "256137": {
1132
+ "content": "<extra_id_162>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "256138": {
1140
+ "content": "<extra_id_161>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "256139": {
1148
+ "content": "<extra_id_160>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "256140": {
1156
+ "content": "<extra_id_159>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "256141": {
1164
+ "content": "<extra_id_158>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "256142": {
1172
+ "content": "<extra_id_157>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "256143": {
1180
+ "content": "<extra_id_156>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "256144": {
1188
+ "content": "<extra_id_155>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "256145": {
1196
+ "content": "<extra_id_154>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "256146": {
1204
+ "content": "<extra_id_153>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "256147": {
1212
+ "content": "<extra_id_152>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "256148": {
1220
+ "content": "<extra_id_151>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "256149": {
1228
+ "content": "<extra_id_150>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "256150": {
1236
+ "content": "<extra_id_149>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "256151": {
1244
+ "content": "<extra_id_148>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "256152": {
1252
+ "content": "<extra_id_147>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "256153": {
1260
+ "content": "<extra_id_146>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "256154": {
1268
+ "content": "<extra_id_145>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "256155": {
1276
+ "content": "<extra_id_144>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "256156": {
1284
+ "content": "<extra_id_143>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "256157": {
1292
+ "content": "<extra_id_142>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "256158": {
1300
+ "content": "<extra_id_141>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "256159": {
1308
+ "content": "<extra_id_140>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "256160": {
1316
+ "content": "<extra_id_139>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "256161": {
1324
+ "content": "<extra_id_138>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "256162": {
1332
+ "content": "<extra_id_137>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "256163": {
1340
+ "content": "<extra_id_136>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "256164": {
1348
+ "content": "<extra_id_135>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "256165": {
1356
+ "content": "<extra_id_134>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "256166": {
1364
+ "content": "<extra_id_133>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "256167": {
1372
+ "content": "<extra_id_132>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "256168": {
1380
+ "content": "<extra_id_131>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "256169": {
1388
+ "content": "<extra_id_130>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "256170": {
1396
+ "content": "<extra_id_129>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "256171": {
1404
+ "content": "<extra_id_128>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "256172": {
1412
+ "content": "<extra_id_127>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "256173": {
1420
+ "content": "<extra_id_126>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "256174": {
1428
+ "content": "<extra_id_125>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "256175": {
1436
+ "content": "<extra_id_124>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "256176": {
1444
+ "content": "<extra_id_123>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "256177": {
1452
+ "content": "<extra_id_122>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "256178": {
1460
+ "content": "<extra_id_121>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "256179": {
1468
+ "content": "<extra_id_120>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "256180": {
1476
+ "content": "<extra_id_119>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "256181": {
1484
+ "content": "<extra_id_118>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "256182": {
1492
+ "content": "<extra_id_117>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "256183": {
1500
+ "content": "<extra_id_116>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "256184": {
1508
+ "content": "<extra_id_115>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "256185": {
1516
+ "content": "<extra_id_114>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "256186": {
1524
+ "content": "<extra_id_113>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "256187": {
1532
+ "content": "<extra_id_112>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "256188": {
1540
+ "content": "<extra_id_111>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "256189": {
1548
+ "content": "<extra_id_110>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "256190": {
1556
+ "content": "<extra_id_109>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "256191": {
1564
+ "content": "<extra_id_108>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "256192": {
1572
+ "content": "<extra_id_107>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "256193": {
1580
+ "content": "<extra_id_106>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "256194": {
1588
+ "content": "<extra_id_105>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "256195": {
1596
+ "content": "<extra_id_104>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "256196": {
1604
+ "content": "<extra_id_103>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "256197": {
1612
+ "content": "<extra_id_102>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "256198": {
1620
+ "content": "<extra_id_101>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "256199": {
1628
+ "content": "<extra_id_100>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "256200": {
1636
+ "content": "<extra_id_99>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "256201": {
1644
+ "content": "<extra_id_98>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "256202": {
1652
+ "content": "<extra_id_97>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "256203": {
1660
+ "content": "<extra_id_96>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "256204": {
1668
+ "content": "<extra_id_95>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "256205": {
1676
+ "content": "<extra_id_94>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "256206": {
1684
+ "content": "<extra_id_93>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "256207": {
1692
+ "content": "<extra_id_92>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "256208": {
1700
+ "content": "<extra_id_91>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "256209": {
1708
+ "content": "<extra_id_90>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "256210": {
1716
+ "content": "<extra_id_89>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "256211": {
1724
+ "content": "<extra_id_88>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "256212": {
1732
+ "content": "<extra_id_87>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "256213": {
1740
+ "content": "<extra_id_86>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "256214": {
1748
+ "content": "<extra_id_85>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "256215": {
1756
+ "content": "<extra_id_84>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "256216": {
1764
+ "content": "<extra_id_83>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "256217": {
1772
+ "content": "<extra_id_82>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "256218": {
1780
+ "content": "<extra_id_81>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "256219": {
1788
+ "content": "<extra_id_80>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "256220": {
1796
+ "content": "<extra_id_79>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "256221": {
1804
+ "content": "<extra_id_78>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "256222": {
1812
+ "content": "<extra_id_77>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "256223": {
1820
+ "content": "<extra_id_76>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "256224": {
1828
+ "content": "<extra_id_75>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "256225": {
1836
+ "content": "<extra_id_74>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "256226": {
1844
+ "content": "<extra_id_73>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "256227": {
1852
+ "content": "<extra_id_72>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "256228": {
1860
+ "content": "<extra_id_71>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "256229": {
1868
+ "content": "<extra_id_70>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "256230": {
1876
+ "content": "<extra_id_69>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "256231": {
1884
+ "content": "<extra_id_68>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "256232": {
1892
+ "content": "<extra_id_67>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "256233": {
1900
+ "content": "<extra_id_66>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "256234": {
1908
+ "content": "<extra_id_65>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "256235": {
1916
+ "content": "<extra_id_64>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "256236": {
1924
+ "content": "<extra_id_63>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "256237": {
1932
+ "content": "<extra_id_62>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "256238": {
1940
+ "content": "<extra_id_61>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "256239": {
1948
+ "content": "<extra_id_60>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "256240": {
1956
+ "content": "<extra_id_59>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "256241": {
1964
+ "content": "<extra_id_58>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "256242": {
1972
+ "content": "<extra_id_57>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "256243": {
1980
+ "content": "<extra_id_56>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "256244": {
1988
+ "content": "<extra_id_55>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "256245": {
1996
+ "content": "<extra_id_54>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "256246": {
2004
+ "content": "<extra_id_53>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "256247": {
2012
+ "content": "<extra_id_52>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "256248": {
2020
+ "content": "<extra_id_51>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "256249": {
2028
+ "content": "<extra_id_50>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "256250": {
2036
+ "content": "<extra_id_49>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "256251": {
2044
+ "content": "<extra_id_48>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ },
2051
+ "256252": {
2052
+ "content": "<extra_id_47>",
2053
+ "lstrip": false,
2054
+ "normalized": false,
2055
+ "rstrip": false,
2056
+ "single_word": false,
2057
+ "special": true
2058
+ },
2059
+ "256253": {
2060
+ "content": "<extra_id_46>",
2061
+ "lstrip": false,
2062
+ "normalized": false,
2063
+ "rstrip": false,
2064
+ "single_word": false,
2065
+ "special": true
2066
+ },
2067
+ "256254": {
2068
+ "content": "<extra_id_45>",
2069
+ "lstrip": false,
2070
+ "normalized": false,
2071
+ "rstrip": false,
2072
+ "single_word": false,
2073
+ "special": true
2074
+ },
2075
+ "256255": {
2076
+ "content": "<extra_id_44>",
2077
+ "lstrip": false,
2078
+ "normalized": false,
2079
+ "rstrip": false,
2080
+ "single_word": false,
2081
+ "special": true
2082
+ },
2083
+ "256256": {
2084
+ "content": "<extra_id_43>",
2085
+ "lstrip": false,
2086
+ "normalized": false,
2087
+ "rstrip": false,
2088
+ "single_word": false,
2089
+ "special": true
2090
+ },
2091
+ "256257": {
2092
+ "content": "<extra_id_42>",
2093
+ "lstrip": false,
2094
+ "normalized": false,
2095
+ "rstrip": false,
2096
+ "single_word": false,
2097
+ "special": true
2098
+ },
2099
+ "256258": {
2100
+ "content": "<extra_id_41>",
2101
+ "lstrip": false,
2102
+ "normalized": false,
2103
+ "rstrip": false,
2104
+ "single_word": false,
2105
+ "special": true
2106
+ },
2107
+ "256259": {
2108
+ "content": "<extra_id_40>",
2109
+ "lstrip": false,
2110
+ "normalized": false,
2111
+ "rstrip": false,
2112
+ "single_word": false,
2113
+ "special": true
2114
+ },
2115
+ "256260": {
2116
+ "content": "<extra_id_39>",
2117
+ "lstrip": false,
2118
+ "normalized": false,
2119
+ "rstrip": false,
2120
+ "single_word": false,
2121
+ "special": true
2122
+ },
2123
+ "256261": {
2124
+ "content": "<extra_id_38>",
2125
+ "lstrip": false,
2126
+ "normalized": false,
2127
+ "rstrip": false,
2128
+ "single_word": false,
2129
+ "special": true
2130
+ },
2131
+ "256262": {
2132
+ "content": "<extra_id_37>",
2133
+ "lstrip": false,
2134
+ "normalized": false,
2135
+ "rstrip": false,
2136
+ "single_word": false,
2137
+ "special": true
2138
+ },
2139
+ "256263": {
2140
+ "content": "<extra_id_36>",
2141
+ "lstrip": false,
2142
+ "normalized": false,
2143
+ "rstrip": false,
2144
+ "single_word": false,
2145
+ "special": true
2146
+ },
2147
+ "256264": {
2148
+ "content": "<extra_id_35>",
2149
+ "lstrip": false,
2150
+ "normalized": false,
2151
+ "rstrip": false,
2152
+ "single_word": false,
2153
+ "special": true
2154
+ },
2155
+ "256265": {
2156
+ "content": "<extra_id_34>",
2157
+ "lstrip": false,
2158
+ "normalized": false,
2159
+ "rstrip": false,
2160
+ "single_word": false,
2161
+ "special": true
2162
+ },
2163
+ "256266": {
2164
+ "content": "<extra_id_33>",
2165
+ "lstrip": false,
2166
+ "normalized": false,
2167
+ "rstrip": false,
2168
+ "single_word": false,
2169
+ "special": true
2170
+ },
2171
+ "256267": {
2172
+ "content": "<extra_id_32>",
2173
+ "lstrip": false,
2174
+ "normalized": false,
2175
+ "rstrip": false,
2176
+ "single_word": false,
2177
+ "special": true
2178
+ },
2179
+ "256268": {
2180
+ "content": "<extra_id_31>",
2181
+ "lstrip": false,
2182
+ "normalized": false,
2183
+ "rstrip": false,
2184
+ "single_word": false,
2185
+ "special": true
2186
+ },
2187
+ "256269": {
2188
+ "content": "<extra_id_30>",
2189
+ "lstrip": false,
2190
+ "normalized": false,
2191
+ "rstrip": false,
2192
+ "single_word": false,
2193
+ "special": true
2194
+ },
2195
+ "256270": {
2196
+ "content": "<extra_id_29>",
2197
+ "lstrip": false,
2198
+ "normalized": false,
2199
+ "rstrip": false,
2200
+ "single_word": false,
2201
+ "special": true
2202
+ },
2203
+ "256271": {
2204
+ "content": "<extra_id_28>",
2205
+ "lstrip": false,
2206
+ "normalized": false,
2207
+ "rstrip": false,
2208
+ "single_word": false,
2209
+ "special": true
2210
+ },
2211
+ "256272": {
2212
+ "content": "<extra_id_27>",
2213
+ "lstrip": false,
2214
+ "normalized": false,
2215
+ "rstrip": false,
2216
+ "single_word": false,
2217
+ "special": true
2218
+ },
2219
+ "256273": {
2220
+ "content": "<extra_id_26>",
2221
+ "lstrip": false,
2222
+ "normalized": false,
2223
+ "rstrip": false,
2224
+ "single_word": false,
2225
+ "special": true
2226
+ },
2227
+ "256274": {
2228
+ "content": "<extra_id_25>",
2229
+ "lstrip": false,
2230
+ "normalized": false,
2231
+ "rstrip": false,
2232
+ "single_word": false,
2233
+ "special": true
2234
+ },
2235
+ "256275": {
2236
+ "content": "<extra_id_24>",
2237
+ "lstrip": false,
2238
+ "normalized": false,
2239
+ "rstrip": false,
2240
+ "single_word": false,
2241
+ "special": true
2242
+ },
2243
+ "256276": {
2244
+ "content": "<extra_id_23>",
2245
+ "lstrip": false,
2246
+ "normalized": false,
2247
+ "rstrip": false,
2248
+ "single_word": false,
2249
+ "special": true
2250
+ },
2251
+ "256277": {
2252
+ "content": "<extra_id_22>",
2253
+ "lstrip": false,
2254
+ "normalized": false,
2255
+ "rstrip": false,
2256
+ "single_word": false,
2257
+ "special": true
2258
+ },
2259
+ "256278": {
2260
+ "content": "<extra_id_21>",
2261
+ "lstrip": false,
2262
+ "normalized": false,
2263
+ "rstrip": false,
2264
+ "single_word": false,
2265
+ "special": true
2266
+ },
2267
+ "256279": {
2268
+ "content": "<extra_id_20>",
2269
+ "lstrip": false,
2270
+ "normalized": false,
2271
+ "rstrip": false,
2272
+ "single_word": false,
2273
+ "special": true
2274
+ },
2275
+ "256280": {
2276
+ "content": "<extra_id_19>",
2277
+ "lstrip": false,
2278
+ "normalized": false,
2279
+ "rstrip": false,
2280
+ "single_word": false,
2281
+ "special": true
2282
+ },
2283
+ "256281": {
2284
+ "content": "<extra_id_18>",
2285
+ "lstrip": false,
2286
+ "normalized": false,
2287
+ "rstrip": false,
2288
+ "single_word": false,
2289
+ "special": true
2290
+ },
2291
+ "256282": {
2292
+ "content": "<extra_id_17>",
2293
+ "lstrip": false,
2294
+ "normalized": false,
2295
+ "rstrip": false,
2296
+ "single_word": false,
2297
+ "special": true
2298
+ },
2299
+ "256283": {
2300
+ "content": "<extra_id_16>",
2301
+ "lstrip": false,
2302
+ "normalized": false,
2303
+ "rstrip": false,
2304
+ "single_word": false,
2305
+ "special": true
2306
+ },
2307
+ "256284": {
2308
+ "content": "<extra_id_15>",
2309
+ "lstrip": false,
2310
+ "normalized": false,
2311
+ "rstrip": false,
2312
+ "single_word": false,
2313
+ "special": true
2314
+ },
2315
+ "256285": {
2316
+ "content": "<extra_id_14>",
2317
+ "lstrip": false,
2318
+ "normalized": false,
2319
+ "rstrip": false,
2320
+ "single_word": false,
2321
+ "special": true
2322
+ },
2323
+ "256286": {
2324
+ "content": "<extra_id_13>",
2325
+ "lstrip": false,
2326
+ "normalized": false,
2327
+ "rstrip": false,
2328
+ "single_word": false,
2329
+ "special": true
2330
+ },
2331
+ "256287": {
2332
+ "content": "<extra_id_12>",
2333
+ "lstrip": false,
2334
+ "normalized": false,
2335
+ "rstrip": false,
2336
+ "single_word": false,
2337
+ "special": true
2338
+ },
2339
+ "256288": {
2340
+ "content": "<extra_id_11>",
2341
+ "lstrip": false,
2342
+ "normalized": false,
2343
+ "rstrip": false,
2344
+ "single_word": false,
2345
+ "special": true
2346
+ },
2347
+ "256289": {
2348
+ "content": "<extra_id_10>",
2349
+ "lstrip": false,
2350
+ "normalized": false,
2351
+ "rstrip": false,
2352
+ "single_word": false,
2353
+ "special": true
2354
+ },
2355
+ "256290": {
2356
+ "content": "<extra_id_9>",
2357
+ "lstrip": false,
2358
+ "normalized": false,
2359
+ "rstrip": false,
2360
+ "single_word": false,
2361
+ "special": true
2362
+ },
2363
+ "256291": {
2364
+ "content": "<extra_id_8>",
2365
+ "lstrip": false,
2366
+ "normalized": false,
2367
+ "rstrip": false,
2368
+ "single_word": false,
2369
+ "special": true
2370
+ },
2371
+ "256292": {
2372
+ "content": "<extra_id_7>",
2373
+ "lstrip": false,
2374
+ "normalized": false,
2375
+ "rstrip": false,
2376
+ "single_word": false,
2377
+ "special": true
2378
+ },
2379
+ "256293": {
2380
+ "content": "<extra_id_6>",
2381
+ "lstrip": false,
2382
+ "normalized": false,
2383
+ "rstrip": false,
2384
+ "single_word": false,
2385
+ "special": true
2386
+ },
2387
+ "256294": {
2388
+ "content": "<extra_id_5>",
2389
+ "lstrip": false,
2390
+ "normalized": false,
2391
+ "rstrip": false,
2392
+ "single_word": false,
2393
+ "special": true
2394
+ },
2395
+ "256295": {
2396
+ "content": "<extra_id_4>",
2397
+ "lstrip": false,
2398
+ "normalized": false,
2399
+ "rstrip": false,
2400
+ "single_word": false,
2401
+ "special": true
2402
+ },
2403
+ "256296": {
2404
+ "content": "<extra_id_3>",
2405
+ "lstrip": false,
2406
+ "normalized": false,
2407
+ "rstrip": false,
2408
+ "single_word": false,
2409
+ "special": true
2410
+ },
2411
+ "256297": {
2412
+ "content": "<extra_id_2>",
2413
+ "lstrip": false,
2414
+ "normalized": false,
2415
+ "rstrip": false,
2416
+ "single_word": false,
2417
+ "special": true
2418
+ },
2419
+ "256298": {
2420
+ "content": "<extra_id_1>",
2421
+ "lstrip": false,
2422
+ "normalized": false,
2423
+ "rstrip": false,
2424
+ "single_word": false,
2425
+ "special": true
2426
+ },
2427
+ "256299": {
2428
+ "content": "<extra_id_0>",
2429
+ "lstrip": false,
2430
+ "normalized": false,
2431
+ "rstrip": false,
2432
+ "single_word": false,
2433
+ "special": true
2434
+ }
2435
+ },
2436
+ "additional_special_tokens": [
2437
+ "<extra_id_0>",
2438
+ "<extra_id_1>",
2439
+ "<extra_id_2>",
2440
+ "<extra_id_3>",
2441
+ "<extra_id_4>",
2442
+ "<extra_id_5>",
2443
+ "<extra_id_6>",
2444
+ "<extra_id_7>",
2445
+ "<extra_id_8>",
2446
+ "<extra_id_9>",
2447
+ "<extra_id_10>",
2448
+ "<extra_id_11>",
2449
+ "<extra_id_12>",
2450
+ "<extra_id_13>",
2451
+ "<extra_id_14>",
2452
+ "<extra_id_15>",
2453
+ "<extra_id_16>",
2454
+ "<extra_id_17>",
2455
+ "<extra_id_18>",
2456
+ "<extra_id_19>",
2457
+ "<extra_id_20>",
2458
+ "<extra_id_21>",
2459
+ "<extra_id_22>",
2460
+ "<extra_id_23>",
2461
+ "<extra_id_24>",
2462
+ "<extra_id_25>",
2463
+ "<extra_id_26>",
2464
+ "<extra_id_27>",
2465
+ "<extra_id_28>",
2466
+ "<extra_id_29>",
2467
+ "<extra_id_30>",
2468
+ "<extra_id_31>",
2469
+ "<extra_id_32>",
2470
+ "<extra_id_33>",
2471
+ "<extra_id_34>",
2472
+ "<extra_id_35>",
2473
+ "<extra_id_36>",
2474
+ "<extra_id_37>",
2475
+ "<extra_id_38>",
2476
+ "<extra_id_39>",
2477
+ "<extra_id_40>",
2478
+ "<extra_id_41>",
2479
+ "<extra_id_42>",
2480
+ "<extra_id_43>",
2481
+ "<extra_id_44>",
2482
+ "<extra_id_45>",
2483
+ "<extra_id_46>",
2484
+ "<extra_id_47>",
2485
+ "<extra_id_48>",
2486
+ "<extra_id_49>",
2487
+ "<extra_id_50>",
2488
+ "<extra_id_51>",
2489
+ "<extra_id_52>",
2490
+ "<extra_id_53>",
2491
+ "<extra_id_54>",
2492
+ "<extra_id_55>",
2493
+ "<extra_id_56>",
2494
+ "<extra_id_57>",
2495
+ "<extra_id_58>",
2496
+ "<extra_id_59>",
2497
+ "<extra_id_60>",
2498
+ "<extra_id_61>",
2499
+ "<extra_id_62>",
2500
+ "<extra_id_63>",
2501
+ "<extra_id_64>",
2502
+ "<extra_id_65>",
2503
+ "<extra_id_66>",
2504
+ "<extra_id_67>",
2505
+ "<extra_id_68>",
2506
+ "<extra_id_69>",
2507
+ "<extra_id_70>",
2508
+ "<extra_id_71>",
2509
+ "<extra_id_72>",
2510
+ "<extra_id_73>",
2511
+ "<extra_id_74>",
2512
+ "<extra_id_75>",
2513
+ "<extra_id_76>",
2514
+ "<extra_id_77>",
2515
+ "<extra_id_78>",
2516
+ "<extra_id_79>",
2517
+ "<extra_id_80>",
2518
+ "<extra_id_81>",
2519
+ "<extra_id_82>",
2520
+ "<extra_id_83>",
2521
+ "<extra_id_84>",
2522
+ "<extra_id_85>",
2523
+ "<extra_id_86>",
2524
+ "<extra_id_87>",
2525
+ "<extra_id_88>",
2526
+ "<extra_id_89>",
2527
+ "<extra_id_90>",
2528
+ "<extra_id_91>",
2529
+ "<extra_id_92>",
2530
+ "<extra_id_93>",
2531
+ "<extra_id_94>",
2532
+ "<extra_id_95>",
2533
+ "<extra_id_96>",
2534
+ "<extra_id_97>",
2535
+ "<extra_id_98>",
2536
+ "<extra_id_99>",
2537
+ "<extra_id_100>",
2538
+ "<extra_id_101>",
2539
+ "<extra_id_102>",
2540
+ "<extra_id_103>",
2541
+ "<extra_id_104>",
2542
+ "<extra_id_105>",
2543
+ "<extra_id_106>",
2544
+ "<extra_id_107>",
2545
+ "<extra_id_108>",
2546
+ "<extra_id_109>",
2547
+ "<extra_id_110>",
2548
+ "<extra_id_111>",
2549
+ "<extra_id_112>",
2550
+ "<extra_id_113>",
2551
+ "<extra_id_114>",
2552
+ "<extra_id_115>",
2553
+ "<extra_id_116>",
2554
+ "<extra_id_117>",
2555
+ "<extra_id_118>",
2556
+ "<extra_id_119>",
2557
+ "<extra_id_120>",
2558
+ "<extra_id_121>",
2559
+ "<extra_id_122>",
2560
+ "<extra_id_123>",
2561
+ "<extra_id_124>",
2562
+ "<extra_id_125>",
2563
+ "<extra_id_126>",
2564
+ "<extra_id_127>",
2565
+ "<extra_id_128>",
2566
+ "<extra_id_129>",
2567
+ "<extra_id_130>",
2568
+ "<extra_id_131>",
2569
+ "<extra_id_132>",
2570
+ "<extra_id_133>",
2571
+ "<extra_id_134>",
2572
+ "<extra_id_135>",
2573
+ "<extra_id_136>",
2574
+ "<extra_id_137>",
2575
+ "<extra_id_138>",
2576
+ "<extra_id_139>",
2577
+ "<extra_id_140>",
2578
+ "<extra_id_141>",
2579
+ "<extra_id_142>",
2580
+ "<extra_id_143>",
2581
+ "<extra_id_144>",
2582
+ "<extra_id_145>",
2583
+ "<extra_id_146>",
2584
+ "<extra_id_147>",
2585
+ "<extra_id_148>",
2586
+ "<extra_id_149>",
2587
+ "<extra_id_150>",
2588
+ "<extra_id_151>",
2589
+ "<extra_id_152>",
2590
+ "<extra_id_153>",
2591
+ "<extra_id_154>",
2592
+ "<extra_id_155>",
2593
+ "<extra_id_156>",
2594
+ "<extra_id_157>",
2595
+ "<extra_id_158>",
2596
+ "<extra_id_159>",
2597
+ "<extra_id_160>",
2598
+ "<extra_id_161>",
2599
+ "<extra_id_162>",
2600
+ "<extra_id_163>",
2601
+ "<extra_id_164>",
2602
+ "<extra_id_165>",
2603
+ "<extra_id_166>",
2604
+ "<extra_id_167>",
2605
+ "<extra_id_168>",
2606
+ "<extra_id_169>",
2607
+ "<extra_id_170>",
2608
+ "<extra_id_171>",
2609
+ "<extra_id_172>",
2610
+ "<extra_id_173>",
2611
+ "<extra_id_174>",
2612
+ "<extra_id_175>",
2613
+ "<extra_id_176>",
2614
+ "<extra_id_177>",
2615
+ "<extra_id_178>",
2616
+ "<extra_id_179>",
2617
+ "<extra_id_180>",
2618
+ "<extra_id_181>",
2619
+ "<extra_id_182>",
2620
+ "<extra_id_183>",
2621
+ "<extra_id_184>",
2622
+ "<extra_id_185>",
2623
+ "<extra_id_186>",
2624
+ "<extra_id_187>",
2625
+ "<extra_id_188>",
2626
+ "<extra_id_189>",
2627
+ "<extra_id_190>",
2628
+ "<extra_id_191>",
2629
+ "<extra_id_192>",
2630
+ "<extra_id_193>",
2631
+ "<extra_id_194>",
2632
+ "<extra_id_195>",
2633
+ "<extra_id_196>",
2634
+ "<extra_id_197>",
2635
+ "<extra_id_198>",
2636
+ "<extra_id_199>",
2637
+ "<extra_id_200>",
2638
+ "<extra_id_201>",
2639
+ "<extra_id_202>",
2640
+ "<extra_id_203>",
2641
+ "<extra_id_204>",
2642
+ "<extra_id_205>",
2643
+ "<extra_id_206>",
2644
+ "<extra_id_207>",
2645
+ "<extra_id_208>",
2646
+ "<extra_id_209>",
2647
+ "<extra_id_210>",
2648
+ "<extra_id_211>",
2649
+ "<extra_id_212>",
2650
+ "<extra_id_213>",
2651
+ "<extra_id_214>",
2652
+ "<extra_id_215>",
2653
+ "<extra_id_216>",
2654
+ "<extra_id_217>",
2655
+ "<extra_id_218>",
2656
+ "<extra_id_219>",
2657
+ "<extra_id_220>",
2658
+ "<extra_id_221>",
2659
+ "<extra_id_222>",
2660
+ "<extra_id_223>",
2661
+ "<extra_id_224>",
2662
+ "<extra_id_225>",
2663
+ "<extra_id_226>",
2664
+ "<extra_id_227>",
2665
+ "<extra_id_228>",
2666
+ "<extra_id_229>",
2667
+ "<extra_id_230>",
2668
+ "<extra_id_231>",
2669
+ "<extra_id_232>",
2670
+ "<extra_id_233>",
2671
+ "<extra_id_234>",
2672
+ "<extra_id_235>",
2673
+ "<extra_id_236>",
2674
+ "<extra_id_237>",
2675
+ "<extra_id_238>",
2676
+ "<extra_id_239>",
2677
+ "<extra_id_240>",
2678
+ "<extra_id_241>",
2679
+ "<extra_id_242>",
2680
+ "<extra_id_243>",
2681
+ "<extra_id_244>",
2682
+ "<extra_id_245>",
2683
+ "<extra_id_246>",
2684
+ "<extra_id_247>",
2685
+ "<extra_id_248>",
2686
+ "<extra_id_249>",
2687
+ "<extra_id_250>",
2688
+ "<extra_id_251>",
2689
+ "<extra_id_252>",
2690
+ "<extra_id_253>",
2691
+ "<extra_id_254>",
2692
+ "<extra_id_255>",
2693
+ "<extra_id_256>",
2694
+ "<extra_id_257>",
2695
+ "<extra_id_258>",
2696
+ "<extra_id_259>",
2697
+ "<extra_id_260>",
2698
+ "<extra_id_261>",
2699
+ "<extra_id_262>",
2700
+ "<extra_id_263>",
2701
+ "<extra_id_264>",
2702
+ "<extra_id_265>",
2703
+ "<extra_id_266>",
2704
+ "<extra_id_267>",
2705
+ "<extra_id_268>",
2706
+ "<extra_id_269>",
2707
+ "<extra_id_270>",
2708
+ "<extra_id_271>",
2709
+ "<extra_id_272>",
2710
+ "<extra_id_273>",
2711
+ "<extra_id_274>",
2712
+ "<extra_id_275>",
2713
+ "<extra_id_276>",
2714
+ "<extra_id_277>",
2715
+ "<extra_id_278>",
2716
+ "<extra_id_279>",
2717
+ "<extra_id_280>",
2718
+ "<extra_id_281>",
2719
+ "<extra_id_282>",
2720
+ "<extra_id_283>",
2721
+ "<extra_id_284>",
2722
+ "<extra_id_285>",
2723
+ "<extra_id_286>",
2724
+ "<extra_id_287>",
2725
+ "<extra_id_288>",
2726
+ "<extra_id_289>",
2727
+ "<extra_id_290>",
2728
+ "<extra_id_291>",
2729
+ "<extra_id_292>",
2730
+ "<extra_id_293>",
2731
+ "<extra_id_294>",
2732
+ "<extra_id_295>",
2733
+ "<extra_id_296>",
2734
+ "<extra_id_297>",
2735
+ "<extra_id_298>",
2736
+ "<extra_id_299>"
2737
+ ],
2738
+ "bos_token": "<s>",
2739
+ "clean_up_tokenization_spaces": true,
2740
+ "eos_token": "</s>",
2741
+ "extra_ids": 300,
2742
+ "model_max_length": 1000000000000000019884624838656,
2743
+ "pad_token": "<pad>",
2744
+ "sp_model_kwargs": {},
2745
+ "spaces_between_special_tokens": false,
2746
+ "tokenizer_class": "T5Tokenizer",
2747
+ "unk_token": "<unk>"
2748
+ }
configs/transformer_config_i2v.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "WanModel",
3
+ "_diffusers_version": "0.30.0",
4
+ "dim": 5120,
5
+ "eps": 1e-06,
6
+ "ffn_dim": 13824,
7
+ "freq_dim": 256,
8
+ "in_dim": 36,
9
+ "model_type": "i2v",
10
+ "num_heads": 40,
11
+ "num_layers": 40,
12
+ "out_dim": 16,
13
+ "text_len": 512
14
+ }
context_windows/context.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from typing import Callable, Optional, List
3
+ import torch
4
+ from ..utils import log
5
+
6
+ def ordered_halving(val):
7
+ bin_str = f"{val:064b}"
8
+ bin_flip = bin_str[::-1]
9
+ as_int = int(bin_flip, 2)
10
+
11
+ return as_int / (1 << 64)
12
+
13
+ def does_window_roll_over(window: list[int], num_frames: int) -> tuple[bool, int]:
14
+ prev_val = -1
15
+ for i, val in enumerate(window):
16
+ val = val % num_frames
17
+ if val < prev_val:
18
+ return True, i
19
+ prev_val = val
20
+ return False, -1
21
+
22
+ def shift_window_to_start(window: list[int], num_frames: int):
23
+ start_val = window[0]
24
+ for i in range(len(window)):
25
+ # 1) subtract each element by start_val to move vals relative to the start of all frames
26
+ # 2) add num_frames and take modulus to get adjusted vals
27
+ window[i] = ((window[i] - start_val) + num_frames) % num_frames
28
+
29
+ def shift_window_to_end(window: list[int], num_frames: int):
30
+ # 1) shift window to start
31
+ shift_window_to_start(window, num_frames)
32
+ end_val = window[-1]
33
+ end_delta = num_frames - end_val - 1
34
+ for i in range(len(window)):
35
+ # 2) add end_delta to each val to slide windows to end
36
+ window[i] = window[i] + end_delta
37
+
38
+ def get_missing_indexes(windows: list[list[int]], num_frames: int) -> list[int]:
39
+ all_indexes = list(range(num_frames))
40
+ for w in windows:
41
+ for val in w:
42
+ try:
43
+ all_indexes.remove(val)
44
+ except ValueError:
45
+ pass
46
+ return all_indexes
47
+
48
+ def uniform_looped(
49
+ step: int = ...,
50
+ num_steps: Optional[int] = None,
51
+ num_frames: int = ...,
52
+ context_size: Optional[int] = None,
53
+ context_stride: int = 3,
54
+ context_overlap: int = 4,
55
+ closed_loop: bool = True,
56
+ ):
57
+ if num_frames <= context_size:
58
+ yield list(range(num_frames))
59
+ return
60
+
61
+ context_stride = min(context_stride, int(np.ceil(np.log2(num_frames / context_size))) + 1)
62
+
63
+ for context_step in 1 << np.arange(context_stride):
64
+ pad = int(round(num_frames * ordered_halving(step)))
65
+ for j in range(
66
+ int(ordered_halving(step) * context_step) + pad,
67
+ num_frames + pad + (0 if closed_loop else -context_overlap),
68
+ (context_size * context_step - context_overlap),
69
+ ):
70
+ yield [e % num_frames for e in range(j, j + context_size * context_step, context_step)]
71
+
72
+ #from AnimateDiff-Evolved by Kosinkadink (https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved)
73
+ def uniform_standard(
74
+ step: int = ...,
75
+ num_steps: Optional[int] = None,
76
+ num_frames: int = ...,
77
+ context_size: Optional[int] = None,
78
+ context_stride: int = 3,
79
+ context_overlap: int = 4,
80
+ closed_loop: bool = True,
81
+ ):
82
+ windows = []
83
+ if num_frames <= context_size:
84
+ windows.append(list(range(num_frames)))
85
+ return windows
86
+
87
+ context_stride = min(context_stride, int(np.ceil(np.log2(num_frames / context_size))) + 1)
88
+
89
+ for context_step in 1 << np.arange(context_stride):
90
+ pad = int(round(num_frames * ordered_halving(step)))
91
+ for j in range(
92
+ int(ordered_halving(step) * context_step) + pad,
93
+ num_frames + pad + (0 if closed_loop else -context_overlap),
94
+ (context_size * context_step - context_overlap),
95
+ ):
96
+ windows.append([e % num_frames for e in range(j, j + context_size * context_step, context_step)])
97
+
98
+ # now that windows are created, shift any windows that loop, and delete duplicate windows
99
+ delete_idxs = []
100
+ win_i = 0
101
+ while win_i < len(windows):
102
+ # if window is rolls over itself, need to shift it
103
+ is_roll, roll_idx = does_window_roll_over(windows[win_i], num_frames)
104
+ if is_roll:
105
+ roll_val = windows[win_i][roll_idx] # roll_val might not be 0 for windows of higher strides
106
+ shift_window_to_end(windows[win_i], num_frames=num_frames)
107
+ # check if next window (cyclical) is missing roll_val
108
+ if roll_val not in windows[(win_i+1) % len(windows)]:
109
+ # need to insert new window here - just insert window starting at roll_val
110
+ windows.insert(win_i+1, list(range(roll_val, roll_val + context_size)))
111
+ # delete window if it's not unique
112
+ for pre_i in range(0, win_i):
113
+ if windows[win_i] == windows[pre_i]:
114
+ delete_idxs.append(win_i)
115
+ break
116
+ win_i += 1
117
+
118
+ # reverse delete_idxs so that they will be deleted in an order that doesn't break idx correlation
119
+ delete_idxs.reverse()
120
+ for i in delete_idxs:
121
+ windows.pop(i)
122
+ return windows
123
+
124
+ def static_standard(
125
+ step: int = ...,
126
+ num_steps: Optional[int] = None,
127
+ num_frames: int = ...,
128
+ context_size: Optional[int] = None,
129
+ context_stride: int = 3,
130
+ context_overlap: int = 4,
131
+ closed_loop: bool = True,
132
+ ):
133
+ windows = []
134
+ if num_frames <= context_size:
135
+ windows.append(list(range(num_frames)))
136
+ return windows
137
+ # always return the same set of windows
138
+ delta = context_size - context_overlap
139
+ for start_idx in range(0, num_frames, delta):
140
+ # if past the end of frames, move start_idx back to allow same context_length
141
+ ending = start_idx + context_size
142
+ if ending >= num_frames:
143
+ final_delta = ending - num_frames
144
+ final_start_idx = start_idx - final_delta
145
+ windows.append(list(range(final_start_idx, final_start_idx + context_size)))
146
+ break
147
+ windows.append(list(range(start_idx, start_idx + context_size)))
148
+ return windows
149
+
150
+ def get_context_scheduler(name: str) -> Callable:
151
+ if name == "uniform_looped":
152
+ return uniform_looped
153
+ elif name == "uniform_standard":
154
+ return uniform_standard
155
+ elif name == "static_standard":
156
+ return static_standard
157
+ else:
158
+ raise ValueError(f"Unknown context_overlap policy {name}")
159
+
160
+
161
+ def get_total_steps(
162
+ scheduler,
163
+ timesteps: List[int],
164
+ num_steps: Optional[int] = None,
165
+ num_frames: int = ...,
166
+ context_size: Optional[int] = None,
167
+ context_stride: int = 3,
168
+ context_overlap: int = 4,
169
+ closed_loop: bool = True,
170
+ ):
171
+ return sum(
172
+ len(
173
+ list(
174
+ scheduler(
175
+ i,
176
+ num_steps,
177
+ num_frames,
178
+ context_size,
179
+ context_stride,
180
+ context_overlap,
181
+ )
182
+ )
183
+ )
184
+ for i in range(len(timesteps))
185
+ )
186
+
187
+ def create_window_mask(noise_pred_context, c, latent_video_length, context_overlap, looped=False, window_type="linear"):
188
+ window_mask = torch.ones_like(noise_pred_context)
189
+
190
+ if window_type == "pyramid":
191
+ # Create pyramid weights that peak in the middle
192
+ length = noise_pred_context.shape[1]
193
+ if length % 2 == 0:
194
+ max_weight = length // 2
195
+ weight_sequence = list(range(1, max_weight + 1, 1)) + list(range(max_weight, 0, -1))
196
+ else:
197
+ max_weight = (length + 1) // 2
198
+ weight_sequence = list(range(1, max_weight, 1)) + [max_weight] + list(range(max_weight - 1, 0, -1))
199
+
200
+ # Normalize weights to range from 0 to 1
201
+ max_val = max(weight_sequence)
202
+ weight_sequence = [w / max_val for w in weight_sequence]
203
+
204
+ # Apply the weights to create the mask
205
+ weights_tensor = torch.tensor(weight_sequence, device=noise_pred_context.device)
206
+ weights_tensor = weights_tensor.view(1, -1, 1, 1)
207
+ window_mask = weights_tensor.expand_as(window_mask).clone()
208
+
209
+ # Adjust for position in sequence if needed
210
+ if not looped:
211
+ if min(c) == 0: # First chunk
212
+ left_ramp = torch.linspace(0, 1, context_overlap, device=noise_pred_context.device).view(1, -1, 1, 1)
213
+ # Clone to avoid in-place memory conflict
214
+ left_section = window_mask[:, :context_overlap].clone()
215
+ window_mask[:, :context_overlap] = torch.maximum(left_section, left_ramp)
216
+
217
+ if max(c) == latent_video_length - 1: # Last chunk
218
+ right_ramp = torch.linspace(1, 0, context_overlap, device=noise_pred_context.device).view(1, -1, 1, 1)
219
+ # Clone to avoid in-place memory conflict
220
+ right_section = window_mask[:, -context_overlap:].clone()
221
+ window_mask[:, -context_overlap:] = torch.maximum(right_section, right_ramp)
222
+ else: # Original "linear" window masking
223
+ # Apply left-side blending for all except first chunk (or always in loop mode)
224
+ if min(c) > 0 or (looped and max(c) == latent_video_length - 1):
225
+ ramp_up = torch.linspace(0, 1, context_overlap, device=noise_pred_context.device)
226
+ ramp_up = ramp_up.view(1, -1, 1, 1)
227
+ window_mask[:, :context_overlap] = ramp_up
228
+
229
+ # Apply right-side blending for all except last chunk (or always in loop mode)
230
+ if max(c) < latent_video_length - 1 or (looped and min(c) == 0):
231
+ ramp_down = torch.linspace(1, 0, context_overlap, device=noise_pred_context.device)
232
+ ramp_down = ramp_down.view(1, -1, 1, 1)
233
+ window_mask[:, -context_overlap:] = ramp_down
234
+
235
+ return window_mask
236
+
237
+ class WindowTracker:
238
+ def __init__(self, verbose=False):
239
+ self.window_map = {} # Maps frame sequence to persistent ID
240
+ self.next_id = 0
241
+ self.cache_states = {} # Maps persistent ID to teacache state
242
+ self.verbose = verbose
243
+
244
+ def get_window_id(self, frames):
245
+ key = tuple(sorted(frames)) # Order-independent frame sequence
246
+ if key not in self.window_map:
247
+ self.window_map[key] = self.next_id
248
+ if self.verbose:
249
+ log.info(f"New window pattern {key} -> ID {self.next_id}")
250
+ self.next_id += 1
251
+ return self.window_map[key]
252
+
253
+ def get_teacache(self, window_id, base_state):
254
+ if window_id not in self.cache_states:
255
+ if self.verbose:
256
+ log.info(f"Initializing persistent teacache for window {window_id}")
257
+ self.cache_states[window_id] = base_state.copy()
258
+ return self.cache_states[window_id]
controlnet/nodes.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ from ..utils import log
4
+ import comfy.model_management as mm
5
+ from comfy.utils import load_torch_file
6
+ from tqdm import tqdm
7
+ import gc
8
+
9
+ from accelerate import init_empty_weights
10
+ from accelerate.utils import set_module_tensor_to_device
11
+ import folder_paths
12
+
13
+ class WanVideoControlnetLoader:
14
+ @classmethod
15
+ def INPUT_TYPES(s):
16
+ return {
17
+ "required": {
18
+ "model": (folder_paths.get_filename_list("controlnet"), {"tooltip": "These models are loaded from the 'ComfyUI/models/controlnet' -folder",}),
19
+
20
+ "base_precision": (["fp32", "bf16", "fp16"], {"default": "bf16"}),
21
+ "quantization": (['disabled', 'fp8_e4m3fn', 'fp8_e4m3fn_fast', 'fp8_e5m2', 'fp8_e4m3fn_fast_no_ffn'], {"default": 'disabled', "tooltip": "optional quantization method"}),
22
+ "load_device": (["main_device", "offload_device"], {"default": "main_device", "tooltip": "Initial device to load the model to, NOT recommended with the larger models unless you have 48GB+ VRAM"}),
23
+ },
24
+ }
25
+
26
+ RETURN_TYPES = ("WANVIDEOCONTROLNET",)
27
+ RETURN_NAMES = ("controlnet", )
28
+ FUNCTION = "loadmodel"
29
+ CATEGORY = "WanVideoWrapper"
30
+ DESCRIPTION = "Loads ControlNet model from 'https://huggingface.co/collections/TheDenk/wan21-controlnets-68302b430411dafc0d74d2fc'"
31
+
32
+ def loadmodel(self, model, base_precision, load_device, quantization):
33
+
34
+ device = mm.get_torch_device()
35
+ offload_device = mm.unet_offload_device()
36
+
37
+ transformer_load_device = device if load_device == "main_device" else offload_device
38
+
39
+ base_dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "fp8_e4m3fn_fast": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[base_precision]
40
+
41
+ model_path = folder_paths.get_full_path_or_raise("controlnet", model)
42
+
43
+ sd = load_torch_file(model_path, device=transformer_load_device, safe_load=True)
44
+
45
+ num_layers = 8 if "blocks.7.scale_shift_table" in sd else 6
46
+ out_proj_dim = sd["controlnet_blocks.0.bias"].shape[0]
47
+ downscale_coef = 16 if out_proj_dim == 3072 else 8
48
+ vae_channels = 48 if out_proj_dim == 3072 else 16
49
+
50
+ if not "control_encoder.0.0.weight" in sd:
51
+ raise ValueError("Invalid ControlNet model")
52
+
53
+ controlnet_cfg = {
54
+ "added_kv_proj_dim": None,
55
+ "attention_head_dim": 128,
56
+ "cross_attn_norm": None,
57
+ "downscale_coef": downscale_coef,
58
+ "eps": 1e-06,
59
+ "ffn_dim": 8960,
60
+ "freq_dim": 256,
61
+ "image_dim": None,
62
+ "in_channels": 3,
63
+ "num_attention_heads": 12,
64
+ "num_layers": num_layers,
65
+ "out_proj_dim": out_proj_dim,
66
+ "patch_size": [
67
+ 1,
68
+ 2,
69
+ 2
70
+ ],
71
+ "qk_norm": "rms_norm_across_heads",
72
+ "rope_max_seq_len": 1024,
73
+ "text_dim": 4096,
74
+ "vae_channels": vae_channels
75
+ }
76
+ print(f"Loading WanControlnet with config: {controlnet_cfg}")
77
+
78
+ from .wan_controlnet import WanControlnet
79
+
80
+ with init_empty_weights():
81
+ controlnet = WanControlnet(**controlnet_cfg)
82
+ controlnet.eval()
83
+
84
+ if quantization == "disabled":
85
+ for k, v in sd.items():
86
+ if isinstance(v, torch.Tensor):
87
+ if v.dtype == torch.float8_e4m3fn:
88
+ quantization = "fp8_e4m3fn"
89
+ break
90
+ elif v.dtype == torch.float8_e5m2:
91
+ quantization = "fp8_e5m2"
92
+ break
93
+
94
+ if "fp8_e4m3fn" in quantization:
95
+ dtype = torch.float8_e4m3fn
96
+ elif quantization == "fp8_e5m2":
97
+ dtype = torch.float8_e5m2
98
+ else:
99
+ dtype = base_dtype
100
+ params_to_keep = {"norm", "head", "time_in", "vector_in", "controlnet_patch_embedding", "time_", "img_emb", "modulation", "text_embedding", "adapter"}
101
+
102
+ log.info("Using accelerate to load and assign controlnet model weights to device...")
103
+ param_count = sum(1 for _ in controlnet.named_parameters())
104
+ for name, param in tqdm(controlnet.named_parameters(),
105
+ desc=f"Loading transformer parameters to {transformer_load_device}",
106
+ total=param_count,
107
+ leave=True):
108
+ dtype_to_use = base_dtype if any(keyword in name for keyword in params_to_keep) else dtype
109
+ if "controlnet_patch_embedding" in name:
110
+ dtype_to_use = torch.float32
111
+ set_module_tensor_to_device(controlnet, name, device=transformer_load_device, dtype=dtype_to_use, value=sd[name])
112
+
113
+ del sd
114
+
115
+ if load_device == "offload_device" and controlnet.device != offload_device:
116
+ log.info(f"Moving controlnet model from {controlnet.device} to {offload_device}")
117
+ controlnet.to(offload_device)
118
+ gc.collect()
119
+ mm.soft_empty_cache()
120
+
121
+ return (controlnet,)
122
+
123
+ class WanVideoControlnetApply:
124
+ @classmethod
125
+ def INPUT_TYPES(s):
126
+ return {
127
+ "required": {
128
+ "model": ("WANVIDEOMODEL", ),
129
+ "controlnet": ("WANVIDEOCONTROLNET", ),
130
+ "control_images": ("IMAGE", ),
131
+ "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.0001, "tooltip": "controlnet strength"}),
132
+ "control_stride": ("INT", {"default": 3, "min": 1, "max": 8, "step": 1, "tooltip": "controlnet stride"}),
133
+ "control_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Start percent of the steps to apply controlnet"}),
134
+ "control_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "End percent of the steps to apply controlnet"}),
135
+ }
136
+ }
137
+
138
+ RETURN_TYPES = ("WANVIDEOMODEL",)
139
+ RETURN_NAMES = ("model", )
140
+ FUNCTION = "loadmodel"
141
+ CATEGORY = "WanVideoWrapper"
142
+
143
+ def loadmodel(self, model, controlnet, control_images, strength, control_stride, control_start_percent, control_end_percent):
144
+
145
+ patcher = model.clone()
146
+ if 'transformer_options' not in patcher.model_options:
147
+ patcher.model_options['transformer_options'] = {}
148
+
149
+ control_input = control_images.permute(3, 0, 1, 2).unsqueeze(0).contiguous()
150
+ control_input = control_input * 2.0 - 1.0
151
+
152
+ controlnet = {
153
+ "controlnet": controlnet,
154
+ "control_latents": control_input,
155
+ "controlnet_strength": strength,
156
+ "control_stride": control_stride,
157
+ "controlnet_start": control_start_percent,
158
+ "controlnet_end": control_end_percent
159
+ }
160
+ patcher.model_options["transformer_options"]["controlnet"] = controlnet
161
+
162
+ return (patcher,)
163
+
164
+ NODE_CLASS_MAPPINGS = {
165
+ "WanVideoControlnetLoader": WanVideoControlnetLoader,
166
+ "WanVideoControlnet": WanVideoControlnetApply,
167
+ }
168
+ NODE_DISPLAY_NAME_MAPPINGS = {
169
+ "WanVideoControlnetLoader": "WanVideo Controlnet Loader",
170
+ "WanVideoControlnet": "WanVideo Controlnet Apply",
171
+ }
172
+
173
+
controlnet/wan_controlnet.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # source https://github.com/TheDenk/wan2.1-dilated-controlnet/blob/main/wan_controlnet.py
2
+ from typing import Any, Dict, Optional, Tuple, Union
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
8
+ from diffusers.loaders import FromOriginalModelMixin, PeftAdapterMixin
9
+ from diffusers.utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
10
+ from diffusers.models.modeling_outputs import Transformer2DModelOutput
11
+ from diffusers.models.modeling_utils import ModelMixin
12
+ from diffusers.models.transformers.transformer_wan import (
13
+ WanTimeTextImageEmbedding,
14
+ WanRotaryPosEmbed,
15
+ WanTransformerBlock
16
+ )
17
+
18
+ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
19
+
20
+ def zero_module(module):
21
+ for p in module.parameters():
22
+ nn.init.zeros_(p)
23
+ return module
24
+
25
+
26
+ class WanControlnet(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
27
+ r"""
28
+ A Controlnet Transformer model for video-like data used in the Wan model.
29
+
30
+ Args:
31
+ patch_size (`Tuple[int]`, defaults to `(1, 2, 2)`):
32
+ 3D patch dimensions for video embedding (t_patch, h_patch, w_patch).
33
+ num_attention_heads (`int`, defaults to `40`):
34
+ Fixed length for text embeddings.
35
+ attention_head_dim (`int`, defaults to `128`):
36
+ The number of channels in each head.
37
+ vae_channels (`int`, defaults to `16`):
38
+ The number of channels in the vae input.
39
+ in_channels (`int`, defaults to `16`):
40
+ The number of channels in the controlnet input.
41
+ text_dim (`int`, defaults to `512`):
42
+ Input dimension for text embeddings.
43
+ freq_dim (`int`, defaults to `256`):
44
+ Dimension for sinusoidal time embeddings.
45
+ ffn_dim (`int`, defaults to `13824`):
46
+ Intermediate dimension in feed-forward network.
47
+ num_layers (`int`, defaults to `40`):
48
+ The number of layers of transformer blocks to use.
49
+ window_size (`Tuple[int]`, defaults to `(-1, -1)`):
50
+ Window size for local attention (-1 indicates global attention).
51
+ cross_attn_norm (`bool`, defaults to `True`):
52
+ Enable cross-attention normalization.
53
+ qk_norm (`bool`, defaults to `True`):
54
+ Enable query/key normalization.
55
+ eps (`float`, defaults to `1e-6`):
56
+ Epsilon value for normalization layers.
57
+ add_img_emb (`bool`, defaults to `False`):
58
+ Whether to use img_emb.
59
+ added_kv_proj_dim (`int`, *optional*, defaults to `None`):
60
+ The number of channels to use for the added key and value projections. If `None`, no projection is used.
61
+ downscale_coef (`int`, *optional*, defaults to `8`):
62
+ Coeficient for downscale controlnet input video.
63
+ out_proj_dim (`int`, *optional*, defaults to `128 * 12`):
64
+ Output projection dimention for last linear layers.
65
+ """
66
+
67
+ _supports_gradient_checkpointing = True
68
+ _skip_layerwise_casting_patterns = ["patch_embedding", "condition_embedder", "norm"]
69
+ _no_split_modules = ["WanTransformerBlock"]
70
+ _keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"]
71
+ _keys_to_ignore_on_load_unexpected = ["norm_added_q"]
72
+
73
+ @register_to_config
74
+ def __init__(
75
+ self,
76
+ patch_size: Tuple[int] = (1, 2, 2),
77
+ num_attention_heads: int = 40,
78
+ attention_head_dim: int = 128,
79
+ in_channels: int = 3,
80
+ vae_channels: int = 16,
81
+ text_dim: int = 4096,
82
+ freq_dim: int = 256,
83
+ ffn_dim: int = 13824,
84
+ num_layers: int = 20,
85
+ cross_attn_norm: bool = True,
86
+ qk_norm: Optional[str] = "rms_norm_across_heads",
87
+ eps: float = 1e-6,
88
+ image_dim: Optional[int] = None,
89
+ added_kv_proj_dim: Optional[int] = None,
90
+ rope_max_seq_len: int = 1024,
91
+ downscale_coef: int = 8,
92
+ out_proj_dim: int = 128 * 12,
93
+ ) -> None:
94
+ super().__init__()
95
+
96
+ start_channels = in_channels * (downscale_coef ** 2)
97
+ input_channels = [start_channels, start_channels // 2, start_channels // 4]
98
+
99
+ self.control_encoder = nn.ModuleList([
100
+ ## Spatial compression with time awareness
101
+ nn.Sequential(
102
+ nn.Conv3d(
103
+ in_channels,
104
+ input_channels[0],
105
+ kernel_size=(3, downscale_coef + 1, downscale_coef + 1),
106
+ stride=(1, downscale_coef, downscale_coef),
107
+ padding=(1, downscale_coef // 2, downscale_coef // 2)
108
+ ),
109
+ nn.GELU(approximate="tanh"),
110
+ nn.GroupNorm(2, input_channels[0]),
111
+ ),
112
+ ## Spatio-Temporal compression with spatial awareness
113
+ nn.Sequential(
114
+ nn.Conv3d(input_channels[0], input_channels[1], kernel_size=3, stride=(2, 1, 1), padding=1),
115
+ nn.GELU(approximate="tanh"),
116
+ nn.GroupNorm(2, input_channels[1]),
117
+ ),
118
+ ## Temporal compression with spatial awareness
119
+ nn.Sequential(
120
+ nn.Conv3d(input_channels[1], input_channels[2], kernel_size=3, stride=(2, 1, 1), padding=1),
121
+ nn.GELU(approximate="tanh"),
122
+ nn.GroupNorm(2, input_channels[2]),
123
+ )
124
+ ])
125
+
126
+ inner_dim = num_attention_heads * attention_head_dim
127
+
128
+ # 1. Patch & position embedding
129
+ self.rope = WanRotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len)
130
+ self.patch_embedding = nn.Conv3d(vae_channels + input_channels[2], inner_dim, kernel_size=patch_size, stride=patch_size)
131
+
132
+ # 2. Condition embeddings
133
+ # image_embedding_dim=1280 for I2V model
134
+ self.condition_embedder = WanTimeTextImageEmbedding(
135
+ dim=inner_dim,
136
+ time_freq_dim=freq_dim,
137
+ time_proj_dim=inner_dim * 6,
138
+ text_embed_dim=text_dim,
139
+ image_embed_dim=image_dim,
140
+ )
141
+ # 3. Transformer blocks
142
+ self.blocks = nn.ModuleList(
143
+ [
144
+ WanTransformerBlock(
145
+ inner_dim, ffn_dim, num_attention_heads, qk_norm, cross_attn_norm, eps, added_kv_proj_dim
146
+ )
147
+ for _ in range(num_layers)
148
+ ]
149
+ )
150
+
151
+ # 4 Controlnet modules
152
+ self.controlnet_blocks = nn.ModuleList([])
153
+
154
+ for _ in range(len(self.blocks)):
155
+ controlnet_block = nn.Linear(inner_dim, out_proj_dim)
156
+ controlnet_block = zero_module(controlnet_block)
157
+ self.controlnet_blocks.append(controlnet_block)
158
+
159
+ self.gradient_checkpointing = False
160
+
161
+ def forward(
162
+ self,
163
+ hidden_states: torch.Tensor,
164
+ timestep: torch.LongTensor,
165
+ encoder_hidden_states: torch.Tensor,
166
+ controlnet_states: torch.Tensor,
167
+ encoder_hidden_states_image: Optional[torch.Tensor] = None,
168
+ return_dict: bool = True,
169
+ attention_kwargs: Optional[Dict[str, Any]] = None,
170
+ ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
171
+ if attention_kwargs is not None:
172
+ attention_kwargs = attention_kwargs.copy()
173
+ lora_scale = attention_kwargs.pop("scale", 1.0)
174
+ else:
175
+ lora_scale = 1.0
176
+
177
+ if USE_PEFT_BACKEND:
178
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
179
+ scale_lora_layers(self, lora_scale)
180
+ else:
181
+ if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None:
182
+ logger.warning(
183
+ "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective."
184
+ )
185
+ rotary_emb = self.rope(hidden_states)
186
+
187
+ # 0. Controlnet encoder
188
+ for control_encoder_block in self.control_encoder:
189
+ controlnet_states = control_encoder_block(controlnet_states)
190
+
191
+ hidden_states = torch.cat([hidden_states, controlnet_states], dim=1)
192
+
193
+ ## 1. Patch embedding and stack
194
+ hidden_states = self.patch_embedding(hidden_states)
195
+ hidden_states = hidden_states.flatten(2).transpose(1, 2)
196
+
197
+ # timestep shape: batch_size, or batch_size, seq_len (wan 2.2 ti2v)
198
+ if timestep.ndim == 2:
199
+ ## for ComfyUI workflow
200
+ if hidden_states.shape[1] != timestep.shape[1]:
201
+ timestep = timestep.repeat_interleave(hidden_states.shape[1] // timestep.shape[1], dim=1)
202
+ ts_seq_len = timestep.shape[1]
203
+ timestep = timestep.flatten() # batch_size * seq_len
204
+ else:
205
+ ts_seq_len = None
206
+
207
+ temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder(
208
+ timestep, encoder_hidden_states, encoder_hidden_states_image, timestep_seq_len=ts_seq_len
209
+ )
210
+ if ts_seq_len is not None:
211
+ # batch_size, seq_len, 6, inner_dim
212
+ timestep_proj = timestep_proj.unflatten(2, (6, -1))
213
+ else:
214
+ # batch_size, 6, inner_dim
215
+ timestep_proj = timestep_proj.unflatten(1, (6, -1))
216
+
217
+ if encoder_hidden_states_image is not None:
218
+ encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1)
219
+
220
+ # 4. Transformer blocks
221
+ controlnet_hidden_states = ()
222
+ if torch.is_grad_enabled() and self.gradient_checkpointing:
223
+ for block, controlnet_block in zip(self.blocks, self.controlnet_blocks):
224
+ hidden_states = self._gradient_checkpointing_func(
225
+ block, hidden_states, encoder_hidden_states, timestep_proj, rotary_emb
226
+ )
227
+ controlnet_hidden_states += (controlnet_block(hidden_states),)
228
+ else:
229
+ for block, controlnet_block in zip(self.blocks, self.controlnet_blocks):
230
+ hidden_states = block(hidden_states, encoder_hidden_states, timestep_proj, rotary_emb)
231
+ controlnet_hidden_states += (controlnet_block(hidden_states),)
232
+
233
+
234
+ if USE_PEFT_BACKEND:
235
+ # remove `lora_scale` from each PEFT layer
236
+ unscale_lora_layers(self, lora_scale)
237
+
238
+ if not return_dict:
239
+ return (controlnet_hidden_states,)
240
+
241
+ return Transformer2DModelOutput(sample=controlnet_hidden_states)
242
+
243
+
244
+ if __name__ == "__main__":
245
+ parameters = {
246
+ "added_kv_proj_dim": None,
247
+ "attention_head_dim": 128,
248
+ "cross_attn_norm": True,
249
+ "eps": 1e-06,
250
+ "ffn_dim": 8960,
251
+ "freq_dim": 256,
252
+ "image_dim": None,
253
+ "in_channels": 3,
254
+ "num_attention_heads": 12,
255
+ "num_layers": 2,
256
+ "patch_size": [1, 2, 2],
257
+ "qk_norm": "rms_norm_across_heads",
258
+ "rope_max_seq_len": 1024,
259
+ "text_dim": 4096,
260
+ "downscale_coef": 8,
261
+ "out_proj_dim": 12 * 128,
262
+ "vae_channels": 16
263
+ }
264
+ controlnet = WanControlnet(**parameters)
265
+
266
+ hidden_states = torch.rand(1, 16, 13, 60, 90)
267
+ timestep = torch.tensor([1000]).repeat(17550).unsqueeze(0) #torch.randint(low=0, high=1000, size=(1,), dtype=torch.long)
268
+ encoder_hidden_states = torch.rand(1, 512, 4096)
269
+ controlnet_states = torch.rand(1, 3, 49, 480, 720)
270
+
271
+ controlnet_hidden_states = controlnet(
272
+ hidden_states=hidden_states,
273
+ timestep=timestep,
274
+ encoder_hidden_states=encoder_hidden_states,
275
+ controlnet_states=controlnet_states,
276
+ return_dict=False
277
+ )
278
+ print("Output states count", len(controlnet_hidden_states[0]))
279
+ for out_hidden_states in controlnet_hidden_states[0]:
280
+ print(out_hidden_states.shape)
281
+
custom_linear.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from accelerate import init_empty_weights
4
+ from comfy.ops import cast_bias_weight
5
+
6
+ #based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/quantizers/gguf/utils.py
7
+ def _replace_linear(model, compute_dtype, state_dict, prefix="", patches=None, scale_weights=None):
8
+
9
+ has_children = list(model.children())
10
+ if not has_children:
11
+ return
12
+ for name, module in model.named_children():
13
+ module_prefix = prefix + name + "."
14
+ _replace_linear(module, compute_dtype, state_dict, module_prefix, patches, scale_weights)
15
+
16
+ if isinstance(module, nn.Linear) and "loras" not in module_prefix:
17
+ in_features = state_dict[module_prefix + "weight"].shape[1]
18
+ out_features = state_dict[module_prefix + "weight"].shape[0]
19
+ if scale_weights is not None:
20
+ scale_key = f"{module_prefix}scale_weight"
21
+
22
+ with init_empty_weights():
23
+ model._modules[name] = CustomLinear(
24
+ in_features,
25
+ out_features,
26
+ module.bias is not None,
27
+ compute_dtype=compute_dtype,
28
+ scale_weight=scale_weights.get(scale_key) if scale_weights else None
29
+ )
30
+ #set_lora_params(model._modules[name], patches, module_prefix)
31
+ model._modules[name].source_cls = type(module)
32
+ # Force requires_grad to False to avoid unexpected errors
33
+ model._modules[name].requires_grad_(False)
34
+
35
+ return model
36
+
37
+ def set_lora_params(module, patches, module_prefix=""):
38
+ # Recursively set lora_diffs and lora_strengths for all CustomLinear layers
39
+ for name, child in module.named_children():
40
+ child_prefix = (f"{module_prefix}{name}.")
41
+ set_lora_params(child, patches, child_prefix)
42
+ if isinstance(module, CustomLinear):
43
+ key = f"diffusion_model.{module_prefix}weight"
44
+ patch = patches.get(key, [])
45
+ #print(f"Processing LoRA patches for {key}: {len(patch)} patches found")
46
+ if len(patch) != 0:
47
+ lora_diffs = []
48
+ for p in patch:
49
+ lora_obj = p[1]
50
+ if "head" in key:
51
+ continue # For now skip LoRA for head layers
52
+ elif hasattr(lora_obj, "weights"):
53
+ lora_diffs.append(lora_obj.weights)
54
+ elif isinstance(lora_obj, tuple) and lora_obj[0] == "diff":
55
+ lora_diffs.append(lora_obj[1])
56
+ else:
57
+ continue
58
+ lora_strengths = [p[0] for p in patch]
59
+ module.lora = (lora_diffs, lora_strengths)
60
+ module.step = 0 # Initialize step for LoRA scheduling
61
+
62
+
63
+ class CustomLinear(nn.Linear):
64
+ def __init__(
65
+ self,
66
+ in_features,
67
+ out_features,
68
+ bias=False,
69
+ compute_dtype=None,
70
+ device=None,
71
+ scale_weight=None
72
+ ) -> None:
73
+ super().__init__(in_features, out_features, bias, device)
74
+ self.compute_dtype = compute_dtype
75
+ self.lora = None
76
+ self.step = 0
77
+ self.scale_weight = scale_weight
78
+ self.bias_function = []
79
+ self.weight_function = []
80
+
81
+ def forward(self, input):
82
+ weight, bias = cast_bias_weight(self, input)
83
+
84
+ if self.scale_weight is not None:
85
+ if weight.numel() < input.numel():
86
+ weight = weight * self.scale_weight
87
+ else:
88
+ input = input * self.scale_weight
89
+
90
+ if self.lora is not None:
91
+ weight = self.apply_lora(weight).to(self.compute_dtype)
92
+
93
+ return torch.nn.functional.linear(input, weight, bias)
94
+
95
+ @torch.compiler.disable()
96
+ def apply_lora(self, weight):
97
+ for lora_diff, lora_strength in zip(self.lora[0], self.lora[1]):
98
+ if isinstance(lora_strength, list):
99
+ lora_strength = lora_strength[self.step]
100
+ if lora_strength == 0.0:
101
+ continue
102
+ elif lora_strength == 0.0:
103
+ continue
104
+ patch_diff = torch.mm(
105
+ lora_diff[0].flatten(start_dim=1).to(weight.device),
106
+ lora_diff[1].flatten(start_dim=1).to(weight.device)
107
+ ).reshape(weight.shape)
108
+ alpha = lora_diff[2] / lora_diff[1].shape[0] if lora_diff[2] is not None else 1.0
109
+ scale = lora_strength * alpha
110
+ weight = weight.add(patch_diff, alpha=scale)
111
+ return weight
112
+
113
+ def remove_lora_from_module(module):
114
+ for name, submodule in module.named_modules():
115
+ submodule.lora = None
diffsynth/vram_management/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [2023] [Zhongjie Duan]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
diffsynth/vram_management/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .layers import *
diffsynth/vram_management/layers.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch, copy
2
+ from .utils import init_weights_on_device
3
+
4
+
5
def cast_to(weight, dtype, device):
    """Copy ``weight`` into a freshly allocated tensor with the given dtype/device."""
    buf = torch.empty_like(weight, dtype=dtype, device=device)
    # Tensor.copy_ converts in place and returns the destination buffer.
    return buf.copy_(weight)
9
+
10
+
11
class AutoWrappedModule(torch.nn.Module):
    """Wraps a module and migrates it between offload and onload placements.

    ``state`` is 0 while the wrapped module sits in its offload dtype/device
    and 1 while it sits in its onload dtype/device.
    """

    def __init__(self, module: torch.nn.Module, offload_dtype, offload_device, onload_dtype, onload_device, computation_dtype, computation_device):
        super().__init__()
        self.module = module.to(dtype=offload_dtype, device=offload_device)
        self.offload_dtype = offload_dtype
        self.offload_device = offload_device
        self.onload_dtype = onload_dtype
        self.onload_device = onload_device
        self.computation_dtype = computation_dtype
        self.computation_device = computation_device
        self.state = 0

    def _placements_differ(self):
        # Moving is only worthwhile when offload and onload configs differ.
        return self.offload_dtype != self.onload_dtype or self.offload_device != self.onload_device

    def offload(self):
        """Move the wrapped module back to its offload dtype/device."""
        if self.state == 1 and self._placements_differ():
            self.module.to(dtype=self.offload_dtype, device=self.offload_device)
            self.state = 0

    def onload(self):
        """Move the wrapped module to its onload dtype/device."""
        if self.state == 0 and self._placements_differ():
            self.module.to(dtype=self.onload_dtype, device=self.onload_device)
            self.state = 1

    def forward(self, *args, **kwargs):
        if self.onload_dtype == self.computation_dtype and self.onload_device == self.computation_device:
            runner = self.module
        else:
            # Compute on a throwaway deep copy so the resident module keeps
            # its current (possibly offloaded) placement untouched.
            runner = copy.deepcopy(self.module).to(dtype=self.computation_dtype, device=self.computation_device)
        return runner(*args, **kwargs)
39
+
40
+
41
class AutoWrappedLinear(torch.nn.Linear):
    """Linear layer whose parameters migrate between offload/onload placements.

    Adopts the parameters of an existing ``nn.Linear``; ``state`` is 0 in the
    offload placement and 1 in the onload placement.
    """

    def __init__(self, module: torch.nn.Linear, offload_dtype, offload_device, onload_dtype, onload_device, computation_dtype, computation_device):
        # Build the parent on the meta device so no real storage is allocated;
        # the wrapped module's actual weight/bias are adopted right after.
        with init_weights_on_device(device=torch.device("meta")):
            super().__init__(in_features=module.in_features, out_features=module.out_features, bias=module.bias is not None, dtype=offload_dtype, device=offload_device)
        self.weight = module.weight
        self.bias = module.bias
        self.offload_dtype = offload_dtype
        self.offload_device = offload_device
        self.onload_dtype = onload_dtype
        self.onload_device = onload_device
        self.computation_dtype = computation_dtype
        self.computation_device = computation_device
        self.state = 0

    def offload(self):
        """Move parameters back to the offload dtype/device."""
        if self.state == 1 and (self.offload_dtype != self.onload_dtype or self.offload_device != self.onload_device):
            self.to(dtype=self.offload_dtype, device=self.offload_device)
            self.state = 0

    def onload(self):
        """Move parameters to the onload dtype/device."""
        if self.state == 0 and (self.offload_dtype != self.onload_dtype or self.offload_device != self.onload_device):
            self.to(dtype=self.onload_dtype, device=self.onload_device)
            self.state = 1

    def forward(self, x, *args, **kwargs):
        needs_cast = self.onload_dtype != self.computation_dtype or self.onload_device != self.computation_device
        if needs_cast:
            # Copy parameters to the computation placement for this call only.
            weight = cast_to(self.weight, self.computation_dtype, self.computation_device)
            bias = self.bias if self.bias is None else cast_to(self.bias, self.computation_dtype, self.computation_device)
        else:
            weight, bias = self.weight, self.bias
        return torch.nn.functional.linear(x, weight, bias)
72
+
73
+
74
def enable_vram_management_recursively(model: torch.nn.Module, module_map: dict, module_config: dict, max_num_param=None, overflow_module_config: dict = None, total_num_param=0, compile_args=None):
    """Recursively replace mapped child modules of ``model`` with wrapped versions.

    Children whose type matches a key of ``module_map`` are replaced by
    ``target_module(child, **config)``; unmatched children are recursed into.
    ``overflow_module_config`` is used once the running parameter count exceeds
    ``max_num_param``. Returns the accumulated parameter count.

    Fix: the recursive call previously dropped ``compile_args``, so nested
    submodules were never compiled even when compilation was requested.
    """
    for name, module in model.named_children():
        for source_module, target_module in module_map.items():
            if isinstance(module, source_module):
                # These small embedding helpers are intentionally left unwrapped.
                if "rope_embedder" in name or "patch_embedding" in name or "emb_pos" in name:
                    continue

                num_param = sum(p.numel() for p in module.parameters())
                # Past the parameter budget, switch to the overflow config.
                if max_num_param is not None and total_num_param + num_param > max_num_param:
                    module_config_ = overflow_module_config
                else:
                    module_config_ = module_config
                if compile_args is not None:
                    print("Compiling", name)
                    torch._dynamo.config.cache_size_limit = compile_args["dynamo_cache_size_limit"]
                    torch._dynamo.config.recompile_limit = compile_args["dynamo_cache_size_limit"]
                    module_ = torch.compile(target_module(module, **module_config_), fullgraph=compile_args["fullgraph"], dynamic=compile_args["dynamic"], backend=compile_args["backend"], mode=compile_args["mode"])
                else:
                    module_ = target_module(module, **module_config_)
                setattr(model, name, module_)
                total_num_param += num_param
                break
        else:
            # Not a mapped type: recurse into its children, propagating compile_args.
            total_num_param = enable_vram_management_recursively(module, module_map, module_config, max_num_param, overflow_module_config, total_num_param, compile_args=compile_args)
    return total_num_param
99
+
100
+
101
def enable_vram_management(model: torch.nn.Module, module_map: dict, module_config: dict, max_num_param=None, overflow_module_config: dict = None, compile_args=None):
    """Wrap all eligible submodules of ``model`` and mark it as VRAM-managed."""
    enable_vram_management_recursively(
        model,
        module_map,
        module_config,
        max_num_param,
        overflow_module_config,
        total_num_param=0,
        compile_args=compile_args,
    )
    # Downstream code checks this flag before calling offload()/onload().
    model.vram_management_enabled = True
diffsynth/vram_management/utils.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from contextlib import contextmanager
3
+
4
@contextmanager
def init_weights_on_device(device = torch.device("meta"), include_buffers :bool = False):
    """Context manager that forces parameters (and optionally buffers) created
    inside the block onto ``device`` — typically ``meta`` to skip allocation.

    Works by temporarily monkey-patching ``torch.nn.Module.register_parameter``
    (and, when ``include_buffers`` is True, ``register_buffer`` plus the
    ``torch.empty/zeros/ones/full`` constructors), restoring the originals in
    ``finally``. NOTE(review): this mutates process-global state, so it is not
    safe to use concurrently from multiple threads.
    """

    # Keep the originals so they can be restored on exit.
    old_register_parameter = torch.nn.Module.register_parameter
    if include_buffers:
        old_register_buffer = torch.nn.Module.register_buffer

    def register_empty_parameter(module, name, param):
        # Register normally, then re-wrap the stored parameter on `device`,
        # preserving the parameter subclass, its attributes and requires_grad.
        old_register_parameter(module, name, param)
        if param is not None:
            param_cls = type(module._parameters[name])
            kwargs = module._parameters[name].__dict__
            kwargs["requires_grad"] = param.requires_grad
            module._parameters[name] = param_cls(module._parameters[name].to(device), **kwargs)

    def register_empty_buffer(module, name, buffer, persistent=True):
        # Same idea for buffers: register, then move the stored tensor.
        old_register_buffer(module, name, buffer, persistent=persistent)
        if buffer is not None:
            module._buffers[name] = module._buffers[name].to(device)

    def patch_tensor_constructor(fn):
        # Wrap a torch constructor so it always allocates on `device`.
        def wrapper(*args, **kwargs):
            kwargs["device"] = device
            return fn(*args, **kwargs)

        return wrapper

    if include_buffers:
        tensor_constructors_to_patch = {
            torch_function_name: getattr(torch, torch_function_name)
            for torch_function_name in ["empty", "zeros", "ones", "full"]
        }
    else:
        tensor_constructors_to_patch = {}

    try:
        torch.nn.Module.register_parameter = register_empty_parameter
        if include_buffers:
            torch.nn.Module.register_buffer = register_empty_buffer
        for torch_function_name in tensor_constructors_to_patch.keys():
            setattr(torch, torch_function_name, patch_tensor_constructor(getattr(torch, torch_function_name)))
        yield
    finally:
        # Always restore the original functions, even if the block raised.
        torch.nn.Module.register_parameter = old_register_parameter
        if include_buffers:
            torch.nn.Module.register_buffer = old_register_buffer
        for torch_function_name, old_torch_function in tensor_constructors_to_patch.items():
            setattr(torch, torch_function_name, old_torch_function)
echoshot/echoshot.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from comfy.model_management import get_autocast_device, get_torch_device
3
+
4
@torch.autocast(device_type=get_autocast_device(get_torch_device()), enabled=False)
@torch.compiler.disable()
def rope_apply_z(x, grid_sizes, freqs, inner_t, shift=6):
    """Apply rotary embedding where every frame of a shot shares one frequency row.

    For sample i, ``inner_t[i]`` lists shot lengths; all frames of shot k use
    frequency row ``k * shift``, so position within a shot is not encoded here.

    Args:
        x: tensor indexed as (batch, seq, heads, head_dim); pairs of the last
           dim are treated as complex numbers.
        grid_sizes: per-sample (f, h, w) latent grid sizes.
        freqs: complex frequency table indexed by row.
        inner_t: per-sample list of shot lengths (assumed to sum to f — TODO confirm).
        shift: row stride between consecutive shots.
    """
    n, c = x.size(2), x.size(3) // 2

    # loop over samples
    output = []
    for i, (f, h, w) in enumerate(grid_sizes.tolist()):
        seq_len = f * h * w

        # View the valid part of the sequence as complex pairs (in float64
        # for precision; autocast is disabled above).
        x_i = torch.view_as_complex(
            x[i, :seq_len].to(torch.float64).reshape(seq_len, n, -1, 2)
        )
        # Cumulative shot boundaries: start/end frame index of each shot.
        start_ind = [sum(inner_t[i][:_]) for _ in range(len(inner_t[i]))]
        end_ind = [sum(inner_t[i][:_+1]) for _ in range(len(inner_t[i]))]

        # One identical frequency row (shot_ind * shift) per frame of a shot.
        freq_select = []
        for shot_ind, (s, e) in enumerate(zip(start_ind, end_ind)):
            freq_select += [shot_ind * shift] * (e - s)
        shot_freqs = freqs[freq_select]

        # Broadcast the per-frame rows over the spatial grid.
        freqs_i = shot_freqs.view(f, 1, 1, -1).expand(f, h, w, -1).reshape(seq_len, 1, -1)

        # apply rotary embedding, then reattach the untouched padding tail
        x_i = torch.view_as_real(x_i * freqs_i).flatten(2)
        x_i = torch.cat([x_i, x[i, seq_len:]])

        # append to collection
        output.append(x_i)
    return torch.stack(output).float()
35
+
36
+
37
@torch.autocast(device_type=get_autocast_device(get_torch_device()), enabled=False)
@torch.compiler.disable()
def rope_apply_c(x, freqs, inner_c, shift=6):
    """Apply shot-indexed rotary embedding to context/text tokens.

    Tokens belonging to shot k all use frequency row ``k * shift``; tokens
    past the listed shot lengths (padding/empty tokens) are pushed to a far
    row (``last_shot_ind + 10``) to suppress them.

    Args:
        x: tensor indexed as (batch, seq, heads, head_dim), pairs of the last
           dim treated as complex numbers.
        freqs: complex frequency table indexed by row.
        inner_c: per-sample list of per-shot token counts.
        shift: row stride between consecutive shots.
    """

    b, s, n, c = x.size(0), x.size(1), x.size(2), x.size(3) // 2

    # loop over samples
    output = []
    for i in range(b):

        # View the sequence as complex pairs in float64 (autocast disabled above).
        x_i = torch.view_as_complex(
            x[i].to(torch.float64).reshape(s, n, -1, 2)
        )

        # One frequency row per token: shot_ind * shift for real tokens.
        freq_select = []
        for shot_ind, c_len in enumerate(inner_c[i]):
            freq_select += [shot_ind * shift] * c_len
        # NOTE(review): placed outside the loop, using the last shot index —
        # pads the remaining positions; confirm against upstream EchoShot.
        freq_select += [shot_ind+10] * (s-len(freq_select)) # extra suppression for the empty token
        shot_freqs = freqs[freq_select]

        freqs_i = shot_freqs.view(s, 1, -1)

        # apply rotary embedding
        x_i = torch.view_as_real(x_i * freqs_i).flatten(2)

        # append to collection
        output.append(x_i)
    return torch.stack(output).float()
66
+
67
@torch.autocast(device_type=get_autocast_device(get_torch_device()), enabled=False)
@torch.compiler.disable()
def rope_apply_echoshot(x, grid_sizes, freqs, inner_t, shift=4):
    """Apply full 3D (t, h, w) rotary embedding with per-shot temporal offsets.

    Temporal frequency indices increase frame-by-frame within a shot but each
    shot k is offset by ``k * shift``, separating shots in the temporal RoPE
    space. Spatial (h, w) frequencies are the standard per-row/column ones.

    Args:
        x: tensor indexed as (batch, seq, heads, head_dim); pairs of the last
           dim are treated as complex numbers.
        grid_sizes: per-sample (f, h, w) latent grid sizes.
        freqs: complex frequency table, split below into t/h/w sub-tables.
        inner_t: per-sample list of shot lengths (assumed to sum to f — TODO confirm).
        shift: temporal index offset between consecutive shots.
    """
    n, c = x.size(2), x.size(3) // 2

    # Split the head-dim frequency budget between time, height and width.
    freqs = freqs.split([c - 2 * (c // 3), c // 3, c // 3], dim=1)

    # loop over samples
    output = []
    for i, (f, h, w) in enumerate(grid_sizes.tolist()):
        seq_len = f * h * w

        # View the valid part of the sequence as complex pairs in float64.
        x_i = torch.view_as_complex(
            x[i, :seq_len].to(torch.float64).reshape(seq_len, n, -1, 2)
        )
        # Cumulative shot boundaries: start/end frame index of each shot.
        start_ind = [sum(inner_t[i][:_]) for _ in range(len(inner_t[i]))]
        end_ind = [sum(inner_t[i][:_+1]) for _ in range(len(inner_t[i]))]
        # Per-frame temporal indices, each shot offset by shot_ind * shift.
        freq_select = []
        for shot_ind, (s, e) in enumerate(zip(start_ind, end_ind)):
            freq_select += list(range(shot_ind * shift + s, shot_ind * shift + e))
        t_freqs = freqs[0][freq_select]

        # Combine shifted temporal rows with standard spatial rows/columns.
        freqs_i = torch.cat([
            # freqs[0][:f].view(f, 1, 1, -1).expand(f, h, w, -1),
            t_freqs.view(f, 1, 1, -1).expand(f, h, w, -1), ###
            freqs[1][:h].view(1, h, 1, -1).expand(f, h, w, -1),
            freqs[2][:w].view(1, 1, w, -1).expand(f, h, w, -1)
        ], dim=-1).reshape(seq_len, 1, -1)

        # apply rotary embedding, then reattach the untouched padding tail
        x_i = torch.view_as_real(x_i * freqs_i).flatten(2)
        x_i = torch.cat([x_i, x[i, seq_len:]])

        # append to collection
        output.append(x_i)
    return torch.stack(output).float()
enhance_a_video/LICENSE ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright VideoSys
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
202
+
203
+ ## Some of Enhance-A-Video's model is derived from others projects, which is subject to the following copyright notice:
204
+
205
+ ================================= Diffusers =================================
206
+
207
+ Apache License
208
+ Version 2.0, January 2004
209
+ http://www.apache.org/licenses/
210
+
211
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
212
+
213
+ 1. Definitions.
214
+
215
+ "License" shall mean the terms and conditions for use, reproduction,
216
+ and distribution as defined by Sections 1 through 9 of this document.
217
+
218
+ "Licensor" shall mean the copyright owner or entity authorized by
219
+ the copyright owner that is granting the License.
220
+
221
+ "Legal Entity" shall mean the union of the acting entity and all
222
+ other entities that control, are controlled by, or are under common
223
+ control with that entity. For the purposes of this definition,
224
+ "control" means (i) the power, direct or indirect, to cause the
225
+ direction or management of such entity, whether by contract or
226
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
227
+ outstanding shares, or (iii) beneficial ownership of such entity.
228
+
229
+ "You" (or "Your") shall mean an individual or Legal Entity
230
+ exercising permissions granted by this License.
231
+
232
+ "Source" form shall mean the preferred form for making modifications,
233
+ including but not limited to software source code, documentation
234
+ source, and configuration files.
235
+
236
+ "Object" form shall mean any form resulting from mechanical
237
+ transformation or translation of a Source form, including but
238
+ not limited to compiled object code, generated documentation,
239
+ and conversions to other media types.
240
+
241
+ "Work" shall mean the work of authorship, whether in Source or
242
+ Object form, made available under the License, as indicated by a
243
+ copyright notice that is included in or attached to the work
244
+ (an example is provided in the Appendix below).
245
+
246
+ "Derivative Works" shall mean any work, whether in Source or Object
247
+ form, that is based on (or derived from) the Work and for which the
248
+ editorial revisions, annotations, elaborations, or other modifications
249
+ represent, as a whole, an original work of authorship. For the purposes
250
+ of this License, Derivative Works shall not include works that remain
251
+ separable from, or merely link (or bind by name) to the interfaces of,
252
+ the Work and Derivative Works thereof.
253
+
254
+ "Contribution" shall mean any work of authorship, including
255
+ the original version of the Work and any modifications or additions
256
+ to that Work or Derivative Works thereof, that is intentionally
257
+ submitted to Licensor for inclusion in the Work by the copyright owner
258
+ or by an individual or Legal Entity authorized to submit on behalf of
259
+ the copyright owner. For the purposes of this definition, "submitted"
260
+ means any form of electronic, verbal, or written communication sent
261
+ to the Licensor or its representatives, including but not limited to
262
+ communication on electronic mailing lists, source code control systems,
263
+ and issue tracking systems that are managed by, or on behalf of, the
264
+ Licensor for the purpose of discussing and improving the Work, but
265
+ excluding communication that is conspicuously marked or otherwise
266
+ designated in writing by the copyright owner as "Not a Contribution."
267
+
268
+ "Contributor" shall mean Licensor and any individual or Legal Entity
269
+ on behalf of whom a Contribution has been received by Licensor and
270
+ subsequently incorporated within the Work.
271
+
272
+ 2. Grant of Copyright License. Subject to the terms and conditions of
273
+ this License, each Contributor hereby grants to You a perpetual,
274
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
275
+ copyright license to reproduce, prepare Derivative Works of,
276
+ publicly display, publicly perform, sublicense, and distribute the
277
+ Work and such Derivative Works in Source or Object form.
278
+
279
+ 3. Grant of Patent License. Subject to the terms and conditions of
280
+ this License, each Contributor hereby grants to You a perpetual,
281
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
282
+ (except as stated in this section) patent license to make, have made,
283
+ use, offer to sell, sell, import, and otherwise transfer the Work,
284
+ where such license applies only to those patent claims licensable
285
+ by such Contributor that are necessarily infringed by their
286
+ Contribution(s) alone or by combination of their Contribution(s)
287
+ with the Work to which such Contribution(s) was submitted. If You
288
+ institute patent litigation against any entity (including a
289
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
290
+ or a Contribution incorporated within the Work constitutes direct
291
+ or contributory patent infringement, then any patent licenses
292
+ granted to You under this License for that Work shall terminate
293
+ as of the date such litigation is filed.
294
+
295
+ 4. Redistribution. You may reproduce and distribute copies of the
296
+ Work or Derivative Works thereof in any medium, with or without
297
+ modifications, and in Source or Object form, provided that You
298
+ meet the following conditions:
299
+
300
+ (a) You must give any other recipients of the Work or
301
+ Derivative Works a copy of this License; and
302
+
303
+ (b) You must cause any modified files to carry prominent notices
304
+ stating that You changed the files; and
305
+
306
+ (c) You must retain, in the Source form of any Derivative Works
307
+ that You distribute, all copyright, patent, trademark, and
308
+ attribution notices from the Source form of the Work,
309
+ excluding those notices that do not pertain to any part of
310
+ the Derivative Works; and
311
+
312
+ (d) If the Work includes a "NOTICE" text file as part of its
313
+ distribution, then any Derivative Works that You distribute must
314
+ include a readable copy of the attribution notices contained
315
+ within such NOTICE file, excluding those notices that do not
316
+ pertain to any part of the Derivative Works, in at least one
317
+ of the following places: within a NOTICE text file distributed
318
+ as part of the Derivative Works; within the Source form or
319
+ documentation, if provided along with the Derivative Works; or,
320
+ within a display generated by the Derivative Works, if and
321
+ wherever such third-party notices normally appear. The contents
322
+ of the NOTICE file are for informational purposes only and
323
+ do not modify the License. You may add Your own attribution
324
+ notices within Derivative Works that You distribute, alongside
325
+ or as an addendum to the NOTICE text from the Work, provided
326
+ that such additional attribution notices cannot be construed
327
+ as modifying the License.
328
+
329
+ You may add Your own copyright statement to Your modifications and
330
+ may provide additional or different license terms and conditions
331
+ for use, reproduction, or distribution of Your modifications, or
332
+ for any such Derivative Works as a whole, provided Your use,
333
+ reproduction, and distribution of the Work otherwise complies with
334
+ the conditions stated in this License.
335
+
336
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
337
+ any Contribution intentionally submitted for inclusion in the Work
338
+ by You to the Licensor shall be under the terms and conditions of
339
+ this License, without any additional terms or conditions.
340
+ Notwithstanding the above, nothing herein shall supersede or modify
341
+ the terms of any separate license agreement you may have executed
342
+ with Licensor regarding such Contributions.
343
+
344
+ 6. Trademarks. This License does not grant permission to use the trade
345
+ names, trademarks, service marks, or product names of the Licensor,
346
+ except as required for reasonable and customary use in describing the
347
+ origin of the Work and reproducing the content of the NOTICE file.
348
+
349
+ 7. Disclaimer of Warranty. Unless required by applicable law or
350
+ agreed to in writing, Licensor provides the Work (and each
351
+ Contributor provides its Contributions) on an "AS IS" BASIS,
352
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
353
+ implied, including, without limitation, any warranties or conditions
354
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
355
+ PARTICULAR PURPOSE. You are solely responsible for determining the
356
+ appropriateness of using or redistributing the Work and assume any
357
+ risks associated with Your exercise of permissions under this License.
358
+
359
+ 8. Limitation of Liability. In no event and under no legal theory,
360
+ whether in tort (including negligence), contract, or otherwise,
361
+ unless required by applicable law (such as deliberate and grossly
362
+ negligent acts) or agreed to in writing, shall any Contributor be
363
+ liable to You for damages, including any direct, indirect, special,
364
+ incidental, or consequential damages of any character arising as a
365
+ result of this License or out of the use or inability to use the
366
+ Work (including but not limited to damages for loss of goodwill,
367
+ work stoppage, computer failure or malfunction, or any and all
368
+ other commercial damages or losses), even if such Contributor
369
+ has been advised of the possibility of such damages.
370
+
371
+ 9. Accepting Warranty or Additional Liability. While redistributing
372
+ the Work or Derivative Works thereof, You may choose to offer,
373
+ and charge a fee for, acceptance of support, warranty, indemnity,
374
+ or other liability obligations and/or rights consistent with this
375
+ License. However, in accepting such obligations, You may act only
376
+ on Your own behalf and on Your sole responsibility, not on behalf
377
+ of any other Contributor, and only if You agree to indemnify,
378
+ defend, and hold each Contributor harmless for any liability
379
+ incurred by, or claims asserted against, such Contributor by reason
380
+ of your accepting any such warranty or additional liability.
381
+
382
+ END OF TERMS AND CONDITIONS
383
+
384
+ APPENDIX: How to apply the Apache License to your work.
385
+
386
+ To apply the Apache License to your work, attach the following
387
+ boilerplate notice, with the fields enclosed by brackets "[]"
388
+ replaced with your own identifying information. (Don't include
389
+ the brackets!) The text should be enclosed in the appropriate
390
+ comment syntax for the file format. We also recommend that a
391
+ file or class name and description of purpose be included on the
392
+ same "printed page" as the copyright notice for easier
393
+ identification within third-party archives.
394
+
395
+ Copyright [yyyy] [name of copyright owner]
396
+
397
+ Licensed under the Apache License, Version 2.0 (the "License");
398
+ you may not use this file except in compliance with the License.
399
+ You may obtain a copy of the License at
400
+
401
+ http://www.apache.org/licenses/LICENSE-2.0
402
+
403
+ Unless required by applicable law or agreed to in writing, software
404
+ distributed under the License is distributed on an "AS IS" BASIS,
405
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
406
+ See the License for the specific language governing permissions and
407
+ limitations under the License.
408
+
409
+
410
+ ================================= CogVideoX =================================
411
+
412
+ The CogVideoX License
413
+
414
+ 1. Definitions
415
+
416
+ “Licensor” means the CogVideoX Model Team that distributes its Software.
417
+
418
+ “Software” means the CogVideoX model parameters made available under this license.
419
+
420
+ 2. License Grant
421
+
422
+ Under the terms and conditions of this license, the licensor hereby grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license. The intellectual property rights of the generated content belong to the user to the extent permitted by applicable local laws.
423
+ This license allows you to freely use all open-source models in this repository for academic research. Users who wish to use the models for commercial purposes must register and obtain a basic commercial license in https://open.bigmodel.cn/mla/form .
424
+ Users who have registered and obtained the basic commercial license can use the models for commercial activities for free, but must comply with all terms and conditions of this license. Additionally, the number of service users (visits) for your commercial activities must not exceed 1 million visits per month.
425
+ If the number of service users (visits) for your commercial activities exceeds 1 million visits per month, you need to contact our business team to obtain more commercial licenses.
426
+ The above copyright statement and this license statement should be included in all copies or significant portions of this software.
427
+
428
+ 3. Restriction
429
+
430
+ You will not use, copy, modify, merge, publish, distribute, reproduce, or create derivative works of the Software, in whole or in part, for any military, or illegal purposes.
431
+
432
+ You will not use the Software for any act that may undermine China's national security and national unity, harm the public interest of society, or infringe upon the rights and interests of human beings.
433
+
434
+ 4. Disclaimer
435
+
436
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
437
+
438
+ 5. Limitation of Liability
439
+
440
+ EXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER BASED IN TORT, NEGLIGENCE, CONTRACT, LIABILITY, OR OTHERWISE WILL ANY LICENSOR BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES, OR ANY OTHER COMMERCIAL LOSSES, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
441
+
442
+ 6. Dispute Resolution
443
+
444
+ This license shall be governed and construed in accordance with the laws of People’s Republic of China. Any dispute arising from or in connection with this License shall be submitted to Haidian District People's Court in Beijing.
445
+
446
+ Note that the license is subject to update to a more comprehensive version. For any questions related to the license and copyright, please contact us at license@zhipuai.cn.
447
+
448
+ 1. 定义
449
+
450
+ “许可方”是指分发其软件的 CogVideoX 模型团队。
451
+
452
+ “软件”是指根据本许可提供的 CogVideoX 模型参数。
453
+
454
+ 2. 许可授予
455
+
456
+ 根据本许可的条款和条件,许可方特此授予您非排他性、全球性、不可转让、不可再许可、可撤销、免版税的版权许可。生成内容的知识产权所属,可根据适用当地法律的规定,在法律允许的范围内由用户享有生成内容的知识产权或其他权利。
457
+ 本许可允许您免费使用本仓库中的所有开源模型进行学术研究。对于希望将模型用于商业目的的用户,需在 https://open.bigmodel.cn/mla/form 完成登记并获得基础商用授权。
458
+
459
+ 经过登记并获得基础商用授权的用户可以免费使用本模型进行商业活动,但必须遵守本许可的所有条款和条件。
460
+ 在本许可证下,您的商业活动的服务用户数量(访问量)不得超过100万人次访问 / 每月。如果超过,您需要与我们的商业团队联系以获得更多的商业许可。
461
+ 上述版权声明和本许可声明应包含在本软件的所有副本或重要部分中。
462
+
463
+ 3.限制
464
+
465
+ 您不得出于任何军事或非法目的使用、复制、修改、合并、发布、分发、复制或创建本软件的全部或部分衍生作品。
466
+
467
+ 您不得利用本软件从事任何危害国家安全和国家统一、危害社会公共利益、侵犯人身权益的行为。
468
+
469
+ 4.免责声明
470
+
471
+ 本软件“按原样”提供,不提供任何明示或暗示的保证,包括但不限于对适销性、特定用途的适用性和非侵权性的保证。
472
+ 在任何情况下,作者或版权持有人均不对任何索赔、损害或其他责任负责,无论是在合同诉讼、侵权行为还是其他方面,由软件或软件的使用或其他交易引起、由软件引起或与之相关 软件。
473
+
474
+ 5. 责任限制
475
+
476
+ 除适用法律禁止的范围外,在任何情况下且根据任何法律理论,无论是基于侵权行为、疏忽、合同、责任或其他原因,任何许可方均不对您承担任何直接、间接、特殊、偶然、示范性、 或间接损害,或任何其他商业损失,即使许可人已被告知此类损害的可能性。
477
+
478
+ 6.争议解决
479
+
480
+ 本许可受中华人民共和国法律管辖并按其解释。 因本许可引起的或与本许可有关的任何争议应提交北京市海淀区人民法院。
481
+
482
+ 请注意,许可证可能会更新到更全面的版本。 有关许可和版权的任何问题,请通过 license@zhipuai.cn 与我们联系。
483
+
484
+ ============================================ HunyuanVideo ============================================
485
+
486
+ TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT
487
+ Tencent HunyuanVideo Release Date: December 3, 2024
488
+ THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION, UNITED KINGDOM AND SOUTH KOREA AND IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
489
+ By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Tencent Hunyuan Works, including via any Hosted Service, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.
490
+ 1. DEFINITIONS.
491
+ a. “Acceptable Use Policy” shall mean the policy made available by Tencent as set forth in the Exhibit A.
492
+ b. “Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of Tencent Hunyuan Works or any portion or element thereof set forth herein.
493
+ c. “Documentation” shall mean the specifications, manuals and documentation for Tencent Hunyuan made publicly available by Tencent.
494
+ d. “Hosted Service” shall mean a hosted service offered via an application programming interface (API), web access, or any other electronic or remote means.
495
+ e. “Licensee,” “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Tencent Hunyuan Works for any purpose and in any field of use.
496
+ f. “Materials” shall mean, collectively, Tencent’s proprietary Tencent Hunyuan and Documentation (and any portion thereof) as made available by Tencent under this Agreement.
497
+ g. “Model Derivatives” shall mean all: (i) modifications to Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; (ii) works based on Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Tencent Hunyuan or any Model Derivative of Tencent Hunyuan, to that model in order to cause that model to perform similarly to Tencent Hunyuan or a Model Derivative of Tencent Hunyuan, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs by Tencent Hunyuan or a Model Derivative of Tencent Hunyuan for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
498
+ h. “Output” shall mean the information and/or content output of Tencent Hunyuan or a Model Derivative that results from operating or otherwise using Tencent Hunyuan or a Model Derivative, including via a Hosted Service.
499
+ i. “Tencent,” “We” or “Us” shall mean THL A29 Limited.
500
+ j. “Tencent Hunyuan” shall mean the large language models, text/image/video/audio/3D generation models, and multimodal large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us, including, without limitation to, Tencent HunyuanVideo released at [https://github.com/Tencent/HunyuanVideo].
501
+ k. “Tencent Hunyuan Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
502
+ l. “Territory” shall mean the worldwide territory, excluding the territory of the European Union, United Kingdom and South Korea.
503
+ m. “Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
504
+ n. “including” shall mean including but not limited to.
505
+ 2. GRANT OF RIGHTS.
506
+ We grant You, for the Territory only, a non-exclusive, non-transferable and royalty-free limited license under Tencent’s intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
507
+ 3. DISTRIBUTION.
508
+ You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Tencent Hunyuan Works, exclusively in the Territory, provided that You meet all of the following conditions:
509
+ a. You must provide all such Third Party recipients of the Tencent Hunyuan Works or products or services using them a copy of this Agreement;
510
+ b. You must cause any modified files to carry prominent notices stating that You changed the files;
511
+ c. You are encouraged to: (i) publish at least one technology introduction blogpost or one public statement expressing Your experience of using the Tencent Hunyuan Works; and (ii) mark the products or services developed by using the Tencent Hunyuan Works to indicate that the product/service is “Powered by Tencent Hunyuan”; and
512
+ d. All distributions to Third Parties (other than through a Hosted Service) must be accompanied by a “Notice” text file that contains the following notice: “Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement, Copyright © 2024 Tencent. All Rights Reserved. The trademark rights of “Tencent Hunyuan” are owned by Tencent or its affiliate.”
513
+ You may add Your own copyright statement to Your modifications and, except as set forth in this Section and in Section 5, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement (including as regards the Territory). If You receive Tencent Hunyuan Works from a Licensee as part of an integrated end user product, then this Section 3 of this Agreement will not apply to You.
514
+ 4. ADDITIONAL COMMERCIAL TERMS.
515
+ If, on the Tencent Hunyuan version release date, the monthly active users of all products or services made available by or for Licensee is greater than 100 million monthly active users in the preceding calendar month, You must request a license from Tencent, which Tencent may grant to You in its sole discretion, and You are not authorized to exercise any of the rights under this Agreement unless or until Tencent otherwise expressly grants You such rights.
516
+ 5. RULES OF USE.
517
+ a. Your use of the Tencent Hunyuan Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Tencent Hunyuan Works, which is hereby incorporated by reference into this Agreement. You must include the use restrictions referenced in these Sections 5(a) and 5(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Tencent Hunyuan Works and You must provide notice to subsequent users to whom You distribute that Tencent Hunyuan Works are subject to the use restrictions in these Sections 5(a) and 5(b).
518
+ b. You must not use the Tencent Hunyuan Works or any Output or results of the Tencent Hunyuan Works to improve any other AI model (other than Tencent Hunyuan or Model Derivatives thereof).
519
+ c. You must not use, reproduce, modify, distribute, or display the Tencent Hunyuan Works, Output or results of the Tencent Hunyuan Works outside the Territory. Any such use outside the Territory is unlicensed and unauthorized under this Agreement.
520
+ 6. INTELLECTUAL PROPERTY.
521
+ a. Subject to Tencent’s ownership of Tencent Hunyuan Works made by or for Tencent and intellectual property rights therein, conditioned upon Your compliance with the terms and conditions of this Agreement, as between You and Tencent, You will be the owner of any derivative works and modifications of the Materials and any Model Derivatives that are made by or for You.
522
+ b. No trademark licenses are granted under this Agreement, and in connection with the Tencent Hunyuan Works, Licensee may not use any name or mark owned by or associated with Tencent or any of its affiliates, except as required for reasonable and customary use in describing and distributing the Tencent Hunyuan Works. Tencent hereby grants You a license to use “Tencent Hunyuan” (the “Mark”) in the Territory solely as required to comply with the provisions of Section 3(c), provided that You comply with any applicable laws related to trademark protection. All goodwill arising out of Your use of the Mark will inure to the benefit of Tencent.
523
+ c. If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed. You will defend, indemnify and hold harmless Us from and against any claim by any Third Party arising out of or related to Your or the Third Party’s use or distribution of the Tencent Hunyuan Works.
524
+ d. Tencent claims no rights in Outputs You generate. You and Your users are solely responsible for Outputs and their subsequent uses.
525
+ 7. DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
526
+ a. We are not obligated to support, update, provide training for, or develop any further version of the Tencent Hunyuan Works or to grant any license thereto.
527
+ b. UNLESS AND ONLY TO THE EXTENT REQUIRED BY APPLICABLE LAW, THE TENCENT HUNYUAN WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND INCLUDING ANY WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, COURSE OF DEALING, USAGE OF TRADE, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR OR A THIRD PARTY’S USE OR DISTRIBUTION OF ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
528
+ c. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL TENCENT OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, FOR ANY DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS, EVEN IF TENCENT OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
529
+ 8. SURVIVAL AND TERMINATION.
530
+ a. The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
531
+ b. We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Tencent Hunyuan Works. Sections 6(a), 6(c), 7 and 9 shall survive the termination of this Agreement.
532
+ 9. GOVERNING LAW AND JURISDICTION.
533
+ a. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the Hong Kong Special Administrative Region of the People’s Republic of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
534
+ b. Exclusive jurisdiction and venue for any dispute arising out of or relating to this Agreement will be a court of competent jurisdiction in the Hong Kong Special Administrative Region of the People’s Republic of China, and Tencent and Licensee consent to the exclusive jurisdiction of such court with respect to any such dispute.
535
+
536
+ EXHIBIT A
537
+ ACCEPTABLE USE POLICY
538
+
539
+ Tencent reserves the right to update this Acceptable Use Policy from time to time.
540
+ Last modified: November 5, 2024
541
+
542
+ Tencent endeavors to promote safe and fair use of its tools and features, including Tencent Hunyuan. You agree not to use Tencent Hunyuan or Model Derivatives:
543
+ 1. Outside the Territory;
544
+ 2. In any way that violates any applicable national, federal, state, local, international or any other law or regulation;
545
+ 3. To harm Yourself or others;
546
+ 4. To repurpose or distribute output from Tencent Hunyuan or any Model Derivatives to harm Yourself or others;
547
+ 5. To override or circumvent the safety guardrails and safeguards We have put in place;
548
+ 6. For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
549
+ 7. To generate or disseminate verifiably false information and/or content with the purpose of harming others or influencing elections;
550
+ 8. To generate or facilitate false online engagement, including fake reviews and other means of fake online engagement;
551
+ 9. To intentionally defame, disparage or otherwise harass others;
552
+ 10. To generate and/or disseminate malware (including ransomware) or any other content to be used for the purpose of harming electronic systems;
553
+ 11. To generate or disseminate personal identifiable information with the purpose of harming others;
554
+ 12. To generate or disseminate information (including images, code, posts, articles), and place the information in any public context (including –through the use of bot generated tweets), without expressly and conspicuously identifying that the information and/or content is machine generated;
555
+ 13. To impersonate another individual without consent, authorization, or legal right;
556
+ 14. To make high-stakes automated decisions in domains that affect an individual’s safety, rights or wellbeing (e.g., law enforcement, migration, medicine/health, management of critical infrastructure, safety components of products, essential services, credit, employment, housing, education, social scoring, or insurance);
557
+ 15. In a manner that violates or disrespects the social ethics and moral standards of other countries or regions;
558
+ 16. To perform, facilitate, threaten, incite, plan, promote or encourage violent extremism or terrorism;
559
+ 17. For any use intended to discriminate against or harm individuals or groups based on protected characteristics or categories, online or offline social behavior or known or predicted personal or personality characteristics;
560
+ 18. To intentionally exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
561
+ 19. For military purposes;
562
+ 20. To engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or other professional practices.
enhance_a_video/__init__.py ADDED
File without changes
enhance_a_video/enhance.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from einops import rearrange
3
+ from .globals import get_enhance_weight, get_num_frames
4
+
5
@torch.compiler.disable()
def get_feta_scores(query, key):
    """Compute the FETA (Enhance-A-Video) temporal enhancement score.

    Reorganizes flattened spatio-temporal attention inputs so that
    frame-to-frame attention can be evaluated at each spatial location,
    then delegates the actual scoring to ``feta_score``.

    Args:
        query: attention query tensor of shape [B, S, N, C], where S is the
            flattened spatio-temporal token count, N the head count and C
            the per-head channel dimension.
        key: attention key tensor with the same shape as ``query``.

    Returns:
        A scalar tensor (see ``feta_score``) used to rescale attention.
    """
    img_q, img_k = query, key

    num_frames = get_num_frames()

    B, S, N, C = img_q.shape

    # Calculate spatial dimension (tokens per frame); assumes S is an exact
    # multiple of num_frames — TODO confirm against the caller.
    spatial_dim = S // num_frames

    # Add time dimension between spatial and head dims.
    # NOTE(review): this reshape assumes the flattened sequence is laid out
    # spatial-major (all frames of one spatial location contiguous) —
    # confirm against the model's token ordering.
    query_image = img_q.reshape(B, spatial_dim, num_frames, N, C)
    key_image = img_k.reshape(B, spatial_dim, num_frames, N, C)

    # Expand time dimension. The dim already has size num_frames, so these
    # expands are no-ops; kept for parity with the upstream
    # Enhance-A-Video implementation.
    query_image = query_image.expand(-1, -1, num_frames, -1, -1)  # [B, S, T, N, C]
    key_image = key_image.expand(-1, -1, num_frames, -1, -1)  # [B, S, T, N, C]

    # Reshape to match feta_score input format: [(B S) N T C]
    query_image = rearrange(query_image, "b s t n c -> (b s) n t c")
    key_image = rearrange(key_image, "b s t n c -> (b s) n t c")

    return feta_score(query_image, key_image, C, num_frames)
29
+
30
@torch.compiler.disable()
def feta_score(query_image, key_image, head_dim, num_frames):
    """Compute the scalar FETA enhancement factor from temporal attention.

    Builds a softmax attention map over the frame axis for every token,
    discards each frame's attention to itself, and turns the average
    cross-frame attention mass into a multiplicative enhancement score.

    Args:
        query_image: query tensor shaped [(B*S), N, T, C].
        key_image: key tensor shaped [(B*S), N, T, C].
        head_dim: per-head channel dimension C, used for the 1/sqrt(C) scale.
        num_frames: temporal length T.

    Returns:
        A scalar tensor clamped to a minimum of 1.
    """
    scaled_q = query_image * head_dim**-0.5

    # Frame-to-frame attention logits; softmax in float32 for stability.
    attn_map = scaled_q @ key_image.transpose(-2, -1)
    attn_map = attn_map.to(torch.float32).softmax(dim=-1)

    # Collapse to one [T, T] matrix per (token, head) pair.
    attn_map = attn_map.reshape(-1, num_frames, num_frames)

    # Drop self-attention entries (the diagonal) from every matrix.
    self_mask = torch.eye(num_frames, device=attn_map.device).bool()
    self_mask = self_mask.unsqueeze(0).expand(attn_map.shape[0], -1, -1)
    cross_frame_attn = attn_map.masked_fill(self_mask, 0)

    # Mean over the T*T - T off-diagonal entries of each matrix.
    off_diag_count = num_frames * num_frames - num_frames
    mean_scores = cross_frame_attn.sum(dim=(1, 2)) / off_diag_count

    # Scale by (T + user weight) and never shrink attention output.
    enhance = mean_scores.mean() * (num_frames + get_enhance_weight())
    return enhance.clamp(min=1)
enhance_a_video/globals.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch

# Module-level state shared between the FETA nodes and the attention patch.
# NUM_FRAMES: number of latent frames in the current sampling run.
# FETA_WEIGHT: enhancement weight consumed by feta_score.
# ENABLE_FETA: whether the enhance-a-video patch is currently active.
NUM_FRAMES = None
FETA_WEIGHT = None
ENABLE_FETA = False

@torch.compiler.disable()
def set_num_frames(num_frames: int):
    """Record the frame count for the current sampling run."""
    global NUM_FRAMES
    NUM_FRAMES = num_frames

@torch.compiler.disable()
def get_num_frames() -> int:
    """Return the frame count set by ``set_num_frames`` (None if unset)."""
    return NUM_FRAMES


def enable_enhance():
    """Turn the FETA enhancement on for subsequent attention calls."""
    global ENABLE_FETA
    ENABLE_FETA = True

def disable_enhance():
    """Turn the FETA enhancement off."""
    global ENABLE_FETA
    ENABLE_FETA = False

@torch.compiler.disable()
def is_enhance_enabled() -> bool:
    """Return True while the FETA enhancement is enabled."""
    return ENABLE_FETA

@torch.compiler.disable()
def set_enhance_weight(feta_weight: float):
    """Store the user-configured enhancement weight."""
    global FETA_WEIGHT
    FETA_WEIGHT = feta_weight

@torch.compiler.disable()
def get_enhance_weight() -> float:
    """Return the weight set by ``set_enhance_weight`` (None if unset)."""
    return FETA_WEIGHT
example_workflows/example_inputs/MTV_crafter_example_pose.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f1d09148ca2bd7994de0647d73ed5a44fb186f4d300265edd9571a912f8a0d
3
+ size 318276
example_workflows/example_inputs/env.png ADDED

Git LFS Details

  • SHA256: 61d79eed86a7fd7d831dfc16ce0091f1da725690ee7e8a516f75b42d1c31624f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB
example_workflows/example_inputs/human.png ADDED

Git LFS Details

  • SHA256: 3ee49acf2e6251ef452230019c9394addf95a34c784c421c50d4e3ccc664ed2f
  • Pointer size: 131 Bytes
  • Size of remote file: 210 kB
example_workflows/example_inputs/jeep.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67dc9e4ce73a7289901b159755953652965a17939fe43aedad43381934b32f55
3
+ size 185636
example_workflows/example_inputs/thing.png ADDED
example_workflows/example_inputs/wolf_interpolated.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56fadca209868e8930dc12bd825a2aa8bab822f0152812aaa2aeefd46176c74b
3
+ size 194949
example_workflows/example_inputs/woman.jpg ADDED

Git LFS Details

  • SHA256: 5662b7d55d57749a8ed53267be076b13234d2e2de445fdd5b58a695f894a40de
  • Pointer size: 131 Bytes
  • Size of remote file: 197 kB
example_workflows/example_inputs/woman.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08584293621824d039c264132d90b654bede740f67d9384979544e3e2abfacc
3
+ size 1765454
example_workflows/wanvideo2_2_I2V_A14B_example_WIP.json ADDED
@@ -0,0 +1,2074 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "c6e410bc-5e2c-460b-ae81-c91b6094fbb1",
3
+ "revision": 0,
4
+ "last_node_id": 97,
5
+ "last_link_id": 169,
6
+ "nodes": [
7
+ {
8
+ "id": 50,
9
+ "type": "CLIPTextEncode",
10
+ "pos": [
11
+ 354.00396728515625,
12
+ 922.6547241210938
13
+ ],
14
+ "size": [
15
+ 400,
16
+ 200
17
+ ],
18
+ "flags": {},
19
+ "order": 13,
20
+ "mode": 2,
21
+ "inputs": [
22
+ {
23
+ "name": "clip",
24
+ "type": "CLIP",
25
+ "link": 53
26
+ }
27
+ ],
28
+ "outputs": [
29
+ {
30
+ "name": "CONDITIONING",
31
+ "type": "CONDITIONING",
32
+ "slot_index": 0,
33
+ "links": [
34
+ 55
35
+ ]
36
+ }
37
+ ],
38
+ "properties": {
39
+ "cnr_id": "comfy-core",
40
+ "ver": "0.3.44",
41
+ "Node name for S&R": "CLIPTextEncode"
42
+ },
43
+ "widgets_values": [
44
+ "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
45
+ ],
46
+ "color": "#432",
47
+ "bgcolor": "#653"
48
+ },
49
+ {
50
+ "id": 48,
51
+ "type": "CLIPLoader",
52
+ "pos": [
53
+ -5.996182441711426,
54
+ 672.6546020507812
55
+ ],
56
+ "size": [
57
+ 315,
58
+ 106
59
+ ],
60
+ "flags": {},
61
+ "order": 0,
62
+ "mode": 2,
63
+ "inputs": [],
64
+ "outputs": [
65
+ {
66
+ "name": "CLIP",
67
+ "type": "CLIP",
68
+ "slot_index": 0,
69
+ "links": [
70
+ 52,
71
+ 53
72
+ ]
73
+ }
74
+ ],
75
+ "properties": {
76
+ "cnr_id": "comfy-core",
77
+ "ver": "0.3.44",
78
+ "Node name for S&R": "CLIPLoader"
79
+ },
80
+ "widgets_values": [
81
+ "umt5_xxl_fp16.safetensors",
82
+ "wan",
83
+ "default"
84
+ ],
85
+ "color": "#432",
86
+ "bgcolor": "#653"
87
+ },
88
+ {
89
+ "id": 51,
90
+ "type": "Note",
91
+ "pos": [
92
+ 24.003835678100586,
93
+ 502.65411376953125
94
+ ],
95
+ "size": [
96
+ 253.16725158691406,
97
+ 88
98
+ ],
99
+ "flags": {},
100
+ "order": 1,
101
+ "mode": 0,
102
+ "inputs": [],
103
+ "outputs": [],
104
+ "properties": {},
105
+ "widgets_values": [
106
+ "You can also use native ComfyUI text encoding with these nodes instead of the original, the models are node specific and can't otherwise be mixed."
107
+ ],
108
+ "color": "#432",
109
+ "bgcolor": "#653"
110
+ },
111
+ {
112
+ "id": 49,
113
+ "type": "CLIPTextEncode",
114
+ "pos": [
115
+ 354.00396728515625,
116
+ 672.6546020507812
117
+ ],
118
+ "size": [
119
+ 400,
120
+ 200
121
+ ],
122
+ "flags": {},
123
+ "order": 12,
124
+ "mode": 2,
125
+ "inputs": [
126
+ {
127
+ "name": "clip",
128
+ "type": "CLIP",
129
+ "link": 52
130
+ }
131
+ ],
132
+ "outputs": [
133
+ {
134
+ "name": "CONDITIONING",
135
+ "type": "CONDITIONING",
136
+ "slot_index": 0,
137
+ "links": [
138
+ 54
139
+ ]
140
+ }
141
+ ],
142
+ "properties": {
143
+ "cnr_id": "comfy-core",
144
+ "ver": "0.3.44",
145
+ "Node name for S&R": "CLIPTextEncode"
146
+ },
147
+ "widgets_values": [
148
+ "high quality nature video featuring a red panda balancing on a bamboo stem while a bird lands on it's head, on the background there is a waterfall"
149
+ ],
150
+ "color": "#432",
151
+ "bgcolor": "#653"
152
+ },
153
+ {
154
+ "id": 46,
155
+ "type": "WanVideoTextEmbedBridge",
156
+ "pos": [
157
+ 804.0042724609375,
158
+ 662.6546020507812
159
+ ],
160
+ "size": [
161
+ 315,
162
+ 46
163
+ ],
164
+ "flags": {},
165
+ "order": 19,
166
+ "mode": 2,
167
+ "inputs": [
168
+ {
169
+ "name": "positive",
170
+ "type": "CONDITIONING",
171
+ "link": 54
172
+ },
173
+ {
174
+ "name": "negative",
175
+ "shape": 7,
176
+ "type": "CONDITIONING",
177
+ "link": 55
178
+ }
179
+ ],
180
+ "outputs": [
181
+ {
182
+ "name": "text_embeds",
183
+ "type": "WANVIDEOTEXTEMBEDS",
184
+ "links": null
185
+ }
186
+ ],
187
+ "properties": {
188
+ "cnr_id": "ComfyUI-WanVideoWrapper",
189
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
190
+ "Node name for S&R": "WanVideoTextEmbedBridge"
191
+ },
192
+ "widgets_values": []
193
+ },
194
+ {
195
+ "id": 44,
196
+ "type": "Note",
197
+ "pos": [
198
+ -960,
199
+ -810
200
+ ],
201
+ "size": [
202
+ 303.0501403808594,
203
+ 88
204
+ ],
205
+ "flags": {},
206
+ "order": 2,
207
+ "mode": 0,
208
+ "inputs": [],
209
+ "outputs": [],
210
+ "properties": {},
211
+ "widgets_values": [
212
+ "If you have Triton installed, connect this for ~30% speed increase"
213
+ ],
214
+ "color": "#432",
215
+ "bgcolor": "#653"
216
+ },
217
+ {
218
+ "id": 35,
219
+ "type": "WanVideoTorchCompileSettings",
220
+ "pos": [
221
+ -550,
222
+ -870
223
+ ],
224
+ "size": [
225
+ 390.5999755859375,
226
+ 202
227
+ ],
228
+ "flags": {},
229
+ "order": 3,
230
+ "mode": 0,
231
+ "inputs": [],
232
+ "outputs": [
233
+ {
234
+ "name": "torch_compile_args",
235
+ "type": "WANCOMPILEARGS",
236
+ "slot_index": 0,
237
+ "links": [
238
+ 111,
239
+ 112
240
+ ]
241
+ }
242
+ ],
243
+ "properties": {
244
+ "cnr_id": "ComfyUI-WanVideoWrapper",
245
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
246
+ "Node name for S&R": "WanVideoTorchCompileSettings"
247
+ },
248
+ "widgets_values": [
249
+ "inductor",
250
+ false,
251
+ "default",
252
+ false,
253
+ 64,
254
+ true,
255
+ 128
256
+ ]
257
+ },
258
+ {
259
+ "id": 22,
260
+ "type": "WanVideoModelLoader",
261
+ "pos": [
262
+ -10,
263
+ -740
264
+ ],
265
+ "size": [
266
+ 477.4410095214844,
267
+ 274
268
+ ],
269
+ "flags": {},
270
+ "order": 14,
271
+ "mode": 0,
272
+ "inputs": [
273
+ {
274
+ "name": "compile_args",
275
+ "shape": 7,
276
+ "type": "WANCOMPILEARGS",
277
+ "link": 111
278
+ },
279
+ {
280
+ "name": "block_swap_args",
281
+ "shape": 7,
282
+ "type": "BLOCKSWAPARGS",
283
+ "link": null
284
+ },
285
+ {
286
+ "name": "lora",
287
+ "shape": 7,
288
+ "type": "WANVIDLORA",
289
+ "link": null
290
+ },
291
+ {
292
+ "name": "vram_management_args",
293
+ "shape": 7,
294
+ "type": "VRAM_MANAGEMENTARGS",
295
+ "link": null
296
+ },
297
+ {
298
+ "name": "vace_model",
299
+ "shape": 7,
300
+ "type": "VACEPATH",
301
+ "link": null
302
+ },
303
+ {
304
+ "name": "fantasytalking_model",
305
+ "shape": 7,
306
+ "type": "FANTASYTALKINGMODEL",
307
+ "link": null
308
+ },
309
+ {
310
+ "name": "multitalk_model",
311
+ "shape": 7,
312
+ "type": "MULTITALKMODEL",
313
+ "link": null
314
+ }
315
+ ],
316
+ "outputs": [
317
+ {
318
+ "name": "model",
319
+ "type": "WANVIDEOMODEL",
320
+ "slot_index": 0,
321
+ "links": [
322
+ 155
323
+ ]
324
+ }
325
+ ],
326
+ "properties": {
327
+ "cnr_id": "ComfyUI-WanVideoWrapper",
328
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
329
+ "Node name for S&R": "WanVideoModelLoader"
330
+ },
331
+ "widgets_values": [
332
+ "WanVideo\\2_2\\Wan2_2-I2V-A14B-HIGH_fp8_e4m3fn_scaled_KJ.safetensors",
333
+ "fp16_fast",
334
+ "fp8_e4m3fn_scaled",
335
+ "offload_device",
336
+ "sageattn"
337
+ ],
338
+ "color": "#223",
339
+ "bgcolor": "#335"
340
+ },
341
+ {
342
+ "id": 71,
343
+ "type": "WanVideoModelLoader",
344
+ "pos": [
345
+ -10,
346
+ -380
347
+ ],
348
+ "size": [
349
+ 477.4410095214844,
350
+ 274
351
+ ],
352
+ "flags": {},
353
+ "order": 15,
354
+ "mode": 0,
355
+ "inputs": [
356
+ {
357
+ "name": "compile_args",
358
+ "shape": 7,
359
+ "type": "WANCOMPILEARGS",
360
+ "link": 112
361
+ },
362
+ {
363
+ "name": "block_swap_args",
364
+ "shape": 7,
365
+ "type": "BLOCKSWAPARGS",
366
+ "link": null
367
+ },
368
+ {
369
+ "name": "lora",
370
+ "shape": 7,
371
+ "type": "WANVIDLORA",
372
+ "link": null
373
+ },
374
+ {
375
+ "name": "vram_management_args",
376
+ "shape": 7,
377
+ "type": "VRAM_MANAGEMENTARGS",
378
+ "link": null
379
+ },
380
+ {
381
+ "name": "vace_model",
382
+ "shape": 7,
383
+ "type": "VACEPATH",
384
+ "link": null
385
+ },
386
+ {
387
+ "name": "fantasytalking_model",
388
+ "shape": 7,
389
+ "type": "FANTASYTALKINGMODEL",
390
+ "link": null
391
+ },
392
+ {
393
+ "name": "multitalk_model",
394
+ "shape": 7,
395
+ "type": "MULTITALKMODEL",
396
+ "link": null
397
+ }
398
+ ],
399
+ "outputs": [
400
+ {
401
+ "name": "model",
402
+ "type": "WANVIDEOMODEL",
403
+ "slot_index": 0,
404
+ "links": [
405
+ 160
406
+ ]
407
+ }
408
+ ],
409
+ "properties": {
410
+ "cnr_id": "ComfyUI-WanVideoWrapper",
411
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
412
+ "Node name for S&R": "WanVideoModelLoader"
413
+ },
414
+ "widgets_values": [
415
+ "WanVideo\\2_2\\Wan2_2-I2V-A14B-LOW_fp8_e4m3fn_scaled_KJ.safetensors",
416
+ "fp16_fast",
417
+ "fp8_e4m3fn_scaled",
418
+ "offload_device",
419
+ "sageattn"
420
+ ],
421
+ "color": "#223",
422
+ "bgcolor": "#335"
423
+ },
424
+ {
425
+ "id": 92,
426
+ "type": "WanVideoSetBlockSwap",
427
+ "pos": [
428
+ 538.2930908203125,
429
+ -699.6842041015625
430
+ ],
431
+ "size": [
432
+ 201.76815795898438,
433
+ 46
434
+ ],
435
+ "flags": {},
436
+ "order": 20,
437
+ "mode": 0,
438
+ "inputs": [
439
+ {
440
+ "name": "model",
441
+ "type": "WANVIDEOMODEL",
442
+ "link": 155
443
+ },
444
+ {
445
+ "name": "block_swap_args",
446
+ "shape": 7,
447
+ "type": "BLOCKSWAPARGS",
448
+ "link": 156
449
+ }
450
+ ],
451
+ "outputs": [
452
+ {
453
+ "name": "model",
454
+ "type": "WANVIDEOMODEL",
455
+ "links": [
456
+ 157
457
+ ]
458
+ }
459
+ ],
460
+ "properties": {
461
+ "cnr_id": "ComfyUI-WanVideoWrapper",
462
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
463
+ "Node name for S&R": "WanVideoSetBlockSwap"
464
+ },
465
+ "color": "#223",
466
+ "bgcolor": "#335"
467
+ },
468
+ {
469
+ "id": 93,
470
+ "type": "WanVideoSetBlockSwap",
471
+ "pos": [
472
+ 548.8602294921875,
473
+ -260.6182556152344
474
+ ],
475
+ "size": [
476
+ 201.76815795898438,
477
+ 46
478
+ ],
479
+ "flags": {},
480
+ "order": 21,
481
+ "mode": 0,
482
+ "inputs": [
483
+ {
484
+ "name": "model",
485
+ "type": "WANVIDEOMODEL",
486
+ "link": 160
487
+ },
488
+ {
489
+ "name": "block_swap_args",
490
+ "shape": 7,
491
+ "type": "BLOCKSWAPARGS",
492
+ "link": 159
493
+ }
494
+ ],
495
+ "outputs": [
496
+ {
497
+ "name": "model",
498
+ "type": "WANVIDEOMODEL",
499
+ "links": [
500
+ 161
501
+ ]
502
+ }
503
+ ],
504
+ "properties": {
505
+ "cnr_id": "ComfyUI-WanVideoWrapper",
506
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
507
+ "Node name for S&R": "WanVideoSetBlockSwap"
508
+ },
509
+ "color": "#223",
510
+ "bgcolor": "#335"
511
+ },
512
+ {
513
+ "id": 79,
514
+ "type": "WanVideoSetLoRAs",
515
+ "pos": [
516
+ 969.6483764648438,
517
+ -216.53614807128906
518
+ ],
519
+ "size": [
520
+ 222.27981567382812,
521
+ 46
522
+ ],
523
+ "flags": {},
524
+ "order": 24,
525
+ "mode": 0,
526
+ "inputs": [
527
+ {
528
+ "name": "model",
529
+ "type": "WANVIDEOMODEL",
530
+ "link": 161
531
+ },
532
+ {
533
+ "name": "lora",
534
+ "shape": 7,
535
+ "type": "WANVIDLORA",
536
+ "link": 169
537
+ }
538
+ ],
539
+ "outputs": [
540
+ {
541
+ "name": "model",
542
+ "type": "WANVIDEOMODEL",
543
+ "links": [
544
+ 144
545
+ ]
546
+ }
547
+ ],
548
+ "properties": {
549
+ "cnr_id": "ComfyUI-WanVideoWrapper",
550
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
551
+ "Node name for S&R": "WanVideoSetLoRAs"
552
+ },
553
+ "widgets_values": [],
554
+ "color": "#223",
555
+ "bgcolor": "#335"
556
+ },
557
+ {
558
+ "id": 38,
559
+ "type": "WanVideoVAELoader",
560
+ "pos": [
561
+ 1373.9725341796875,
562
+ -991.5189208984375
563
+ ],
564
+ "size": [
565
+ 315,
566
+ 82
567
+ ],
568
+ "flags": {},
569
+ "order": 4,
570
+ "mode": 0,
571
+ "inputs": [],
572
+ "outputs": [
573
+ {
574
+ "name": "vae",
575
+ "type": "WANVAE",
576
+ "slot_index": 0,
577
+ "links": [
578
+ 43,
579
+ 137
580
+ ]
581
+ }
582
+ ],
583
+ "properties": {
584
+ "cnr_id": "ComfyUI-WanVideoWrapper",
585
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
586
+ "Node name for S&R": "WanVideoVAELoader"
587
+ },
588
+ "widgets_values": [
589
+ "wanvideo\\Wan2_1_VAE_bf16.safetensors",
590
+ "bf16"
591
+ ],
592
+ "color": "#322",
593
+ "bgcolor": "#533"
594
+ },
595
+ {
596
+ "id": 67,
597
+ "type": "LoadImage",
598
+ "pos": [
599
+ 330.2861633300781,
600
+ -1144.739013671875
601
+ ],
602
+ "size": [
603
+ 274.080078125,
604
+ 314
605
+ ],
606
+ "flags": {},
607
+ "order": 5,
608
+ "mode": 0,
609
+ "inputs": [],
610
+ "outputs": [
611
+ {
612
+ "name": "IMAGE",
613
+ "type": "IMAGE",
614
+ "links": [
615
+ 71
616
+ ]
617
+ },
618
+ {
619
+ "name": "MASK",
620
+ "type": "MASK",
621
+ "links": null
622
+ }
623
+ ],
624
+ "properties": {
625
+ "cnr_id": "comfy-core",
626
+ "ver": "0.3.44",
627
+ "Node name for S&R": "LoadImage"
628
+ },
629
+ "widgets_values": [
630
+ "oldman_upscaled.png",
631
+ "image"
632
+ ]
633
+ },
634
+ {
635
+ "id": 28,
636
+ "type": "WanVideoDecode",
637
+ "pos": [
638
+ 2620.946533203125,
639
+ -519.3373413085938
640
+ ],
641
+ "size": [
642
+ 315,
643
+ 198
644
+ ],
645
+ "flags": {},
646
+ "order": 27,
647
+ "mode": 0,
648
+ "inputs": [
649
+ {
650
+ "name": "vae",
651
+ "type": "WANVAE",
652
+ "link": 43
653
+ },
654
+ {
655
+ "name": "samples",
656
+ "type": "LATENT",
657
+ "link": 151
658
+ }
659
+ ],
660
+ "outputs": [
661
+ {
662
+ "name": "images",
663
+ "type": "IMAGE",
664
+ "slot_index": 0,
665
+ "links": [
666
+ 76
667
+ ]
668
+ }
669
+ ],
670
+ "properties": {
671
+ "cnr_id": "ComfyUI-WanVideoWrapper",
672
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
673
+ "Node name for S&R": "WanVideoDecode"
674
+ },
675
+ "widgets_values": [
676
+ false,
677
+ 272,
678
+ 272,
679
+ 144,
680
+ 128,
681
+ "default"
682
+ ],
683
+ "color": "#322",
684
+ "bgcolor": "#533"
685
+ },
686
+ {
687
+ "id": 91,
688
+ "type": "INTConstant",
689
+ "pos": [
690
+ 1554.34130859375,
691
+ 293.0675964355469
692
+ ],
693
+ "size": [
694
+ 200,
695
+ 58
696
+ ],
697
+ "flags": {},
698
+ "order": 6,
699
+ "mode": 0,
700
+ "inputs": [],
701
+ "outputs": [
702
+ {
703
+ "name": "value",
704
+ "type": "INT",
705
+ "links": [
706
+ 153,
707
+ 154
708
+ ]
709
+ }
710
+ ],
711
+ "title": "Split_step",
712
+ "properties": {
713
+ "cnr_id": "comfyui-kjnodes",
714
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
715
+ "Node name for S&R": "INTConstant"
716
+ },
717
+ "widgets_values": [
718
+ 3
719
+ ],
720
+ "color": "#1b4669",
721
+ "bgcolor": "#29699c"
722
+ },
723
+ {
724
+ "id": 80,
725
+ "type": "WanVideoSetLoRAs",
726
+ "pos": [
727
+ 985.8822021484375,
728
+ -458.0684814453125
729
+ ],
730
+ "size": [
731
+ 222.27981567382812,
732
+ 46
733
+ ],
734
+ "flags": {},
735
+ "order": 23,
736
+ "mode": 0,
737
+ "inputs": [
738
+ {
739
+ "name": "model",
740
+ "type": "WANVIDEOMODEL",
741
+ "link": 157
742
+ },
743
+ {
744
+ "name": "lora",
745
+ "shape": 7,
746
+ "type": "WANVIDLORA",
747
+ "link": 110
748
+ }
749
+ ],
750
+ "outputs": [
751
+ {
752
+ "name": "model",
753
+ "type": "WANVIDEOMODEL",
754
+ "links": [
755
+ 109
756
+ ]
757
+ }
758
+ ],
759
+ "properties": {
760
+ "cnr_id": "ComfyUI-WanVideoWrapper",
761
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
762
+ "Node name for S&R": "WanVideoSetLoRAs"
763
+ },
764
+ "widgets_values": [],
765
+ "color": "#223",
766
+ "bgcolor": "#335"
767
+ },
768
+ {
769
+ "id": 69,
770
+ "type": "GetImageSizeAndCount",
771
+ "pos": [
772
+ 2725.78076171875,
773
+ -249.85873413085938
774
+ ],
775
+ "size": [
776
+ 240.41265869140625,
777
+ 86
778
+ ],
779
+ "flags": {},
780
+ "order": 28,
781
+ "mode": 0,
782
+ "inputs": [
783
+ {
784
+ "name": "image",
785
+ "type": "IMAGE",
786
+ "link": 76
787
+ }
788
+ ],
789
+ "outputs": [
790
+ {
791
+ "name": "image",
792
+ "type": "IMAGE",
793
+ "links": [
794
+ 77
795
+ ]
796
+ },
797
+ {
798
+ "label": "704 width",
799
+ "name": "width",
800
+ "type": "INT",
801
+ "links": null
802
+ },
803
+ {
804
+ "label": "704 height",
805
+ "name": "height",
806
+ "type": "INT",
807
+ "links": null
808
+ },
809
+ {
810
+ "label": "81 count",
811
+ "name": "count",
812
+ "type": "INT",
813
+ "links": null
814
+ }
815
+ ],
816
+ "properties": {
817
+ "cnr_id": "comfyui-kjnodes",
818
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
819
+ "Node name for S&R": "GetImageSizeAndCount"
820
+ },
821
+ "widgets_values": []
822
+ },
823
+ {
824
+ "id": 27,
825
+ "type": "WanVideoSampler",
826
+ "pos": [
827
+ 1833.1953125,
828
+ -394.93426513671875
829
+ ],
830
+ "size": [
831
+ 315,
832
+ 975
833
+ ],
834
+ "flags": {},
835
+ "order": 25,
836
+ "mode": 0,
837
+ "inputs": [
838
+ {
839
+ "name": "model",
840
+ "type": "WANVIDEOMODEL",
841
+ "link": 109
842
+ },
843
+ {
844
+ "name": "image_embeds",
845
+ "type": "WANVIDIMAGE_EMBEDS",
846
+ "link": 138
847
+ },
848
+ {
849
+ "name": "text_embeds",
850
+ "shape": 7,
851
+ "type": "WANVIDEOTEXTEMBEDS",
852
+ "link": 30
853
+ },
854
+ {
855
+ "name": "samples",
856
+ "shape": 7,
857
+ "type": "LATENT",
858
+ "link": null
859
+ },
860
+ {
861
+ "name": "feta_args",
862
+ "shape": 7,
863
+ "type": "FETAARGS",
864
+ "link": null
865
+ },
866
+ {
867
+ "name": "context_options",
868
+ "shape": 7,
869
+ "type": "WANVIDCONTEXT",
870
+ "link": null
871
+ },
872
+ {
873
+ "name": "cache_args",
874
+ "shape": 7,
875
+ "type": "CACHEARGS",
876
+ "link": null
877
+ },
878
+ {
879
+ "name": "flowedit_args",
880
+ "shape": 7,
881
+ "type": "FLOWEDITARGS",
882
+ "link": null
883
+ },
884
+ {
885
+ "name": "slg_args",
886
+ "shape": 7,
887
+ "type": "SLGARGS",
888
+ "link": null
889
+ },
890
+ {
891
+ "name": "loop_args",
892
+ "shape": 7,
893
+ "type": "LOOPARGS",
894
+ "link": null
895
+ },
896
+ {
897
+ "name": "experimental_args",
898
+ "shape": 7,
899
+ "type": "EXPERIMENTALARGS",
900
+ "link": null
901
+ },
902
+ {
903
+ "name": "sigmas",
904
+ "shape": 7,
905
+ "type": "SIGMAS",
906
+ "link": null
907
+ },
908
+ {
909
+ "name": "unianimate_poses",
910
+ "shape": 7,
911
+ "type": "UNIANIMATE_POSE",
912
+ "link": null
913
+ },
914
+ {
915
+ "name": "fantasytalking_embeds",
916
+ "shape": 7,
917
+ "type": "FANTASYTALKING_EMBEDS",
918
+ "link": null
919
+ },
920
+ {
921
+ "name": "uni3c_embeds",
922
+ "shape": 7,
923
+ "type": "UNI3C_EMBEDS",
924
+ "link": null
925
+ },
926
+ {
927
+ "name": "multitalk_embeds",
928
+ "shape": 7,
929
+ "type": "MULTITALK_EMBEDS",
930
+ "link": null
931
+ },
932
+ {
933
+ "name": "freeinit_args",
934
+ "shape": 7,
935
+ "type": "FREEINITARGS",
936
+ "link": null
937
+ },
938
+ {
939
+ "name": "steps",
940
+ "type": "INT",
941
+ "widget": {
942
+ "name": "steps"
943
+ },
944
+ "link": 163
945
+ },
946
+ {
947
+ "name": "cfg",
948
+ "type": "FLOAT",
949
+ "widget": {
950
+ "name": "cfg"
951
+ },
952
+ "link": 167
953
+ },
954
+ {
955
+ "name": "end_step",
956
+ "shape": 7,
957
+ "type": "INT",
958
+ "widget": {
959
+ "name": "end_step"
960
+ },
961
+ "link": 153
962
+ }
963
+ ],
964
+ "outputs": [
965
+ {
966
+ "name": "samples",
967
+ "type": "LATENT",
968
+ "slot_index": 0,
969
+ "links": [
970
+ 143
971
+ ]
972
+ }
973
+ ],
974
+ "properties": {
975
+ "cnr_id": "ComfyUI-WanVideoWrapper",
976
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
977
+ "Node name for S&R": "WanVideoSampler"
978
+ },
979
+ "widgets_values": [
980
+ 6,
981
+ 1,
982
+ 8,
983
+ 43,
984
+ "fixed",
985
+ true,
986
+ "dpm++_sde",
987
+ 0,
988
+ 1,
989
+ false,
990
+ "comfy",
991
+ 0,
992
+ 10,
993
+ ""
994
+ ]
995
+ },
996
+ {
997
+ "id": 90,
998
+ "type": "WanVideoSampler",
999
+ "pos": [
1000
+ 2206.047119140625,
1001
+ -393.0194396972656
1002
+ ],
1003
+ "size": [
1004
+ 315,
1005
+ 975
1006
+ ],
1007
+ "flags": {},
1008
+ "order": 26,
1009
+ "mode": 0,
1010
+ "inputs": [
1011
+ {
1012
+ "name": "model",
1013
+ "type": "WANVIDEOMODEL",
1014
+ "link": 144
1015
+ },
1016
+ {
1017
+ "name": "image_embeds",
1018
+ "type": "WANVIDIMAGE_EMBEDS",
1019
+ "link": 149
1020
+ },
1021
+ {
1022
+ "name": "text_embeds",
1023
+ "shape": 7,
1024
+ "type": "WANVIDEOTEXTEMBEDS",
1025
+ "link": 152
1026
+ },
1027
+ {
1028
+ "name": "samples",
1029
+ "shape": 7,
1030
+ "type": "LATENT",
1031
+ "link": 143
1032
+ },
1033
+ {
1034
+ "name": "feta_args",
1035
+ "shape": 7,
1036
+ "type": "FETAARGS",
1037
+ "link": null
1038
+ },
1039
+ {
1040
+ "name": "context_options",
1041
+ "shape": 7,
1042
+ "type": "WANVIDCONTEXT",
1043
+ "link": null
1044
+ },
1045
+ {
1046
+ "name": "cache_args",
1047
+ "shape": 7,
1048
+ "type": "CACHEARGS",
1049
+ "link": null
1050
+ },
1051
+ {
1052
+ "name": "flowedit_args",
1053
+ "shape": 7,
1054
+ "type": "FLOWEDITARGS",
1055
+ "link": null
1056
+ },
1057
+ {
1058
+ "name": "slg_args",
1059
+ "shape": 7,
1060
+ "type": "SLGARGS",
1061
+ "link": null
1062
+ },
1063
+ {
1064
+ "name": "loop_args",
1065
+ "shape": 7,
1066
+ "type": "LOOPARGS",
1067
+ "link": null
1068
+ },
1069
+ {
1070
+ "name": "experimental_args",
1071
+ "shape": 7,
1072
+ "type": "EXPERIMENTALARGS",
1073
+ "link": null
1074
+ },
1075
+ {
1076
+ "name": "sigmas",
1077
+ "shape": 7,
1078
+ "type": "SIGMAS",
1079
+ "link": null
1080
+ },
1081
+ {
1082
+ "name": "unianimate_poses",
1083
+ "shape": 7,
1084
+ "type": "UNIANIMATE_POSE",
1085
+ "link": null
1086
+ },
1087
+ {
1088
+ "name": "fantasytalking_embeds",
1089
+ "shape": 7,
1090
+ "type": "FANTASYTALKING_EMBEDS",
1091
+ "link": null
1092
+ },
1093
+ {
1094
+ "name": "uni3c_embeds",
1095
+ "shape": 7,
1096
+ "type": "UNI3C_EMBEDS",
1097
+ "link": null
1098
+ },
1099
+ {
1100
+ "name": "multitalk_embeds",
1101
+ "shape": 7,
1102
+ "type": "MULTITALK_EMBEDS",
1103
+ "link": null
1104
+ },
1105
+ {
1106
+ "name": "freeinit_args",
1107
+ "shape": 7,
1108
+ "type": "FREEINITARGS",
1109
+ "link": null
1110
+ },
1111
+ {
1112
+ "name": "steps",
1113
+ "type": "INT",
1114
+ "widget": {
1115
+ "name": "steps"
1116
+ },
1117
+ "link": 164
1118
+ },
1119
+ {
1120
+ "name": "start_step",
1121
+ "shape": 7,
1122
+ "type": "INT",
1123
+ "widget": {
1124
+ "name": "start_step"
1125
+ },
1126
+ "link": 154
1127
+ }
1128
+ ],
1129
+ "outputs": [
1130
+ {
1131
+ "name": "samples",
1132
+ "type": "LATENT",
1133
+ "slot_index": 0,
1134
+ "links": [
1135
+ 151
1136
+ ]
1137
+ }
1138
+ ],
1139
+ "properties": {
1140
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1141
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1142
+ "Node name for S&R": "WanVideoSampler"
1143
+ },
1144
+ "widgets_values": [
1145
+ 6,
1146
+ 1,
1147
+ 8,
1148
+ 43,
1149
+ "fixed",
1150
+ true,
1151
+ "dpm++_sde",
1152
+ 0,
1153
+ 1,
1154
+ false,
1155
+ "comfy",
1156
+ 10,
1157
+ -1,
1158
+ ""
1159
+ ]
1160
+ },
1161
+ {
1162
+ "id": 94,
1163
+ "type": "INTConstant",
1164
+ "pos": [
1165
+ 1446.0140380859375,
1166
+ -77.41889953613281
1167
+ ],
1168
+ "size": [
1169
+ 200,
1170
+ 58
1171
+ ],
1172
+ "flags": {},
1173
+ "order": 7,
1174
+ "mode": 0,
1175
+ "inputs": [],
1176
+ "outputs": [
1177
+ {
1178
+ "name": "value",
1179
+ "type": "INT",
1180
+ "links": [
1181
+ 163,
1182
+ 164,
1183
+ 165
1184
+ ]
1185
+ }
1186
+ ],
1187
+ "title": "Steps",
1188
+ "properties": {
1189
+ "cnr_id": "comfyui-kjnodes",
1190
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
1191
+ "Node name for S&R": "INTConstant"
1192
+ },
1193
+ "widgets_values": [
1194
+ 6
1195
+ ],
1196
+ "color": "#1b4669",
1197
+ "bgcolor": "#29699c"
1198
+ },
1199
+ {
1200
+ "id": 95,
1201
+ "type": "CreateCFGScheduleFloatList",
1202
+ "pos": [
1203
+ 1455.8336181640625,
1204
+ 44.19880294799805
1205
+ ],
1206
+ "size": [
1207
+ 298.3199157714844,
1208
+ 178
1209
+ ],
1210
+ "flags": {},
1211
+ "order": 17,
1212
+ "mode": 0,
1213
+ "inputs": [
1214
+ {
1215
+ "name": "steps",
1216
+ "type": "INT",
1217
+ "widget": {
1218
+ "name": "steps"
1219
+ },
1220
+ "link": 165
1221
+ }
1222
+ ],
1223
+ "outputs": [
1224
+ {
1225
+ "name": "float_list",
1226
+ "type": "FLOAT",
1227
+ "links": [
1228
+ 167
1229
+ ]
1230
+ }
1231
+ ],
1232
+ "properties": {
1233
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1234
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
1235
+ "Node name for S&R": "CreateCFGScheduleFloatList"
1236
+ },
1237
+ "widgets_values": [
1238
+ 30,
1239
+ 2,
1240
+ 2,
1241
+ "linear",
1242
+ 0,
1243
+ 0.01
1244
+ ]
1245
+ },
1246
+ {
1247
+ "id": 97,
1248
+ "type": "WanVideoLoraSelect",
1249
+ "pos": [
1250
+ -717.8909301757812,
1251
+ -252.48538208007812
1252
+ ],
1253
+ "size": [
1254
+ 624.4888305664062,
1255
+ 150
1256
+ ],
1257
+ "flags": {},
1258
+ "order": 8,
1259
+ "mode": 0,
1260
+ "inputs": [
1261
+ {
1262
+ "name": "prev_lora",
1263
+ "shape": 7,
1264
+ "type": "WANVIDLORA",
1265
+ "link": null
1266
+ },
1267
+ {
1268
+ "name": "blocks",
1269
+ "shape": 7,
1270
+ "type": "SELECTEDBLOCKS",
1271
+ "link": null
1272
+ }
1273
+ ],
1274
+ "outputs": [
1275
+ {
1276
+ "name": "lora",
1277
+ "type": "WANVIDLORA",
1278
+ "links": [
1279
+ 169
1280
+ ]
1281
+ }
1282
+ ],
1283
+ "properties": {
1284
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1285
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1286
+ "Node name for S&R": "WanVideoLoraSelect"
1287
+ },
1288
+ "widgets_values": [
1289
+ "WanVideo\\Lightx2v\\lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors",
1290
+ 1,
1291
+ false,
1292
+ false
1293
+ ],
1294
+ "color": "#223",
1295
+ "bgcolor": "#335"
1296
+ },
1297
+ {
1298
+ "id": 56,
1299
+ "type": "WanVideoLoraSelect",
1300
+ "pos": [
1301
+ -765.4373168945312,
1302
+ -481.49810791015625
1303
+ ],
1304
+ "size": [
1305
+ 659.4812622070312,
1306
+ 150
1307
+ ],
1308
+ "flags": {},
1309
+ "order": 9,
1310
+ "mode": 0,
1311
+ "inputs": [
1312
+ {
1313
+ "name": "prev_lora",
1314
+ "shape": 7,
1315
+ "type": "WANVIDLORA",
1316
+ "link": null
1317
+ },
1318
+ {
1319
+ "name": "blocks",
1320
+ "shape": 7,
1321
+ "type": "SELECTEDBLOCKS",
1322
+ "link": null
1323
+ }
1324
+ ],
1325
+ "outputs": [
1326
+ {
1327
+ "name": "lora",
1328
+ "type": "WANVIDLORA",
1329
+ "links": [
1330
+ 110
1331
+ ]
1332
+ }
1333
+ ],
1334
+ "properties": {
1335
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1336
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1337
+ "Node name for S&R": "WanVideoLoraSelect"
1338
+ },
1339
+ "widgets_values": [
1340
+ "WanVideo\\Lightx2v\\lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors",
1341
+ 3,
1342
+ false,
1343
+ false
1344
+ ],
1345
+ "color": "#223",
1346
+ "bgcolor": "#335"
1347
+ },
1348
+ {
1349
+ "id": 11,
1350
+ "type": "LoadWanVideoT5TextEncoder",
1351
+ "pos": [
1352
+ 205.93421936035156,
1353
+ -21.262622833251953
1354
+ ],
1355
+ "size": [
1356
+ 377.1661376953125,
1357
+ 130
1358
+ ],
1359
+ "flags": {},
1360
+ "order": 10,
1361
+ "mode": 0,
1362
+ "inputs": [],
1363
+ "outputs": [
1364
+ {
1365
+ "name": "wan_t5_model",
1366
+ "type": "WANTEXTENCODER",
1367
+ "slot_index": 0,
1368
+ "links": [
1369
+ 15
1370
+ ]
1371
+ }
1372
+ ],
1373
+ "properties": {
1374
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1375
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1376
+ "Node name for S&R": "LoadWanVideoT5TextEncoder"
1377
+ },
1378
+ "widgets_values": [
1379
+ "umt5-xxl-enc-bf16.safetensors",
1380
+ "bf16",
1381
+ "offload_device",
1382
+ "disabled"
1383
+ ],
1384
+ "color": "#332922",
1385
+ "bgcolor": "#593930"
1386
+ },
1387
+ {
1388
+ "id": 60,
1389
+ "type": "VHS_VideoCombine",
1390
+ "pos": [
1391
+ 3150,
1392
+ -390
1393
+ ],
1394
+ "size": [
1395
+ 698.6392211914062,
1396
+ 1026.63916015625
1397
+ ],
1398
+ "flags": {},
1399
+ "order": 29,
1400
+ "mode": 0,
1401
+ "inputs": [
1402
+ {
1403
+ "name": "images",
1404
+ "type": "IMAGE",
1405
+ "link": 77
1406
+ },
1407
+ {
1408
+ "name": "audio",
1409
+ "shape": 7,
1410
+ "type": "AUDIO",
1411
+ "link": null
1412
+ },
1413
+ {
1414
+ "name": "meta_batch",
1415
+ "shape": 7,
1416
+ "type": "VHS_BatchManager",
1417
+ "link": null
1418
+ },
1419
+ {
1420
+ "name": "vae",
1421
+ "shape": 7,
1422
+ "type": "VAE",
1423
+ "link": null
1424
+ }
1425
+ ],
1426
+ "outputs": [
1427
+ {
1428
+ "name": "Filenames",
1429
+ "type": "VHS_FILENAMES",
1430
+ "links": null
1431
+ }
1432
+ ],
1433
+ "properties": {
1434
+ "cnr_id": "comfyui-videohelpersuite",
1435
+ "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8",
1436
+ "Node name for S&R": "VHS_VideoCombine"
1437
+ },
1438
+ "widgets_values": {
1439
+ "frame_rate": 16,
1440
+ "loop_count": 0,
1441
+ "filename_prefix": "WanVideo2_2_I2V",
1442
+ "format": "video/h264-mp4",
1443
+ "pix_fmt": "yuv420p",
1444
+ "crf": 19,
1445
+ "save_metadata": true,
1446
+ "trim_to_audio": false,
1447
+ "pingpong": false,
1448
+ "save_output": false,
1449
+ "videopreview": {
1450
+ "hidden": false,
1451
+ "paused": false,
1452
+ "params": {
1453
+ "filename": "WanVideo2_2_I2V_00006.mp4",
1454
+ "subfolder": "",
1455
+ "type": "temp",
1456
+ "format": "video/h264-mp4",
1457
+ "frame_rate": 16,
1458
+ "workflow": "WanVideo2_2_I2V_00006.png",
1459
+ "fullpath": "N:\\AI\\ComfyUI\\temp\\WanVideo2_2_I2V_00006.mp4"
1460
+ }
1461
+ }
1462
+ }
1463
+ },
1464
+ {
1465
+ "id": 39,
1466
+ "type": "WanVideoBlockSwap",
1467
+ "pos": [
1468
+ 516.8650512695312,
1469
+ -526.5733642578125
1470
+ ],
1471
+ "size": [
1472
+ 315,
1473
+ 154
1474
+ ],
1475
+ "flags": {},
1476
+ "order": 11,
1477
+ "mode": 0,
1478
+ "inputs": [],
1479
+ "outputs": [
1480
+ {
1481
+ "name": "block_swap_args",
1482
+ "type": "BLOCKSWAPARGS",
1483
+ "slot_index": 0,
1484
+ "links": [
1485
+ 156,
1486
+ 159
1487
+ ]
1488
+ }
1489
+ ],
1490
+ "properties": {
1491
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1492
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1493
+ "Node name for S&R": "WanVideoBlockSwap"
1494
+ },
1495
+ "widgets_values": [
1496
+ 20,
1497
+ false,
1498
+ false,
1499
+ false,
1500
+ 1
1501
+ ],
1502
+ "color": "#223",
1503
+ "bgcolor": "#335"
1504
+ },
1505
+ {
1506
+ "id": 89,
1507
+ "type": "WanVideoImageToVideoEncode",
1508
+ "pos": [
1509
+ 1401.11962890625,
1510
+ -698.7300415039062
1511
+ ],
1512
+ "size": [
1513
+ 308.2320251464844,
1514
+ 390
1515
+ ],
1516
+ "flags": {},
1517
+ "order": 22,
1518
+ "mode": 0,
1519
+ "inputs": [
1520
+ {
1521
+ "name": "vae",
1522
+ "type": "WANVAE",
1523
+ "link": 137
1524
+ },
1525
+ {
1526
+ "name": "clip_embeds",
1527
+ "shape": 7,
1528
+ "type": "WANVIDIMAGE_CLIPEMBEDS",
1529
+ "link": null
1530
+ },
1531
+ {
1532
+ "name": "start_image",
1533
+ "shape": 7,
1534
+ "type": "IMAGE",
1535
+ "link": 139
1536
+ },
1537
+ {
1538
+ "name": "end_image",
1539
+ "shape": 7,
1540
+ "type": "IMAGE",
1541
+ "link": null
1542
+ },
1543
+ {
1544
+ "name": "control_embeds",
1545
+ "shape": 7,
1546
+ "type": "WANVIDIMAGE_EMBEDS",
1547
+ "link": null
1548
+ },
1549
+ {
1550
+ "name": "temporal_mask",
1551
+ "shape": 7,
1552
+ "type": "MASK",
1553
+ "link": null
1554
+ },
1555
+ {
1556
+ "name": "extra_latents",
1557
+ "shape": 7,
1558
+ "type": "LATENT",
1559
+ "link": null
1560
+ },
1561
+ {
1562
+ "name": "add_cond_latents",
1563
+ "shape": 7,
1564
+ "type": "ADD_COND_LATENTS",
1565
+ "link": null
1566
+ },
1567
+ {
1568
+ "name": "width",
1569
+ "type": "INT",
1570
+ "widget": {
1571
+ "name": "width"
1572
+ },
1573
+ "link": 141
1574
+ },
1575
+ {
1576
+ "name": "height",
1577
+ "type": "INT",
1578
+ "widget": {
1579
+ "name": "height"
1580
+ },
1581
+ "link": 142
1582
+ }
1583
+ ],
1584
+ "outputs": [
1585
+ {
1586
+ "name": "image_embeds",
1587
+ "type": "WANVIDIMAGE_EMBEDS",
1588
+ "links": [
1589
+ 138,
1590
+ 149
1591
+ ]
1592
+ }
1593
+ ],
1594
+ "properties": {
1595
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1596
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
1597
+ "Node name for S&R": "WanVideoImageToVideoEncode"
1598
+ },
1599
+ "widgets_values": [
1600
+ 832,
1601
+ 480,
1602
+ 81,
1603
+ 0,
1604
+ 1,
1605
+ 1,
1606
+ true,
1607
+ false,
1608
+ false
1609
+ ],
1610
+ "color": "#322",
1611
+ "bgcolor": "#533"
1612
+ },
1613
+ {
1614
+ "id": 16,
1615
+ "type": "WanVideoTextEncode",
1616
+ "pos": [
1617
+ 675.8850708007812,
1618
+ -36.032100677490234
1619
+ ],
1620
+ "size": [
1621
+ 474.3573303222656,
1622
+ 316.48370361328125
1623
+ ],
1624
+ "flags": {},
1625
+ "order": 18,
1626
+ "mode": 0,
1627
+ "inputs": [
1628
+ {
1629
+ "name": "t5",
1630
+ "shape": 7,
1631
+ "type": "WANTEXTENCODER",
1632
+ "link": 15
1633
+ },
1634
+ {
1635
+ "name": "model_to_offload",
1636
+ "shape": 7,
1637
+ "type": "WANVIDEOMODEL",
1638
+ "link": null
1639
+ }
1640
+ ],
1641
+ "outputs": [
1642
+ {
1643
+ "name": "text_embeds",
1644
+ "type": "WANVIDEOTEXTEMBEDS",
1645
+ "slot_index": 0,
1646
+ "links": [
1647
+ 30,
1648
+ 152
1649
+ ]
1650
+ }
1651
+ ],
1652
+ "properties": {
1653
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1654
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1655
+ "Node name for S&R": "WanVideoTextEncode"
1656
+ },
1657
+ "widgets_values": [
1658
+ "old man gets up and jumps into the lake",
1659
+ "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
1660
+ true,
1661
+ false,
1662
+ "gpu"
1663
+ ],
1664
+ "color": "#332922",
1665
+ "bgcolor": "#593930"
1666
+ },
1667
+ {
1668
+ "id": 68,
1669
+ "type": "ImageResizeKJv2",
1670
+ "pos": [
1671
+ 696.0801391601562,
1672
+ -1143.5843505859375
1673
+ ],
1674
+ "size": [
1675
+ 270,
1676
+ 336
1677
+ ],
1678
+ "flags": {},
1679
+ "order": 16,
1680
+ "mode": 0,
1681
+ "inputs": [
1682
+ {
1683
+ "name": "image",
1684
+ "type": "IMAGE",
1685
+ "link": 71
1686
+ },
1687
+ {
1688
+ "name": "mask",
1689
+ "shape": 7,
1690
+ "type": "MASK",
1691
+ "link": null
1692
+ }
1693
+ ],
1694
+ "outputs": [
1695
+ {
1696
+ "name": "IMAGE",
1697
+ "type": "IMAGE",
1698
+ "links": [
1699
+ 139
1700
+ ]
1701
+ },
1702
+ {
1703
+ "name": "width",
1704
+ "type": "INT",
1705
+ "links": [
1706
+ 141
1707
+ ]
1708
+ },
1709
+ {
1710
+ "name": "height",
1711
+ "type": "INT",
1712
+ "links": [
1713
+ 142
1714
+ ]
1715
+ },
1716
+ {
1717
+ "name": "mask",
1718
+ "type": "MASK",
1719
+ "links": null
1720
+ }
1721
+ ],
1722
+ "properties": {
1723
+ "cnr_id": "comfyui-kjnodes",
1724
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
1725
+ "Node name for S&R": "ImageResizeKJv2"
1726
+ },
1727
+ "widgets_values": [
1728
+ 720,
1729
+ 720,
1730
+ "lanczos",
1731
+ "crop",
1732
+ "0, 0, 0",
1733
+ "center",
1734
+ 32,
1735
+ "cpu",
1736
+ "<tr><td>Output: </td><td><b>1</b> x <b>704</b> x <b>704 | 5.67MB</b></td></tr>"
1737
+ ]
1738
+ }
1739
+ ],
1740
+ "links": [
1741
+ [
1742
+ 15,
1743
+ 11,
1744
+ 0,
1745
+ 16,
1746
+ 0,
1747
+ "WANTEXTENCODER"
1748
+ ],
1749
+ [
1750
+ 30,
1751
+ 16,
1752
+ 0,
1753
+ 27,
1754
+ 2,
1755
+ "WANVIDEOTEXTEMBEDS"
1756
+ ],
1757
+ [
1758
+ 43,
1759
+ 38,
1760
+ 0,
1761
+ 28,
1762
+ 0,
1763
+ "VAE"
1764
+ ],
1765
+ [
1766
+ 52,
1767
+ 48,
1768
+ 0,
1769
+ 49,
1770
+ 0,
1771
+ "CLIP"
1772
+ ],
1773
+ [
1774
+ 53,
1775
+ 48,
1776
+ 0,
1777
+ 50,
1778
+ 0,
1779
+ "CLIP"
1780
+ ],
1781
+ [
1782
+ 54,
1783
+ 49,
1784
+ 0,
1785
+ 46,
1786
+ 0,
1787
+ "CONDITIONING"
1788
+ ],
1789
+ [
1790
+ 55,
1791
+ 50,
1792
+ 0,
1793
+ 46,
1794
+ 1,
1795
+ "CONDITIONING"
1796
+ ],
1797
+ [
1798
+ 71,
1799
+ 67,
1800
+ 0,
1801
+ 68,
1802
+ 0,
1803
+ "IMAGE"
1804
+ ],
1805
+ [
1806
+ 76,
1807
+ 28,
1808
+ 0,
1809
+ 69,
1810
+ 0,
1811
+ "IMAGE"
1812
+ ],
1813
+ [
1814
+ 77,
1815
+ 69,
1816
+ 0,
1817
+ 60,
1818
+ 0,
1819
+ "IMAGE"
1820
+ ],
1821
+ [
1822
+ 109,
1823
+ 80,
1824
+ 0,
1825
+ 27,
1826
+ 0,
1827
+ "WANVIDEOMODEL"
1828
+ ],
1829
+ [
1830
+ 110,
1831
+ 56,
1832
+ 0,
1833
+ 80,
1834
+ 1,
1835
+ "WANVIDLORA"
1836
+ ],
1837
+ [
1838
+ 111,
1839
+ 35,
1840
+ 0,
1841
+ 22,
1842
+ 0,
1843
+ "WANCOMPILEARGS"
1844
+ ],
1845
+ [
1846
+ 112,
1847
+ 35,
1848
+ 0,
1849
+ 71,
1850
+ 0,
1851
+ "WANCOMPILEARGS"
1852
+ ],
1853
+ [
1854
+ 137,
1855
+ 38,
1856
+ 0,
1857
+ 89,
1858
+ 0,
1859
+ "WANVAE"
1860
+ ],
1861
+ [
1862
+ 138,
1863
+ 89,
1864
+ 0,
1865
+ 27,
1866
+ 1,
1867
+ "WANVIDIMAGE_EMBEDS"
1868
+ ],
1869
+ [
1870
+ 139,
1871
+ 68,
1872
+ 0,
1873
+ 89,
1874
+ 2,
1875
+ "IMAGE"
1876
+ ],
1877
+ [
1878
+ 141,
1879
+ 68,
1880
+ 1,
1881
+ 89,
1882
+ 8,
1883
+ "INT"
1884
+ ],
1885
+ [
1886
+ 142,
1887
+ 68,
1888
+ 2,
1889
+ 89,
1890
+ 9,
1891
+ "INT"
1892
+ ],
1893
+ [
1894
+ 143,
1895
+ 27,
1896
+ 0,
1897
+ 90,
1898
+ 3,
1899
+ "LATENT"
1900
+ ],
1901
+ [
1902
+ 144,
1903
+ 79,
1904
+ 0,
1905
+ 90,
1906
+ 0,
1907
+ "WANVIDEOMODEL"
1908
+ ],
1909
+ [
1910
+ 149,
1911
+ 89,
1912
+ 0,
1913
+ 90,
1914
+ 1,
1915
+ "WANVIDIMAGE_EMBEDS"
1916
+ ],
1917
+ [
1918
+ 151,
1919
+ 90,
1920
+ 0,
1921
+ 28,
1922
+ 1,
1923
+ "LATENT"
1924
+ ],
1925
+ [
1926
+ 152,
1927
+ 16,
1928
+ 0,
1929
+ 90,
1930
+ 2,
1931
+ "WANVIDEOTEXTEMBEDS"
1932
+ ],
1933
+ [
1934
+ 153,
1935
+ 91,
1936
+ 0,
1937
+ 27,
1938
+ 19,
1939
+ "INT"
1940
+ ],
1941
+ [
1942
+ 154,
1943
+ 91,
1944
+ 0,
1945
+ 90,
1946
+ 18,
1947
+ "INT"
1948
+ ],
1949
+ [
1950
+ 155,
1951
+ 22,
1952
+ 0,
1953
+ 92,
1954
+ 0,
1955
+ "WANVIDEOMODEL"
1956
+ ],
1957
+ [
1958
+ 156,
1959
+ 39,
1960
+ 0,
1961
+ 92,
1962
+ 1,
1963
+ "BLOCKSWAPARGS"
1964
+ ],
1965
+ [
1966
+ 157,
1967
+ 92,
1968
+ 0,
1969
+ 80,
1970
+ 0,
1971
+ "WANVIDEOMODEL"
1972
+ ],
1973
+ [
1974
+ 159,
1975
+ 39,
1976
+ 0,
1977
+ 93,
1978
+ 1,
1979
+ "BLOCKSWAPARGS"
1980
+ ],
1981
+ [
1982
+ 160,
1983
+ 71,
1984
+ 0,
1985
+ 93,
1986
+ 0,
1987
+ "WANVIDEOMODEL"
1988
+ ],
1989
+ [
1990
+ 161,
1991
+ 93,
1992
+ 0,
1993
+ 79,
1994
+ 0,
1995
+ "WANVIDEOMODEL"
1996
+ ],
1997
+ [
1998
+ 163,
1999
+ 94,
2000
+ 0,
2001
+ 27,
2002
+ 17,
2003
+ "INT"
2004
+ ],
2005
+ [
2006
+ 164,
2007
+ 94,
2008
+ 0,
2009
+ 90,
2010
+ 17,
2011
+ "INT"
2012
+ ],
2013
+ [
2014
+ 165,
2015
+ 94,
2016
+ 0,
2017
+ 95,
2018
+ 0,
2019
+ "INT"
2020
+ ],
2021
+ [
2022
+ 167,
2023
+ 95,
2024
+ 0,
2025
+ 27,
2026
+ 18,
2027
+ "FLOAT"
2028
+ ],
2029
+ [
2030
+ 169,
2031
+ 97,
2032
+ 0,
2033
+ 79,
2034
+ 1,
2035
+ "WANVIDLORA"
2036
+ ]
2037
+ ],
2038
+ "groups": [
2039
+ {
2040
+ "id": 1,
2041
+ "title": "ComfyUI text encoding alternative",
2042
+ "bounding": [
2043
+ -68.81207275390625,
2044
+ 358.7208251953125,
2045
+ 1210.621337890625,
2046
+ 805.9080810546875
2047
+ ],
2048
+ "color": "#3f789e",
2049
+ "font_size": 24,
2050
+ "flags": {}
2051
+ }
2052
+ ],
2053
+ "config": {},
2054
+ "extra": {
2055
+ "ds": {
2056
+ "scale": 1.3109994191500771,
2057
+ "offset": [
2058
+ -2108.9731183881954,
2059
+ 172.10398659016835
2060
+ ]
2061
+ },
2062
+ "node_versions": {
2063
+ "ComfyUI-WanVideoWrapper": "5a2383621a05825d0d0437781afcb8552d9590fd",
2064
+ "comfy-core": "0.3.26",
2065
+ "ComfyUI-VideoHelperSuite": "0a75c7958fe320efcb052f1d9f8451fd20c730a8"
2066
+ },
2067
+ "VHS_latentpreview": true,
2068
+ "VHS_latentpreviewrate": 0,
2069
+ "VHS_MetadataImage": true,
2070
+ "VHS_KeepIntermediate": true,
2071
+ "frontendVersion": "1.24.1"
2072
+ },
2073
+ "version": 0.4
2074
+ }
example_workflows/wanvideo_14B_pusa_I2V_example_01.json ADDED
@@ -0,0 +1,1326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "206247b6-9fec-4ed2-8927-e4f388c674d4",
3
+ "revision": 0,
4
+ "last_node_id": 85,
5
+ "last_link_id": 119,
6
+ "nodes": [
7
+ {
8
+ "id": 46,
9
+ "type": "WanVideoTextEmbedBridge",
10
+ "pos": [
11
+ 854.3115844726562,
12
+ 664.818603515625
13
+ ],
14
+ "size": [
15
+ 315,
16
+ 46
17
+ ],
18
+ "flags": {},
19
+ "order": 13,
20
+ "mode": 2,
21
+ "inputs": [
22
+ {
23
+ "name": "positive",
24
+ "type": "CONDITIONING",
25
+ "link": 54
26
+ },
27
+ {
28
+ "name": "negative",
29
+ "shape": 7,
30
+ "type": "CONDITIONING",
31
+ "link": 55
32
+ }
33
+ ],
34
+ "outputs": [
35
+ {
36
+ "name": "text_embeds",
37
+ "type": "WANVIDEOTEXTEMBEDS",
38
+ "links": null
39
+ }
40
+ ],
41
+ "properties": {
42
+ "cnr_id": "ComfyUI-WanVideoWrapper",
43
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
44
+ "Node name for S&R": "WanVideoTextEmbedBridge"
45
+ },
46
+ "widgets_values": []
47
+ },
48
+ {
49
+ "id": 50,
50
+ "type": "CLIPTextEncode",
51
+ "pos": [
52
+ 404.3124084472656,
53
+ 924.8187255859375
54
+ ],
55
+ "size": [
56
+ 400,
57
+ 200
58
+ ],
59
+ "flags": {},
60
+ "order": 10,
61
+ "mode": 2,
62
+ "inputs": [
63
+ {
64
+ "name": "clip",
65
+ "type": "CLIP",
66
+ "link": 53
67
+ }
68
+ ],
69
+ "outputs": [
70
+ {
71
+ "name": "CONDITIONING",
72
+ "type": "CONDITIONING",
73
+ "slot_index": 0,
74
+ "links": [
75
+ 55
76
+ ]
77
+ }
78
+ ],
79
+ "properties": {
80
+ "cnr_id": "comfy-core",
81
+ "ver": "0.3.26",
82
+ "Node name for S&R": "CLIPTextEncode"
83
+ },
84
+ "widgets_values": [
85
+ "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
86
+ ]
87
+ },
88
+ {
89
+ "id": 48,
90
+ "type": "CLIPLoader",
91
+ "pos": [
92
+ 44.31245803833008,
93
+ 674.818603515625
94
+ ],
95
+ "size": [
96
+ 315,
97
+ 106
98
+ ],
99
+ "flags": {},
100
+ "order": 0,
101
+ "mode": 2,
102
+ "inputs": [],
103
+ "outputs": [
104
+ {
105
+ "name": "CLIP",
106
+ "type": "CLIP",
107
+ "slot_index": 0,
108
+ "links": [
109
+ 52,
110
+ 53
111
+ ]
112
+ }
113
+ ],
114
+ "properties": {
115
+ "cnr_id": "comfy-core",
116
+ "ver": "0.3.26",
117
+ "Node name for S&R": "CLIPLoader"
118
+ },
119
+ "widgets_values": [
120
+ "umt5_xxl_fp16.safetensors",
121
+ "wan",
122
+ "default"
123
+ ]
124
+ },
125
+ {
126
+ "id": 49,
127
+ "type": "CLIPTextEncode",
128
+ "pos": [
129
+ 404.3124084472656,
130
+ 674.818603515625
131
+ ],
132
+ "size": [
133
+ 400,
134
+ 200
135
+ ],
136
+ "flags": {},
137
+ "order": 9,
138
+ "mode": 2,
139
+ "inputs": [
140
+ {
141
+ "name": "clip",
142
+ "type": "CLIP",
143
+ "link": 52
144
+ }
145
+ ],
146
+ "outputs": [
147
+ {
148
+ "name": "CONDITIONING",
149
+ "type": "CONDITIONING",
150
+ "slot_index": 0,
151
+ "links": [
152
+ 54
153
+ ]
154
+ }
155
+ ],
156
+ "properties": {
157
+ "cnr_id": "comfy-core",
158
+ "ver": "0.3.26",
159
+ "Node name for S&R": "CLIPTextEncode"
160
+ },
161
+ "widgets_values": [
162
+ "high quality nature video featuring a red panda balancing on a bamboo stem while a bird lands on it's head, on the background there is a waterfall"
163
+ ]
164
+ },
165
+ {
166
+ "id": 51,
167
+ "type": "Note",
168
+ "pos": [
169
+ 74.31259155273438,
170
+ 504.8180847167969
171
+ ],
172
+ "size": [
173
+ 253.16725158691406,
174
+ 88
175
+ ],
176
+ "flags": {},
177
+ "order": 1,
178
+ "mode": 0,
179
+ "inputs": [],
180
+ "outputs": [],
181
+ "properties": {},
182
+ "widgets_values": [
183
+ "You can also use native ComfyUI text encoding with these nodes instead of the original, the models are node specific and can't otherwise be mixed."
184
+ ],
185
+ "color": "#432",
186
+ "bgcolor": "#653"
187
+ },
188
+ {
189
+ "id": 11,
190
+ "type": "LoadWanVideoT5TextEncoder",
191
+ "pos": [
192
+ 161.7229461669922,
193
+ -501.2225036621094
194
+ ],
195
+ "size": [
196
+ 377.1661376953125,
197
+ 130
198
+ ],
199
+ "flags": {},
200
+ "order": 2,
201
+ "mode": 0,
202
+ "inputs": [],
203
+ "outputs": [
204
+ {
205
+ "name": "wan_t5_model",
206
+ "type": "WANTEXTENCODER",
207
+ "slot_index": 0,
208
+ "links": [
209
+ 15
210
+ ]
211
+ }
212
+ ],
213
+ "properties": {
214
+ "cnr_id": "ComfyUI-WanVideoWrapper",
215
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
216
+ "Node name for S&R": "LoadWanVideoT5TextEncoder"
217
+ },
218
+ "widgets_values": [
219
+ "umt5-xxl-enc-bf16.safetensors",
220
+ "bf16",
221
+ "offload_device",
222
+ "disabled"
223
+ ],
224
+ "color": "#332922",
225
+ "bgcolor": "#593930"
226
+ },
227
+ {
228
+ "id": 38,
229
+ "type": "WanVideoVAELoader",
230
+ "pos": [
231
+ 169.25408935546875,
232
+ -322.9471740722656
233
+ ],
234
+ "size": [
235
+ 372.7727966308594,
236
+ 82
237
+ ],
238
+ "flags": {},
239
+ "order": 3,
240
+ "mode": 0,
241
+ "inputs": [],
242
+ "outputs": [
243
+ {
244
+ "name": "vae",
245
+ "type": "WANVAE",
246
+ "slot_index": 0,
247
+ "links": [
248
+ 43,
249
+ 88
250
+ ]
251
+ }
252
+ ],
253
+ "properties": {
254
+ "cnr_id": "ComfyUI-WanVideoWrapper",
255
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
256
+ "Node name for S&R": "WanVideoVAELoader"
257
+ },
258
+ "widgets_values": [
259
+ "wanvideo\\Wan2_1_VAE_bf16.safetensors",
260
+ "bf16"
261
+ ],
262
+ "color": "#322",
263
+ "bgcolor": "#533"
264
+ },
265
+ {
266
+ "id": 30,
267
+ "type": "VHS_VideoCombine",
268
+ "pos": [
269
+ 1684.1597900390625,
270
+ -394.2595520019531
271
+ ],
272
+ "size": [
273
+ 697.7421264648438,
274
+ 1025.7421875
275
+ ],
276
+ "flags": {},
277
+ "order": 20,
278
+ "mode": 0,
279
+ "inputs": [
280
+ {
281
+ "name": "images",
282
+ "type": "IMAGE",
283
+ "link": 36
284
+ },
285
+ {
286
+ "name": "audio",
287
+ "shape": 7,
288
+ "type": "AUDIO",
289
+ "link": null
290
+ },
291
+ {
292
+ "name": "meta_batch",
293
+ "shape": 7,
294
+ "type": "VHS_BatchManager",
295
+ "link": null
296
+ },
297
+ {
298
+ "name": "vae",
299
+ "shape": 7,
300
+ "type": "VAE",
301
+ "link": null
302
+ }
303
+ ],
304
+ "outputs": [
305
+ {
306
+ "name": "Filenames",
307
+ "type": "VHS_FILENAMES",
308
+ "links": null
309
+ }
310
+ ],
311
+ "properties": {
312
+ "cnr_id": "comfyui-videohelpersuite",
313
+ "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8",
314
+ "Node name for S&R": "VHS_VideoCombine"
315
+ },
316
+ "widgets_values": {
317
+ "frame_rate": 16,
318
+ "loop_count": 0,
319
+ "filename_prefix": "WanVideoWrapper_I2V",
320
+ "format": "video/h264-mp4",
321
+ "pix_fmt": "yuv420p",
322
+ "crf": 19,
323
+ "save_metadata": true,
324
+ "trim_to_audio": false,
325
+ "pingpong": false,
326
+ "save_output": true,
327
+ "videopreview": {
328
+ "hidden": false,
329
+ "paused": false,
330
+ "params": {
331
+ "filename": "WanVideoWrapper_I2V_00240.mp4",
332
+ "subfolder": "",
333
+ "type": "output",
334
+ "format": "video/h264-mp4",
335
+ "frame_rate": 16,
336
+ "workflow": "WanVideoWrapper_I2V_00240.png",
337
+ "fullpath": "N:\\AI\\ComfyUI\\output\\WanVideoWrapper_I2V_00240.mp4"
338
+ }
339
+ }
340
+ }
341
+ },
342
+ {
343
+ "id": 28,
344
+ "type": "WanVideoDecode",
345
+ "pos": [
346
+ 1688.0194091796875,
347
+ -647.6461791992188
348
+ ],
349
+ "size": [
350
+ 315,
351
+ 198
352
+ ],
353
+ "flags": {},
354
+ "order": 19,
355
+ "mode": 0,
356
+ "inputs": [
357
+ {
358
+ "name": "vae",
359
+ "type": "WANVAE",
360
+ "link": 43
361
+ },
362
+ {
363
+ "name": "samples",
364
+ "type": "LATENT",
365
+ "link": 117
366
+ }
367
+ ],
368
+ "outputs": [
369
+ {
370
+ "name": "images",
371
+ "type": "IMAGE",
372
+ "slot_index": 0,
373
+ "links": [
374
+ 36
375
+ ]
376
+ }
377
+ ],
378
+ "properties": {
379
+ "cnr_id": "ComfyUI-WanVideoWrapper",
380
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
381
+ "Node name for S&R": "WanVideoDecode"
382
+ },
383
+ "widgets_values": [
384
+ false,
385
+ 272,
386
+ 272,
387
+ 144,
388
+ 128,
389
+ "default"
390
+ ],
391
+ "color": "#322",
392
+ "bgcolor": "#533"
393
+ },
394
+ {
395
+ "id": 68,
396
+ "type": "WanVideoLoraSelect",
397
+ "pos": [
398
+ -326.3398742675781,
399
+ -452.3577880859375
400
+ ],
401
+ "size": [
402
+ 406.5719909667969,
403
+ 188
404
+ ],
405
+ "flags": {},
406
+ "order": 11,
407
+ "mode": 0,
408
+ "inputs": [
409
+ {
410
+ "name": "prev_lora",
411
+ "shape": 7,
412
+ "type": "WANVIDLORA",
413
+ "link": 111
414
+ },
415
+ {
416
+ "name": "blocks",
417
+ "shape": 7,
418
+ "type": "SELECTEDBLOCKS",
419
+ "link": null
420
+ }
421
+ ],
422
+ "outputs": [
423
+ {
424
+ "name": "lora",
425
+ "type": "WANVIDLORA",
426
+ "links": [
427
+ 85
428
+ ]
429
+ }
430
+ ],
431
+ "properties": {
432
+ "cnr_id": "ComfyUI-WanVideoWrapper",
433
+ "ver": "17d48e3e450c7e75f60566e787404cb3d917f48e",
434
+ "Node name for S&R": "WanVideoLoraSelect"
435
+ },
436
+ "widgets_values": [
437
+ "WanVideo\\Pusa\\Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors",
438
+ 1.4,
439
+ false,
440
+ "<details><summary><b>Metadata</b></summary><table border='0' cellpadding='3'><tr><td colspan='2'><b>Metadata</b></td></tr><tr><td><b>format</b></td><td>pt</td></tr><tr><td><b>model_type</b></td><td>PusaV1-14B</td></tr></table></details>"
441
+ ]
442
+ },
443
+ {
444
+ "id": 75,
445
+ "type": "WanVideoLoraSelect",
446
+ "pos": [
447
+ -848.7334594726562,
448
+ -433.909423828125
449
+ ],
450
+ "size": [
451
+ 502.5318298339844,
452
+ 176
453
+ ],
454
+ "flags": {},
455
+ "order": 4,
456
+ "mode": 0,
457
+ "inputs": [
458
+ {
459
+ "name": "prev_lora",
460
+ "shape": 7,
461
+ "type": "WANVIDLORA",
462
+ "link": null
463
+ },
464
+ {
465
+ "name": "blocks",
466
+ "shape": 7,
467
+ "type": "SELECTEDBLOCKS",
468
+ "link": null
469
+ }
470
+ ],
471
+ "outputs": [
472
+ {
473
+ "name": "lora",
474
+ "type": "WANVIDLORA",
475
+ "links": [
476
+ 111
477
+ ]
478
+ }
479
+ ],
480
+ "properties": {
481
+ "cnr_id": "ComfyUI-WanVideoWrapper",
482
+ "ver": "17d48e3e450c7e75f60566e787404cb3d917f48e",
483
+ "Node name for S&R": "WanVideoLoraSelect"
484
+ },
485
+ "widgets_values": [
486
+ "WanVideo\\Lightx2v\\lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank32_bf16_.safetensors",
487
+ 1,
488
+ false,
489
+ "<details><summary><b>Metadata</b></summary><table border='0' cellpadding='3'><tr><td colspan='2'><b>Metadata</b></td></tr><tr><td>No metadata found</td></tr></table></details>"
490
+ ]
491
+ },
492
+ {
493
+ "id": 84,
494
+ "type": "MarkdownNote",
495
+ "pos": [
496
+ -846.2874145507812,
497
+ -711.7315063476562
498
+ ],
499
+ "size": [
500
+ 555.7915649414062,
501
+ 202.42054748535156
502
+ ],
503
+ "flags": {},
504
+ "order": 5,
505
+ "mode": 0,
506
+ "inputs": [],
507
+ "outputs": [],
508
+ "properties": {},
509
+ "widgets_values": [
510
+ "LoRA links:\n\nPusa:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Pusa/Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Pusa/Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors)\n\nDistill:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank32_bf16.safetensors](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank32_bf16.safetensors)"
511
+ ],
512
+ "color": "#432",
513
+ "bgcolor": "#653"
514
+ },
515
+ {
516
+ "id": 39,
517
+ "type": "WanVideoBlockSwap",
518
+ "pos": [
519
+ -214.50437927246094,
520
+ -775.3147583007812
521
+ ],
522
+ "size": [
523
+ 315,
524
+ 154
525
+ ],
526
+ "flags": {},
527
+ "order": 6,
528
+ "mode": 0,
529
+ "inputs": [],
530
+ "outputs": [
531
+ {
532
+ "name": "block_swap_args",
533
+ "type": "BLOCKSWAPARGS",
534
+ "slot_index": 0,
535
+ "links": [
536
+ 50
537
+ ]
538
+ }
539
+ ],
540
+ "properties": {
541
+ "cnr_id": "ComfyUI-WanVideoWrapper",
542
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
543
+ "Node name for S&R": "WanVideoBlockSwap"
544
+ },
545
+ "widgets_values": [
546
+ 10,
547
+ false,
548
+ false,
549
+ true,
550
+ 0
551
+ ],
552
+ "color": "#223",
553
+ "bgcolor": "#335"
554
+ },
555
+ {
556
+ "id": 58,
557
+ "type": "LoadImage",
558
+ "pos": [
559
+ -1042.868408203125,
560
+ -97.90877532958984
561
+ ],
562
+ "size": [
563
+ 413.10479736328125,
564
+ 498.3180847167969
565
+ ],
566
+ "flags": {},
567
+ "order": 7,
568
+ "mode": 0,
569
+ "inputs": [],
570
+ "outputs": [
571
+ {
572
+ "name": "IMAGE",
573
+ "type": "IMAGE",
574
+ "links": [
575
+ 96
576
+ ]
577
+ },
578
+ {
579
+ "name": "MASK",
580
+ "type": "MASK",
581
+ "links": null
582
+ }
583
+ ],
584
+ "properties": {
585
+ "cnr_id": "comfy-core",
586
+ "ver": "0.3.26",
587
+ "Node name for S&R": "LoadImage"
588
+ },
589
+ "widgets_values": [
590
+ "oldman_upscaled.png",
591
+ "image"
592
+ ],
593
+ "color": "#2a363b",
594
+ "bgcolor": "#3f5159"
595
+ },
596
+ {
597
+ "id": 71,
598
+ "type": "ImageResizeKJv2",
599
+ "pos": [
600
+ -542.5422973632812,
601
+ -104.77092742919922
602
+ ],
603
+ "size": [
604
+ 270,
605
+ 336
606
+ ],
607
+ "flags": {},
608
+ "order": 12,
609
+ "mode": 0,
610
+ "inputs": [
611
+ {
612
+ "name": "image",
613
+ "type": "IMAGE",
614
+ "link": 96
615
+ },
616
+ {
617
+ "name": "mask",
618
+ "shape": 7,
619
+ "type": "MASK",
620
+ "link": null
621
+ }
622
+ ],
623
+ "outputs": [
624
+ {
625
+ "name": "IMAGE",
626
+ "type": "IMAGE",
627
+ "links": [
628
+ 115
629
+ ]
630
+ },
631
+ {
632
+ "name": "width",
633
+ "type": "INT",
634
+ "links": [
635
+ 100
636
+ ]
637
+ },
638
+ {
639
+ "name": "height",
640
+ "type": "INT",
641
+ "links": [
642
+ 101
643
+ ]
644
+ },
645
+ {
646
+ "name": "mask",
647
+ "type": "MASK",
648
+ "links": null
649
+ }
650
+ ],
651
+ "properties": {
652
+ "cnr_id": "comfyui-kjnodes",
653
+ "ver": "ad37ce656c13e9abea002b46e3a89be3dba32355",
654
+ "Node name for S&R": "ImageResizeKJv2"
655
+ },
656
+ "widgets_values": [
657
+ 720,
658
+ 720,
659
+ "lanczos",
660
+ "crop",
661
+ "0, 0, 0",
662
+ "center",
663
+ 16,
664
+ "cpu",
665
+ "<tr><td>Output: </td><td><b>1</b> x <b>720</b> x <b>720 | 5.93MB</b></td></tr>"
666
+ ]
667
+ },
668
+ {
669
+ "id": 70,
670
+ "type": "WanVideoEncode",
671
+ "pos": [
672
+ -81.78827667236328,
673
+ 24.176483154296875
674
+ ],
675
+ "size": [
676
+ 270,
677
+ 242
678
+ ],
679
+ "flags": {},
680
+ "order": 15,
681
+ "mode": 0,
682
+ "inputs": [
683
+ {
684
+ "name": "vae",
685
+ "type": "WANVAE",
686
+ "link": 88
687
+ },
688
+ {
689
+ "name": "image",
690
+ "type": "IMAGE",
691
+ "link": 115
692
+ },
693
+ {
694
+ "name": "mask",
695
+ "shape": 7,
696
+ "type": "MASK",
697
+ "link": null
698
+ }
699
+ ],
700
+ "outputs": [
701
+ {
702
+ "name": "samples",
703
+ "type": "LATENT",
704
+ "links": [
705
+ 103
706
+ ]
707
+ }
708
+ ],
709
+ "properties": {
710
+ "cnr_id": "ComfyUI-WanVideoWrapper",
711
+ "ver": "17d48e3e450c7e75f60566e787404cb3d917f48e",
712
+ "Node name for S&R": "WanVideoEncode"
713
+ },
714
+ "widgets_values": [
715
+ false,
716
+ 272,
717
+ 272,
718
+ 144,
719
+ 128,
720
+ 0,
721
+ 1
722
+ ]
723
+ },
724
+ {
725
+ "id": 16,
726
+ "type": "WanVideoTextEncode",
727
+ "pos": [
728
+ 787.8640747070312,
729
+ -91.52558898925781
730
+ ],
731
+ "size": [
732
+ 453.0067138671875,
733
+ 328.0239562988281
734
+ ],
735
+ "flags": {},
736
+ "order": 16,
737
+ "mode": 0,
738
+ "inputs": [
739
+ {
740
+ "name": "t5",
741
+ "type": "WANTEXTENCODER",
742
+ "link": 15
743
+ },
744
+ {
745
+ "name": "model_to_offload",
746
+ "shape": 7,
747
+ "type": "WANVIDEOMODEL",
748
+ "link": 79
749
+ }
750
+ ],
751
+ "outputs": [
752
+ {
753
+ "name": "text_embeds",
754
+ "type": "WANVIDEOTEXTEMBEDS",
755
+ "slot_index": 0,
756
+ "links": [
757
+ 30
758
+ ]
759
+ }
760
+ ],
761
+ "properties": {
762
+ "cnr_id": "ComfyUI-WanVideoWrapper",
763
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
764
+ "Node name for S&R": "WanVideoTextEncode"
765
+ },
766
+ "widgets_values": [
767
+ "an old man takes of his hat",
768
+ "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards\"",
769
+ true
770
+ ],
771
+ "color": "#332922",
772
+ "bgcolor": "#593930"
773
+ },
774
+ {
775
+ "id": 27,
776
+ "type": "WanVideoSampler",
777
+ "pos": [
778
+ 1315.2401123046875,
779
+ -401.48028564453125
780
+ ],
781
+ "size": [
782
+ 315,
783
+ 927
784
+ ],
785
+ "flags": {},
786
+ "order": 18,
787
+ "mode": 0,
788
+ "inputs": [
789
+ {
790
+ "name": "model",
791
+ "type": "WANVIDEOMODEL",
792
+ "link": 29
793
+ },
794
+ {
795
+ "name": "image_embeds",
796
+ "type": "WANVIDIMAGE_EMBEDS",
797
+ "link": 102
798
+ },
799
+ {
800
+ "name": "text_embeds",
801
+ "shape": 7,
802
+ "type": "WANVIDEOTEXTEMBEDS",
803
+ "link": 30
804
+ },
805
+ {
806
+ "name": "samples",
807
+ "shape": 7,
808
+ "type": "LATENT",
809
+ "link": null
810
+ },
811
+ {
812
+ "name": "feta_args",
813
+ "shape": 7,
814
+ "type": "FETAARGS",
815
+ "link": null
816
+ },
817
+ {
818
+ "name": "context_options",
819
+ "shape": 7,
820
+ "type": "WANVIDCONTEXT",
821
+ "link": null
822
+ },
823
+ {
824
+ "name": "cache_args",
825
+ "shape": 7,
826
+ "type": "CACHEARGS",
827
+ "link": null
828
+ },
829
+ {
830
+ "name": "flowedit_args",
831
+ "shape": 7,
832
+ "type": "FLOWEDITARGS",
833
+ "link": null
834
+ },
835
+ {
836
+ "name": "slg_args",
837
+ "shape": 7,
838
+ "type": "SLGARGS",
839
+ "link": null
840
+ },
841
+ {
842
+ "name": "loop_args",
843
+ "shape": 7,
844
+ "type": "LOOPARGS",
845
+ "link": null
846
+ },
847
+ {
848
+ "name": "experimental_args",
849
+ "shape": 7,
850
+ "type": "EXPERIMENTALARGS",
851
+ "link": null
852
+ },
853
+ {
854
+ "name": "sigmas",
855
+ "shape": 7,
856
+ "type": "SIGMAS",
857
+ "link": null
858
+ },
859
+ {
860
+ "name": "unianimate_poses",
861
+ "shape": 7,
862
+ "type": "UNIANIMATE_POSE",
863
+ "link": null
864
+ },
865
+ {
866
+ "name": "fantasytalking_embeds",
867
+ "shape": 7,
868
+ "type": "FANTASYTALKING_EMBEDS",
869
+ "link": null
870
+ },
871
+ {
872
+ "name": "uni3c_embeds",
873
+ "shape": 7,
874
+ "type": "UNI3C_EMBEDS",
875
+ "link": null
876
+ },
877
+ {
878
+ "name": "multitalk_embeds",
879
+ "shape": 7,
880
+ "type": "MULTITALK_EMBEDS",
881
+ "link": null
882
+ },
883
+ {
884
+ "name": "freeinit_args",
885
+ "shape": 7,
886
+ "type": "FREEINITARGS",
887
+ "link": null
888
+ }
889
+ ],
890
+ "outputs": [
891
+ {
892
+ "name": "samples",
893
+ "type": "LATENT",
894
+ "slot_index": 0,
895
+ "links": [
896
+ 117
897
+ ]
898
+ }
899
+ ],
900
+ "properties": {
901
+ "cnr_id": "ComfyUI-WanVideoWrapper",
902
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
903
+ "Node name for S&R": "WanVideoSampler"
904
+ },
905
+ "widgets_values": [
906
+ 6,
907
+ 1,
908
+ 5,
909
+ 46,
910
+ "fixed",
911
+ true,
912
+ "flowmatch_pusa",
913
+ 0,
914
+ 1,
915
+ "",
916
+ "comfy",
917
+ ""
918
+ ]
919
+ },
920
+ {
921
+ "id": 22,
922
+ "type": "WanVideoModelLoader",
923
+ "pos": [
924
+ 157.20700073242188,
925
+ -839.4575805664062
926
+ ],
927
+ "size": [
928
+ 477.4410095214844,
929
+ 274
930
+ ],
931
+ "flags": {},
932
+ "order": 14,
933
+ "mode": 0,
934
+ "inputs": [
935
+ {
936
+ "name": "compile_args",
937
+ "shape": 7,
938
+ "type": "WANCOMPILEARGS",
939
+ "link": 119
940
+ },
941
+ {
942
+ "name": "block_swap_args",
943
+ "shape": 7,
944
+ "type": "BLOCKSWAPARGS",
945
+ "link": 50
946
+ },
947
+ {
948
+ "name": "lora",
949
+ "shape": 7,
950
+ "type": "WANVIDLORA",
951
+ "link": 85
952
+ },
953
+ {
954
+ "name": "vram_management_args",
955
+ "shape": 7,
956
+ "type": "VRAM_MANAGEMENTARGS",
957
+ "link": null
958
+ },
959
+ {
960
+ "name": "vace_model",
961
+ "shape": 7,
962
+ "type": "VACEPATH",
963
+ "link": null
964
+ },
965
+ {
966
+ "name": "fantasytalking_model",
967
+ "shape": 7,
968
+ "type": "FANTASYTALKINGMODEL",
969
+ "link": null
970
+ },
971
+ {
972
+ "name": "multitalk_model",
973
+ "shape": 7,
974
+ "type": "MULTITALKMODEL",
975
+ "link": null
976
+ }
977
+ ],
978
+ "outputs": [
979
+ {
980
+ "name": "model",
981
+ "type": "WANVIDEOMODEL",
982
+ "slot_index": 0,
983
+ "links": [
984
+ 29,
985
+ 79
986
+ ]
987
+ }
988
+ ],
989
+ "properties": {
990
+ "cnr_id": "ComfyUI-WanVideoWrapper",
991
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
992
+ "Node name for S&R": "WanVideoModelLoader"
993
+ },
994
+ "widgets_values": [
995
+ "WanVideo\\Wan2_1-T2V-14B_fp8_e4m3fn.safetensors",
996
+ "fp16_fast",
997
+ "fp8_e4m3fn",
998
+ "offload_device",
999
+ "sageattn"
1000
+ ],
1001
+ "color": "#223",
1002
+ "bgcolor": "#335"
1003
+ },
1004
+ {
1005
+ "id": 78,
1006
+ "type": "WanVideoEmptyEmbeds",
1007
+ "pos": [
1008
+ 330.17205810546875,
1009
+ -116.94092559814453
1010
+ ],
1011
+ "size": [
1012
+ 272.431640625,
1013
+ 126
1014
+ ],
1015
+ "flags": {},
1016
+ "order": 17,
1017
+ "mode": 0,
1018
+ "inputs": [
1019
+ {
1020
+ "name": "control_embeds",
1021
+ "shape": 7,
1022
+ "type": "WANVIDIMAGE_EMBEDS",
1023
+ "link": null
1024
+ },
1025
+ {
1026
+ "name": "extra_latents",
1027
+ "shape": 7,
1028
+ "type": "LATENT",
1029
+ "link": 103
1030
+ },
1031
+ {
1032
+ "name": "width",
1033
+ "type": "INT",
1034
+ "widget": {
1035
+ "name": "width"
1036
+ },
1037
+ "link": 100
1038
+ },
1039
+ {
1040
+ "name": "height",
1041
+ "type": "INT",
1042
+ "widget": {
1043
+ "name": "height"
1044
+ },
1045
+ "link": 101
1046
+ }
1047
+ ],
1048
+ "outputs": [
1049
+ {
1050
+ "name": "image_embeds",
1051
+ "type": "WANVIDIMAGE_EMBEDS",
1052
+ "links": [
1053
+ 102
1054
+ ]
1055
+ }
1056
+ ],
1057
+ "properties": {
1058
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1059
+ "ver": "6bc53b771d5d2af316801cb69e2ee10dbf7d18b1",
1060
+ "Node name for S&R": "WanVideoEmptyEmbeds"
1061
+ },
1062
+ "widgets_values": [
1063
+ 832,
1064
+ 480,
1065
+ 81
1066
+ ]
1067
+ },
1068
+ {
1069
+ "id": 35,
1070
+ "type": "WanVideoTorchCompileSettings",
1071
+ "pos": [
1072
+ -276.8500671386719,
1073
+ -1050.6326904296875
1074
+ ],
1075
+ "size": [
1076
+ 390.5999755859375,
1077
+ 202
1078
+ ],
1079
+ "flags": {},
1080
+ "order": 8,
1081
+ "mode": 0,
1082
+ "inputs": [],
1083
+ "outputs": [
1084
+ {
1085
+ "name": "torch_compile_args",
1086
+ "type": "WANCOMPILEARGS",
1087
+ "slot_index": 0,
1088
+ "links": [
1089
+ 119
1090
+ ]
1091
+ }
1092
+ ],
1093
+ "properties": {
1094
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1095
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
1096
+ "Node name for S&R": "WanVideoTorchCompileSettings"
1097
+ },
1098
+ "widgets_values": [
1099
+ "inductor",
1100
+ false,
1101
+ "default",
1102
+ false,
1103
+ 64,
1104
+ true,
1105
+ 128
1106
+ ],
1107
+ "color": "#223",
1108
+ "bgcolor": "#335"
1109
+ }
1110
+ ],
1111
+ "links": [
1112
+ [
1113
+ 15,
1114
+ 11,
1115
+ 0,
1116
+ 16,
1117
+ 0,
1118
+ "WANTEXTENCODER"
1119
+ ],
1120
+ [
1121
+ 29,
1122
+ 22,
1123
+ 0,
1124
+ 27,
1125
+ 0,
1126
+ "WANVIDEOMODEL"
1127
+ ],
1128
+ [
1129
+ 30,
1130
+ 16,
1131
+ 0,
1132
+ 27,
1133
+ 2,
1134
+ "WANVIDEOTEXTEMBEDS"
1135
+ ],
1136
+ [
1137
+ 36,
1138
+ 28,
1139
+ 0,
1140
+ 30,
1141
+ 0,
1142
+ "IMAGE"
1143
+ ],
1144
+ [
1145
+ 43,
1146
+ 38,
1147
+ 0,
1148
+ 28,
1149
+ 0,
1150
+ "VAE"
1151
+ ],
1152
+ [
1153
+ 50,
1154
+ 39,
1155
+ 0,
1156
+ 22,
1157
+ 1,
1158
+ "BLOCKSWAPARGS"
1159
+ ],
1160
+ [
1161
+ 52,
1162
+ 48,
1163
+ 0,
1164
+ 49,
1165
+ 0,
1166
+ "CLIP"
1167
+ ],
1168
+ [
1169
+ 53,
1170
+ 48,
1171
+ 0,
1172
+ 50,
1173
+ 0,
1174
+ "CLIP"
1175
+ ],
1176
+ [
1177
+ 54,
1178
+ 49,
1179
+ 0,
1180
+ 46,
1181
+ 0,
1182
+ "CONDITIONING"
1183
+ ],
1184
+ [
1185
+ 55,
1186
+ 50,
1187
+ 0,
1188
+ 46,
1189
+ 1,
1190
+ "CONDITIONING"
1191
+ ],
1192
+ [
1193
+ 79,
1194
+ 22,
1195
+ 0,
1196
+ 16,
1197
+ 1,
1198
+ "WANVIDEOMODEL"
1199
+ ],
1200
+ [
1201
+ 85,
1202
+ 68,
1203
+ 0,
1204
+ 22,
1205
+ 2,
1206
+ "WANVIDLORA"
1207
+ ],
1208
+ [
1209
+ 88,
1210
+ 38,
1211
+ 0,
1212
+ 70,
1213
+ 0,
1214
+ "WANVAE"
1215
+ ],
1216
+ [
1217
+ 96,
1218
+ 58,
1219
+ 0,
1220
+ 71,
1221
+ 0,
1222
+ "IMAGE"
1223
+ ],
1224
+ [
1225
+ 100,
1226
+ 71,
1227
+ 1,
1228
+ 78,
1229
+ 2,
1230
+ "INT"
1231
+ ],
1232
+ [
1233
+ 101,
1234
+ 71,
1235
+ 2,
1236
+ 78,
1237
+ 3,
1238
+ "INT"
1239
+ ],
1240
+ [
1241
+ 102,
1242
+ 78,
1243
+ 0,
1244
+ 27,
1245
+ 1,
1246
+ "WANVIDIMAGE_EMBEDS"
1247
+ ],
1248
+ [
1249
+ 103,
1250
+ 70,
1251
+ 0,
1252
+ 78,
1253
+ 1,
1254
+ "LATENT"
1255
+ ],
1256
+ [
1257
+ 111,
1258
+ 75,
1259
+ 0,
1260
+ 68,
1261
+ 0,
1262
+ "WANVIDLORA"
1263
+ ],
1264
+ [
1265
+ 115,
1266
+ 71,
1267
+ 0,
1268
+ 70,
1269
+ 1,
1270
+ "IMAGE"
1271
+ ],
1272
+ [
1273
+ 117,
1274
+ 27,
1275
+ 0,
1276
+ 28,
1277
+ 1,
1278
+ "LATENT"
1279
+ ],
1280
+ [
1281
+ 119,
1282
+ 35,
1283
+ 0,
1284
+ 22,
1285
+ 0,
1286
+ "WANCOMPILEARGS"
1287
+ ]
1288
+ ],
1289
+ "groups": [
1290
+ {
1291
+ "id": 1,
1292
+ "title": "ComfyUI text encoding alternative",
1293
+ "bounding": [
1294
+ -18.503620147705078,
1295
+ 360.8843688964844,
1296
+ 1210.621337890625,
1297
+ 805.9080810546875
1298
+ ],
1299
+ "color": "#3f789e",
1300
+ "font_size": 24,
1301
+ "flags": {}
1302
+ }
1303
+ ],
1304
+ "config": {},
1305
+ "extra": {
1306
+ "ds": {
1307
+ "scale": 0.6115909044841845,
1308
+ "offset": [
1309
+ 1277.9482772926067,
1310
+ 850.2732515940098
1311
+ ]
1312
+ },
1313
+ "node_versions": {
1314
+ "ComfyUI-WanVideoWrapper": "5a2383621a05825d0d0437781afcb8552d9590fd",
1315
+ "comfy-core": "0.3.26",
1316
+ "ComfyUI-KJNodes": "a5bd3c86c8ed6b83c55c2d0e7a59515b15a0137f",
1317
+ "ComfyUI-VideoHelperSuite": "0a75c7958fe320efcb052f1d9f8451fd20c730a8"
1318
+ },
1319
+ "VHS_latentpreview": true,
1320
+ "VHS_latentpreviewrate": 0,
1321
+ "VHS_MetadataImage": true,
1322
+ "VHS_KeepIntermediate": true,
1323
+ "frontendVersion": "1.24.1"
1324
+ },
1325
+ "version": 0.4
1326
+ }