aliensmn committed on
Commit
cf812a0
·
verified ·
1 Parent(s): 63d5317

Mirror from https://github.com/kijai/ComfyUI-WanVideoWrapper

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +16 -35
  2. .github/FUNDING.yml +1 -0
  3. .github/workflows/publish.yml +25 -0
  4. .gitignore +13 -0
  5. ATI/motion.py +42 -0
  6. ATI/motion_patch.py +142 -0
  7. ATI/nodes.py +329 -0
  8. HuMo/audio_proj.py +87 -0
  9. HuMo/nodes.py +287 -0
  10. HuMo/whisper_config.json +50 -0
  11. LICENSE +201 -0
  12. MTV/data/mean.npy +3 -0
  13. MTV/data/std.npy +3 -0
  14. MTV/draw_pose.py +142 -0
  15. MTV/motion4d/__init__.py +1 -0
  16. MTV/motion4d/vqvae.py +329 -0
  17. MTV/mtv.py +193 -0
  18. MTV/nlf.py +0 -0
  19. MTV/nodes.py +242 -0
  20. __init__.py +113 -0
  21. cache_methods/cache_methods.py +158 -0
  22. cache_methods/nodes_cache.py +140 -0
  23. configs/T5_tokenizer/special_tokens_map.json +308 -0
  24. configs/T5_tokenizer/spiece.model +3 -0
  25. configs/T5_tokenizer/tokenizer.json +3 -0
  26. configs/T5_tokenizer/tokenizer_config.json +2748 -0
  27. configs/transformer_config_i2v.json +14 -0
  28. context_windows/context.py +258 -0
  29. controlnet/nodes.py +173 -0
  30. controlnet/wan_controlnet.py +281 -0
  31. custom_linear.py +115 -0
  32. diffsynth/vram_management/LICENSE +201 -0
  33. diffsynth/vram_management/__init__.py +1 -0
  34. diffsynth/vram_management/layers.py +103 -0
  35. diffsynth/vram_management/utils.py +51 -0
  36. echoshot/echoshot.py +104 -0
  37. enhance_a_video/LICENSE +562 -0
  38. enhance_a_video/__init__.py +0 -0
  39. enhance_a_video/enhance.py +55 -0
  40. enhance_a_video/globals.py +36 -0
  41. example_workflows/example_inputs/MTV_crafter_example_pose.mp4 +3 -0
  42. example_workflows/example_inputs/env.png +3 -0
  43. example_workflows/example_inputs/human.png +3 -0
  44. example_workflows/example_inputs/jeep.mp4 +3 -0
  45. example_workflows/example_inputs/thing.png +0 -0
  46. example_workflows/example_inputs/wolf_interpolated.mp4 +3 -0
  47. example_workflows/example_inputs/woman.jpg +3 -0
  48. example_workflows/example_inputs/woman.wav +3 -0
  49. example_workflows/wanvideo2_2_I2V_A14B_example_WIP.json +2074 -0
  50. example_workflows/wanvideo_14B_pusa_I2V_example_01.json +1326 -0
.gitattributes CHANGED
@@ -1,35 +1,16 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
3
+ MTV/data/mean.npy filter=lfs diff=lfs merge=lfs -text
4
+ MTV/data/std.npy filter=lfs diff=lfs merge=lfs -text
5
+ configs/T5_tokenizer/spiece.model filter=lfs diff=lfs merge=lfs -text
6
+ configs/T5_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
7
+ example_workflows/example_inputs/MTV_crafter_example_pose.mp4 filter=lfs diff=lfs merge=lfs -text
8
+ example_workflows/example_inputs/env.png filter=lfs diff=lfs merge=lfs -text
9
+ example_workflows/example_inputs/human.png filter=lfs diff=lfs merge=lfs -text
10
+ example_workflows/example_inputs/jeep.mp4 filter=lfs diff=lfs merge=lfs -text
11
+ example_workflows/example_inputs/wolf_interpolated.mp4 filter=lfs diff=lfs merge=lfs -text
12
+ example_workflows/example_inputs/woman.jpg filter=lfs diff=lfs merge=lfs -text
13
+ example_workflows/example_inputs/woman.wav filter=lfs diff=lfs merge=lfs -text
14
+ fantasyportrait/models/face_det.onnx filter=lfs diff=lfs merge=lfs -text
15
+ fantasyportrait/models/face_landmark.onnx filter=lfs diff=lfs merge=lfs -text
16
+ multitalk/encoded_silence.safetensors filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/FUNDING.yml ADDED
@@ -0,0 +1 @@
 
 
1
+ github: [kijai]
.github/workflows/publish.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish to Comfy registry
2
+ on:
3
+ workflow_dispatch:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - "pyproject.toml"
9
+
10
+ permissions:
11
+ issues: write
12
+
13
+ jobs:
14
+ publish-node:
15
+ name: Publish Custom Node to registry
16
+ runs-on: ubuntu-latest
17
+ if: ${{ github.repository_owner == 'kijai' }}
18
+ steps:
19
+ - name: Check out code
20
+ uses: actions/checkout@v4
21
+ - name: Publish Custom Node
22
+ uses: Comfy-Org/publish-node-action@v1
23
+ with:
24
+ ## Add your own personal access token to your Github Repository secrets and reference it here.
25
+ personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ output/
2
+ *__pycache__/
3
+ samples*/
4
+ runs/
5
+ checkpoints/
6
+ master_ip
7
+ logs/
8
+ *.DS_Store
9
+ .idea
10
+ tools/
11
+ .vscode/
12
+ convert_*
13
+ *.pt
ATI/motion.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, List, Optional, Tuple, Union
16
+ import numpy as np
17
+ import torch
18
+
19
def process_tracks(tracks_np: np.ndarray, frame_size: Tuple[int, int], quant_multi: int = 8, **kwargs):
    """Normalize raw point tracks into the model's packed track format.

    Args:
        tracks_np: float array of tracked points, layout (N, 121, 1, 3)
            or already time-major (121, N, 1, 3); last dim = (x, y, visible).
        frame_size: (W, H) of the source frames in pixels.
        quant_multi: unused here; kept for interface compatibility.

    Returns:
        Tensor of shape (81, N, 4) with rows (t, x, y, visible):
        coordinates centered and scaled by the short edge into roughly
        [-1, 1], and the 121-sample (24 fps) sequence resampled to 81
        samples (16 fps).
    """
    tracks = torch.from_numpy(tracks_np).float()

    # Accept (N, 121, ...) input by moving the time axis first.
    if tracks.shape[1] == 121:
        tracks = torch.permute(tracks, (1, 0, 2, 3))

    coords, visibles = tracks[..., :2], tracks[..., 2:3]

    # Center on the frame, then scale by the short edge so the short
    # axis spans [-1, 1].
    short_edge = min(*frame_size)
    coords = coords - torch.tensor([*frame_size]).type_as(coords) / 2
    coords = coords / short_edge * 2

    # Map visibility {0, 1} -> {-1, 1}.
    visibles = visibles * 2 - 1

    # Per-frame time coordinate in [-1, 1], broadcast across all points.
    trange = torch.linspace(-1, 1, coords.shape[0]).view(-1, 1, 1, 1).expand(*visibles.shape)

    packed = torch.cat([trange, coords, visibles], dim=-1).view(121, -1, 4)
    head = packed[:1]
    tail = packed[1:]  # 121 => 120 | 1
    # 24 fps -> 16 fps: duplicate to 240 virtual samples, keep every third.
    tail = torch.repeat_interleave(tail, 2, dim=0)[1::3]  # 120 => 240 => 80
    return torch.cat([head, tail], dim=0)
ATI/motion_patch.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List, Optional, Tuple, Union
16
+ import torch
17
+
18
+
19
+ # Refer to https://github.com/Angtian/VoGE/blob/main/VoGE/Utils.py
20
def ind_sel(target: torch.Tensor, ind: torch.Tensor, dim: int = 1):
    """
    :param target: [... (can be k or 1), n > M, ...]
    :param ind: [... (k), M]
    :param dim: dim to apply index on
    :return: sel_target [... (k), M, ...]
    """
    assert (
        len(ind.shape) > dim
    ), "Index must have the target dim, but get dim: %d, ind shape: %s" % (dim, str(ind.shape))

    # Broadcast the leading (batch-like) axes of `target` against `ind`:
    # every size-1 axis before `dim` expands to the matching `ind` size.
    lead = [ind.shape[k] if target.shape[k] == 1 else -1 for k in range(dim)]
    target = target.expand(*lead, *([-1] * (len(target.shape) - dim)))

    # Append singleton axes to `ind` and expand them over the trailing
    # feature axes of `target`, as torch.gather requires a full-rank index.
    index = ind
    trailing = len(target.shape) - (dim + 1)
    if trailing > 0:
        for _ in range(trailing):
            index = index.unsqueeze(-1)
        index = index.expand(*(-1,) * (dim + 1), *target.shape[dim + 1:])

    return torch.gather(target, dim=dim, index=index)
49
+
50
+
51
def merge_final(vert_attr: torch.Tensor, weight: torch.Tensor, vert_assign: torch.Tensor):
    """
    :param vert_attr: [n, d] or [b, n, d] color or feature of each vertex
    :param weight: [b(optional), w, h, M] weight of selected vertices
    :param vert_assign: [b(optional), w, h, M] selective index
    :return: weighted sum over the M selected vertices, [b(optional), w, h, d]
    """
    target_dim = len(vert_assign.shape) - 1
    if len(vert_attr.shape) == 2:
        # Unbatched attributes: prepend singleton axes so ind_sel can
        # broadcast [n, d] against the [.., w, h, M] index tensor.
        assert vert_attr.shape[0] > vert_assign.max()
        shaped = vert_attr.reshape([1] * target_dim + list(vert_attr.shape))
    else:
        # Batched attributes: keep the batch axis, pad singletons between.
        assert vert_attr.shape[1] > vert_assign.max()
        shaped = vert_attr.reshape(
            [vert_attr.shape[0]] + [1] * (target_dim - 1) + list(vert_attr.shape[1:])
        )
    sel_attr = ind_sel(shaped, vert_assign.type(torch.long), dim=target_dim)

    # Weight each selected vertex attribute, then sum over M.
    return torch.sum(sel_attr * weight.unsqueeze(-1), dim=-2)
81
+
82
+
83
def patch_motion(
    tracks: torch.FloatTensor,  # (B, T, N, 4)
    vid: torch.FloatTensor,  # (C, T, H, W)
    temperature: float = 220.0,
    vae_divide: tuple = (4, 16),
    topk: int = 2,
):
    """Inject point-track motion guidance into a latent video tensor.

    For every latent frame after the first, each track point spreads the
    feature sampled at its first-frame position to nearby spatial
    locations, weighted by exp(-dist^2 * temperature) and gated by the
    point's visibility. The result is blended with the original latent
    and returned with `vae_divide[0]` confidence-mask channels prepended.

    Args:
        tracks: (B, T, N, 4) rows of (t, x, y, visible); coordinates are
            assumed already normalized by the short edge as produced by
            process_tracks -- TODO confirm against caller.
        vid: (C, T, H, W) latent; channels [vae_divide[0]:] are features.
        temperature: sharpness of the distance weighting.
        vae_divide: (mask_channels, feature_channels) split of C.
        topk: number of strongest tracks blended per spatial location.

    Returns:
        (vae_divide[0] + feature_channels, T, H, W) tensor: expanded
        weight mask concatenated with the motion-patched features.
    """
    with torch.no_grad():
        _, T, H, W = vid.shape
        N = tracks.shape[2]
        _, tracks, visible = torch.split(
            tracks, [1, 2, 1], dim=-1
        )  # (B, T, N, 2) | (B, T, N, 1)
        # Re-express both axes in short-edge units and clamp into frame.
        tracks_n = tracks / torch.tensor([W / min(H, W), H / min(H, W)], device=tracks.device)
        tracks_n = tracks_n.clamp(-1, 1)
        visible = visible.clamp(0, 1)

        # Spatial grid in the same short-edge-normalized coordinates.
        xx = torch.linspace(-W / min(H, W), W / min(H, W), W)
        yy = torch.linspace(-H / min(H, W), H / min(H, W), H)

        grid = torch.stack(torch.meshgrid(yy, xx, indexing="ij")[::-1], dim=-1).to(
            tracks.device
        )

        tracks_pad = tracks[:, 1:]
        visible_pad = visible[:, 1:]

        # Collapse each group of 4 track samples onto one latent frame
        # (the view requires 4*(T-1) samples after the first), visibility-weighted.
        visible_align = visible_pad.view(T - 1, 4, *visible_pad.shape[2:]).sum(1)
        tracks_align = (tracks_pad * visible_pad).view(T - 1, 4, *tracks_pad.shape[2:]).sum(
            1
        ) / (visible_align + 1e-5)
        # Squared distance from every grid cell to every aligned track point.
        dist_ = (
            (tracks_align[:, None, None] - grid[None, :, :, None]).pow(2).sum(-1)
        )  # T, H, W, N
        weight = torch.exp(-dist_ * temperature) * visible_align.clamp(0, 1).view(
            T - 1, 1, 1, N
        )
        # Keep only the topk most influential tracks per location.
        vert_weight, vert_index = torch.topk(
            weight, k=min(topk, weight.shape[-1]), dim=-1
        )

        grid_mode = "bilinear"
        # Sample each track's feature from the FIRST frame's feature channels.
        point_feature = torch.nn.functional.grid_sample(
            vid[vae_divide[0]:].permute(1, 0, 2, 3)[:1],
            tracks_n[:, :1].type(vid.dtype),
            mode=grid_mode,
            padding_mode="zeros",
            align_corners=False,
        )
        point_feature = point_feature.squeeze(0).squeeze(1).permute(1, 0)  # N, C=16

        out_feature = merge_final(point_feature, vert_weight, vert_index).permute(3, 0, 1, 2)  # T - 1, H, W, C => C, T - 1, H, W
        out_weight = vert_weight.sum(-1)  # T - 1, H, W

        # out feature -> already soft weighted
        mix_feature = out_feature + vid[vae_divide[0]:, 1:] * (1 - out_weight.clamp(0, 1))

        # Re-attach the untouched first frame; build the matching mask.
        out_feature_full = torch.cat([vid[vae_divide[0]:, :1], mix_feature], dim=1)  # C, T, H, W
        out_mask_full = torch.cat([torch.ones_like(out_weight[:1]), out_weight], dim=0)  # T, H, W
        return torch.cat([out_mask_full[None].expand(vae_divide[0], -1, -1, -1), out_feature_full], dim=0)
ATI/nodes.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from .motion import process_tracks
3
+ import numpy as np
4
+ from typing import List, Tuple
5
+ import torch
6
+ FIXED_LENGTH = 121
7
def pad_pts(tr):
    """Convert a list of {x, y} dicts to a (FIXED_LENGTH, 1, 3) float32 array.

    Each point becomes (x, y, 1), the trailing 1 marking it as a real
    sample. Shorter tracks are zero-padded, longer ones truncated.
    """
    pts = np.asarray([(p['x'], p['y'], 1) for p in tr], dtype=np.float32)
    count = pts.shape[0]
    if count >= FIXED_LENGTH:
        pts = pts[:FIXED_LENGTH]
    else:
        filler = np.zeros((FIXED_LENGTH - count, 3), dtype=np.float32)
        pts = np.concatenate((pts, filler), axis=0)
    return pts.reshape(FIXED_LENGTH, 1, 3)
17
+
18
def age_to_bgr(ratio: float) -> Tuple[int, int, int]:
    """
    Map ratio∈[0,1] through: 0→blue, 1/3→green, 2/3→yellow, 1→red.

    NOTE: despite the function name, the tuple is returned in (R, G, B)
    order — the previous docstring claimed (B, G, R), which contradicted
    the code. The caller draws on RGB frames, so (R, G, B) is what it needs.
    """
    if ratio <= 1/3:
        # blue→green
        t = ratio / (1/3)
        b = int(255 * (1 - t))
        g = int(255 * t)
        r = 0
    elif ratio <= 2/3:
        # green→yellow
        t = (ratio - 1/3) / (1/3)
        b = 0
        g = 255
        r = int(255 * t)
    else:
        # yellow→red
        t = (ratio - 2/3) / (1/3)
        b = 0
        g = int(255 * (1 - t))
        r = 255
    return (r, g, b)
42
+
43
def paint_point_track(
    frames: np.ndarray,
    point_tracks: np.ndarray,
    visibles: np.ndarray,
    min_radius: int = 1,
    max_radius: int = 6,
    max_retain: int = 50
) -> np.ndarray:
    """
    Draws every past point of each track on each frame, with radius and color
    interpolated by the point's age (old→small to new→large).

    Args:
        frames: [F, H, W, 3] uint8 RGB
        point_tracks:[N, F, 2] float32 – (x,y) in pixel coords
        visibles: [N, F] bool – visibility mask
        min_radius: radius for the very first point (oldest)
        max_radius: radius for the current point (newest)
        max_retain: points older than this many frames are not drawn

    Returns:
        video: [F, H, W, 3] uint8 RGB
    """
    import cv2  # local import: OpenCV is only needed for visualization
    num_points, num_frames = point_tracks.shape[:2]
    H, W = frames.shape[1:3]

    video = frames.copy()

    for t in range(num_frames):
        # start from the original frame
        frame = video[t].copy()

        for i in range(num_points):
            # draw every past step tau = 0..t
            for tau in range(t + 1):
                if not visibles[i, tau]:
                    continue

                age = t - tau
                if age > max_retain:
                    continue

                # sub-pixel offset + clamp to frame bounds
                x, y = point_tracks[i, tau] + 0.5
                xi = int(np.clip(x, 0, W - 1))
                yi = int(np.clip(y, 0, H - 1))

                # age-ratio in [0,1]: 1 = newest, 0 = oldest retained.
                # FIX: guard max_retain == 0, which previously raised
                # ZeroDivisionError (the old guard checked num_frames
                # instead of the divisor). When max_retain == 0, only
                # age == 0 survives the filter above, so ratio is 1.0.
                if max_retain > 0:
                    ratio = 1 - float(age) / max_retain
                else:
                    ratio = 1.0

                # interpolated radius
                radius = int(round(min_radius + (max_radius - min_radius) * ratio))

                color_rgb = age_to_bgr(ratio)

                # filled circle
                cv2.circle(frame, (xi, yi), radius, color_rgb, thickness=-1)

        video[t] = frame

    return video
107
+
108
def parse_json_tracks(tracks):
    """Parse track input (a JSON string or a list of JSON strings) into a
    list of tracks, each being a list of {x, y} point dicts.

    Single-quoted pseudo-JSON is tolerated by swapping quotes before
    parsing. Returns an empty list if the JSON cannot be decoded.
    """
    parsed_tracks = []
    try:
        if isinstance(tracks, str):
            # A single JSON string holding one or more tracks.
            parsed_tracks.extend(json.loads(tracks.replace("'", '"')))
        else:
            # A list of JSON strings, one parsed object per entry.
            parsed_tracks.extend(json.loads(s.replace("'", '"')) for s in tracks)

        head = parsed_tracks[0] if parsed_tracks else None
        if isinstance(head, dict) and 'x' in head:
            # A single flat track was given; wrap it as a list of tracks.
            parsed_tracks = [parsed_tracks]
        elif isinstance(head, list) and head and isinstance(head[0], dict) and 'x' in head[0]:
            # Already a list of tracks, nothing to do.
            pass
        else:
            # Unexpected format
            print(f"Warning: Unexpected track format: {type(parsed_tracks[0])}")

    except json.JSONDecodeError as e:
        print(f"Error parsing tracks JSON: {e}")
        parsed_tracks = []

    return parsed_tracks
137
+
138
class WanVideoATITracks:
    """ComfyUI node: attach ATI point-track conditioning to a WanVideo
    wrapper model via its transformer_options.

    The JSON `tracks` input is parsed, padded to the fixed 121-sample
    length, normalized to the given frame size, and stored on a clone of
    the model for downstream sampling to read.
    """
    @classmethod
    def INPUT_TYPES(s):
        # NOTE(review): height max 29048 looks like a typo for 2048
        # (width uses 2048) — confirm against upstream before changing.
        return {"required": {
            "model": ("WANVIDEOMODEL", ),
            "tracks": ("STRING",),
            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width of the image to encode"}),
            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height of the image to encode"}),
            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1}),
            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1}),
            },
        }

    RETURN_TYPES = ("WANVIDEOMODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION = "patchmodel"
    CATEGORY = "WanVideoWrapper"

    def patchmodel(self, model, tracks, width, height, temperature, topk, start_percent, end_percent):
        """Clone `model`, store processed ATI tracks and settings in its
        transformer_options, and return the patched clone."""
        tracks_data = parse_json_tracks(tracks)
        # Pad/truncate every track, then stack to (N, 121, 1, 3).
        arrs = []
        for track in tracks_data:
            pts = pad_pts(track)
            arrs.append(pts)

        tracks_np = np.stack(arrs, axis=0)

        # Normalize coordinates and resample 121 -> 81 samples.
        processed_tracks = process_tracks(tracks_np, (width, height))

        patcher = model.clone()
        patcher.model_options["transformer_options"]["ati_tracks"] = processed_tracks.unsqueeze(0)
        patcher.model_options["transformer_options"]["ati_temperature"] = temperature
        patcher.model_options["transformer_options"]["ati_topk"] = topk
        patcher.model_options["transformer_options"]["ati_start_percent"] = start_percent
        patcher.model_options["transformer_options"]["ati_end_percent"] = end_percent

        return (patcher,)
177
+
178
class WanVideoATITracksVisualize:
    """ComfyUI node: overlay ATI point tracks onto a batch of frames for
    visual inspection, size/colour-coding each drawn point by its age."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "images": ("IMAGE",),
            "tracks": ("STRING",),
            "min_radius": ("INT", {"default": 1, "min": 0, "max": 100, "step": 1, "tooltip": "radius for the very first point (oldest)"}),
            "max_radius": ("INT", {"default": 6, "min": 0, "max": 100, "step": 1, "tooltip": "radius for the current point (newest)"}),
            "max_retain": ("INT", {"default": 50, "min": 0, "max": 100, "step": 1, "tooltip": "Maximum number of points to retain"}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("images",)
    FUNCTION = "patchmodel"
    CATEGORY = "WanVideoWrapper"

    def patchmodel(self, images, tracks, min_radius, max_radius, max_retain):
        """Parse the JSON tracks, resample them to 81 frames (matching the
        duplicate-then-stride scheme of process_tracks), and draw them on
        the images with paint_point_track."""
        tracks_data = parse_json_tracks(tracks)
        arrs = []
        for track in tracks_data:
            pts = pad_pts(track)
            arrs.append(pts)

        tracks_np = np.stack(arrs, axis=0)
        # 121 samples -> duplicate to 242 -> every third -> 81 frames.
        track = np.repeat(tracks_np, 2, axis=1)[:, ::3]
        points = track[:, :, 0, :2].astype(np.float32)   # (N, F, 2) pixel coords
        visibles = track[:, :, 0, 2].astype(np.float32)  # (N, F) visibility flags

        # Tile the image batch when shorter than the track length, then
        # trim both cases to exactly the track length.
        if images.shape[0] < points.shape[1]:
            repeat_count = (points.shape[1] + images.shape[0] - 1) // images.shape[0]
            images = images.repeat(repeat_count, 1, 1, 1)
            images = images[:points.shape[1]]
        elif images.shape[0] > points.shape[1]:
            images = images[:points.shape[1]]

        video_viz = paint_point_track(images.cpu().numpy(), points, visibles, min_radius, max_radius, max_retain)
        video_viz = torch.from_numpy(video_viz).float()

        return (video_viz,)
218
+
219
+ from comfy import utils
220
+ import types
221
+ from .motion_patch import patch_motion
222
+
223
class WanConcatCondPatch:
    """Descriptor that replaces a model's `concat_cond` method with
    `modified_concat_cond`, closing over the ATI track tensor and settings.

    Installed via ModelPatcher.add_object_patch; `__get__` fires when the
    patched attribute is accessed and returns a method bound to the model
    with the stored parameters pre-applied.
    """
    def __init__(self, tracks, temperature, topk):
        self.tracks = tracks          # processed ATI tracks (batch dim prepended by caller)
        self.temperature = temperature
        self.topk = topk

    def __get__(self, obj, objtype=None):
        # Create bound method with stored parameters
        def wrapped_concat_cond(self_module, *args, **kwargs):
            return modified_concat_cond(self_module, self.tracks, self.temperature, self.topk, *args, **kwargs)
        return types.MethodType(wrapped_concat_cond, obj)
234
+
235
def modified_concat_cond(self, tracks, temperature, topk, **kwargs):
    """Replacement for the Wan model's concat_cond that applies ATI motion
    patching (patch_motion) to the image/mask conditioning latent.

    Mirrors the stock concat_cond flow (zero-image fallback, latent
    upscale + process_latent_in, mask preparation), then runs patch_motion
    over the concatenated (mask, image) tensor — NOTE(review): assumed to
    track comfy's upstream wan concat_cond; verify on comfy updates.

    Args:
        self: the model object the method is bound to (reads
            .diffusion_model, .process_latent_in, .image_to_video).
        tracks: processed ATI tracks tensor (with batch dim).
        temperature, topk: forwarded to patch_motion.
        **kwargs: conditioning kwargs (noise, device, concat_latent_image,
            concat_mask / denoise_mask).

    Returns:
        Patched conditioning latent (batch dim restored), or None when the
        model takes no extra channels, or the plain image when the early
        exit applies.
    """
    noise = kwargs.get("noise", None)
    # Extra conditioning channels the transformer expects beyond the noise.
    extra_channels = self.diffusion_model.patch_embedding.weight.shape[1] - noise.shape[1]
    if extra_channels == 0:
        return None

    image = kwargs.get("concat_latent_image", None)
    device = kwargs["device"]

    if image is None:
        # No conditioning image: feed zeros of the expected channel count.
        shape_image = list(noise.shape)
        shape_image[1] = extra_channels
        image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device)
    else:
        # Match spatial size, map into latent space 16 channels at a time,
        # and match the noise batch size.
        image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
        for i in range(0, image.shape[1], 16):
            image[:, i: i + 16] = self.process_latent_in(image[:, i: i + 16])
        image = utils.resize_to_batch_size(image, noise.shape[0])

    if not self.image_to_video or extra_channels == image.shape[1]:
        return image

    # Reserve 4 channels for the mask built below.
    if image.shape[1] > (extra_channels - 4):
        image = image[:, :(extra_channels - 4)]

    mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
    if mask is None:
        mask = torch.zeros_like(noise)[:, :4]
    else:
        if mask.shape[1] != 4:
            mask = torch.mean(mask, dim=1, keepdim=True)
        mask = 1.0 - mask  # flip mask polarity
        mask = utils.common_upscale(mask.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
        if mask.shape[-3] < noise.shape[-3]:
            # Zero-pad missing frames at the end of the time axis.
            mask = torch.nn.functional.pad(mask, (0, 0, 0, 0, 0, noise.shape[-3] - mask.shape[-3]), mode='constant', value=0)
        if mask.shape[1] == 1:
            mask = mask.repeat(1, 4, 1, 1, 1)
        mask = utils.resize_to_batch_size(mask, noise.shape[0])

    # ATI motion patching is applied to the first batch element only.
    image_cond = torch.cat((mask, image), dim=1)
    image_cond_ati = patch_motion(tracks.to(image_cond.device, image_cond.dtype), image_cond[0],
                                  temperature=temperature, topk=topk)

    return image_cond_ati.unsqueeze(0)
279
+
280
class WanVideoATI_comfy:
    """ComfyUI node: enable ATI trajectory conditioning on a native comfy
    Wan MODEL by object-patching its concat_cond with WanConcatCondPatch."""

    @classmethod
    def INPUT_TYPES(s):
        # NOTE(review): height max 29048 looks like a typo for 2048
        # (width uses 2048) — it only bounds the UI widget; confirm.
        return {"required": {
            "model": ("MODEL", ),
            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width of the image to encode"}),
            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height of the image to encode"}),
            "tracks": ("STRING",),
            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1}),
            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1}),
            },
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model", )
    FUNCTION = "patchcond"
    CATEGORY = "WanVideoWrapper"

    def patchcond(self, model, tracks, width, height, temperature, topk):
        """Parse and normalize the JSON tracks, then return a clone of
        `model` whose concat_cond is patched to inject them."""
        tracks_data = parse_json_tracks(tracks)
        arrs = []
        for track in tracks_data:
            pts = pad_pts(track)
            arrs.append(pts)

        tracks_np = np.stack(arrs, axis=0)

        processed_tracks = process_tracks(tracks_np, (width, height))

        model_clone = model.clone()
        # The descriptor is bound to the original model object so the
        # wrapped method reads the real module attributes.
        model_clone.add_object_patch(
            "concat_cond",
            WanConcatCondPatch(
                processed_tracks.unsqueeze(0), temperature, topk
            ).__get__(model.model, model.model.__class__)
        )

        return (model_clone,)
319
+
320
# Registration tables consumed by ComfyUI at import time: node id -> class,
# and node id -> human-readable display name shown in the node picker.
NODE_CLASS_MAPPINGS = {
    "WanVideoATITracks": WanVideoATITracks,
    "WanVideoATITracksVisualize": WanVideoATITracksVisualize,
    "WanVideoATI_comfy": WanVideoATI_comfy,
    }
NODE_DISPLAY_NAME_MAPPINGS = {
    "WanVideoATITracks": "WanVideo ATI Tracks",
    "WanVideoATITracksVisualize": "WanVideo ATI Tracks Visualize",
    "WanVideoATI_comfy": "WanVideo ATI Comfy",
    }
HuMo/audio_proj.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from einops import rearrange
3
+ from torch import nn
4
+ from einops import rearrange
5
+
6
class WanRMSNorm(nn.Module):
    """Root-mean-square normalization over the last axis (no mean
    subtraction, no bias), with a learnable per-channel scale."""

    def __init__(self, dim, eps=1e-5):
        super().__init__()
        self.dim = dim
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        r"""
        Args:
            x(Tensor): Shape [B, L, C]
        """
        # Normalize in fp32 for numerical stability, cast back, then scale.
        normalized = self._norm(x.float()).type_as(x)
        return normalized * self.weight

    def _norm(self, x):
        mean_sq = x.pow(2).mean(dim=-1, keepdim=True)
        return x * torch.rsqrt(mean_sq + self.eps)
23
+
24
+
25
class DummyAdapterLayer(nn.Module):
    """Transparent wrapper that forwards every call to the wrapped layer.

    Adds one extra submodule level (stable parameter key names) without
    changing computation.
    """
    def __init__(self, layer):
        super().__init__()
        self.layer = layer

    def forward(self, *args, **kwargs):
        return self.layer(*args, **kwargs)
32
+
33
+
34
class AudioProjModel(nn.Module):
    """Project windowed audio embeddings into context tokens.

    Each window of `seq_len` audio frames — `blocks` feature blocks of
    `channels` dims apiece — is flattened and passed through a 3-layer MLP
    producing `context_tokens` tokens of `output_dim` each.
    """
    def __init__(
        self,
        seq_len=5,
        blocks=13,  # add a new parameter blocks
        channels=768,  # add a new parameter channels
        intermediate_dim=512,
        output_dim=1536,
        context_tokens=16,
    ):
        super().__init__()

        self.seq_len = seq_len
        self.blocks = blocks
        self.channels = channels
        self.input_dim = seq_len * blocks * channels  # update input_dim to be the product of blocks and channels.
        self.intermediate_dim = intermediate_dim
        self.context_tokens = context_tokens
        self.output_dim = output_dim

        # define multiple linear layers (DummyAdapterLayer keeps stable
        # submodule names without changing computation)
        self.audio_proj_glob_1 = DummyAdapterLayer(nn.Linear(self.input_dim, intermediate_dim))
        self.audio_proj_glob_2 = DummyAdapterLayer(nn.Linear(intermediate_dim, intermediate_dim))
        self.audio_proj_glob_3 = DummyAdapterLayer(nn.Linear(intermediate_dim, context_tokens * output_dim))

        self.audio_proj_glob_norm = DummyAdapterLayer(nn.LayerNorm(output_dim))

        self.initialize_weights()

    def initialize_weights(self):
        """Xavier-init every Linear weight; zero its bias."""
        # Initialize transformer layers:
        def _basic_init(module):
            if isinstance(module, nn.Linear):
                torch.nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

        self.apply(_basic_init)

    def forward(self, audio_embeds):
        """Map (batch, frames, window, blocks, channels) audio embeddings
        to context tokens of shape (batch, frames, context_tokens, output_dim)."""
        video_length = audio_embeds.shape[1]
        # Fold batch and frame axes so each window becomes one MLP sample.
        audio_embeds = rearrange(audio_embeds, "bz f w b c -> (bz f) w b c")
        batch_size, window_size, blocks, channels = audio_embeds.shape
        audio_embeds = audio_embeds.view(batch_size, window_size * blocks * channels)

        audio_embeds = torch.relu(self.audio_proj_glob_1(audio_embeds))
        audio_embeds = torch.relu(self.audio_proj_glob_2(audio_embeds))

        context_tokens = self.audio_proj_glob_3(audio_embeds).reshape(batch_size, self.context_tokens, self.output_dim)

        context_tokens = self.audio_proj_glob_norm(context_tokens)
        # Unfold back to (batch, frames, tokens, dim).
        context_tokens = rearrange(context_tokens, "(bz f) m c -> bz f m c", f=video_length)

        return context_tokens
HuMo/nodes.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import folder_paths
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import os
5
+ import json
6
+ import torchaudio
7
+
8
+ from comfy.utils import load_torch_file, common_upscale
9
+ import comfy.model_management as mm
10
+
11
+ from accelerate import init_empty_weights
12
+ from ..utils import set_module_tensor_to_device, log
13
+ from ..nodes import WanVideoEncodeLatentBatch
14
+
15
+ script_directory = os.path.dirname(os.path.abspath(__file__))
16
+ device = mm.get_torch_device()
17
+ offload_device = mm.unet_offload_device()
18
+
19
def linear_interpolation_fps(features, input_fps, output_fps, output_len=None):
    """Linearly resample a [B, T, C] feature sequence from input_fps to output_fps.

    When output_len is not given it is derived from the duration implied by
    input_fps. Returns a tensor of shape [B, output_len, C].
    """
    channels_first = features.transpose(1, 2)  # [B, C, T], layout F.interpolate expects
    if output_len is None:
        duration_s = channels_first.shape[2] / float(input_fps)
        output_len = int(duration_s * output_fps)
    resampled = F.interpolate(channels_first, size=output_len, mode='linear', align_corners=True)
    return resampled.transpose(1, 2)
26
+
27
def get_audio_emb_window(audio_emb, frame_num, frame0_idx, audio_shift=2):
    """Slice per-frame audio embeddings into one fixed-size window per latent frame.

    audio_emb: (T, B, C) tensor of per-video-frame audio features.
    frame_num: number of pixel frames covered; one window is built per latent
        frame, i.e. 1 + (frame_num - 1) // 4 windows in total.
    frame0_idx: index into audio_emb of the first pixel frame of this chunk.
    audio_shift: extra context frames taken on each side of the later windows.

    Returns (windows, next_idx): windows is (num_latent_frames, 4 + 2*audio_shift, B, C);
    next_idx (= ed - audio_shift from the last window) can serve as frame0_idx
    for the next chunk. Indices outside audio_emb are filled with zeros.
    """
    # Zero padding rows used for indices that fall outside audio_emb.
    zero_audio_embed = torch.zeros((audio_emb.shape[1], audio_emb.shape[2]), dtype=audio_emb.dtype, device=audio_emb.device)
    zero_audio_embed_3 = torch.zeros((3, audio_emb.shape[1], audio_emb.shape[2]), dtype=audio_emb.dtype, device=audio_emb.device)
    iter_ = 1 + (frame_num - 1) // 4  # latent frame count (4x temporal compression)
    audio_emb_wind = []
    for lt_i in range(iter_):
        if lt_i == 0:
            # First latent frame: a 5-frame window centered on frame0_idx,
            # front-padded with 3 zero rows so all windows share one size.
            st = frame0_idx + lt_i - 2
            ed = frame0_idx + lt_i + 3
            wind_feat = torch.stack([
                audio_emb[i] if (0 <= i < audio_emb.shape[0]) else zero_audio_embed
                for i in range(st, ed)
            ], dim=0)
            wind_feat = torch.cat((zero_audio_embed_3, wind_feat), dim=0)
        else:
            # Later latent frames: the 4 underlying pixel frames plus
            # audio_shift context frames on each side.
            st = frame0_idx + 1 + 4 * (lt_i - 1) - audio_shift
            ed = frame0_idx + 1 + 4 * lt_i + audio_shift
            wind_feat = torch.stack([
                audio_emb[i] if (0 <= i < audio_emb.shape[0]) else zero_audio_embed
                for i in range(st, ed)
            ], dim=0)
        audio_emb_wind.append(wind_feat)
    audio_emb_wind = torch.stack(audio_emb_wind, dim=0)

    # NOTE: `ed` intentionally carries over from the final loop iteration.
    return audio_emb_wind, ed - audio_shift
52
+
53
class WhisperModelLoader:
    """ComfyUI node: load a Whisper encoder (large-v3 layout) from
    'ComfyUI/models/audio_encoders' for audio feature extraction.

    Returns a WHISPERMODEL dict with keys 'feature_extractor', 'model'
    (decoder stripped) and 'dtype'.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (folder_paths.get_filename_list("audio_encoders"), {"tooltip": "These models are loaded from the 'ComfyUI/models/audio_encoders' folder",}),
                "base_precision": (["fp32", "bf16", "fp16"], {"default": "fp16"}),
                "load_device": (["main_device", "offload_device"], {"default": "main_device", "tooltip": "Initial device to load the model to, NOT recommended with the larger models unless you have 48GB+ VRAM"}),
            },
        }

    RETURN_TYPES = ("WHISPERMODEL",)
    RETURN_NAMES = ("whisper_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, model, base_precision, load_device):
        """Build the encoder-only WhisperModel plus its feature extractor.

        The model skeleton is created on the meta device and weights are then
        materialized tensor-by-tensor onto the offload device in base_dtype.
        """
        from transformers import WhisperConfig, WhisperModel, WhisperFeatureExtractor

        # NOTE: the fp8 keys are unreachable via INPUT_TYPES (only
        # fp32/bf16/fp16 are offered); kept for parity with other loaders.
        base_dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "fp8_e4m3fn_fast": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[base_precision]

        if load_device == "offload_device":
            transformer_load_device = offload_device
        else:
            transformer_load_device = device

        config_path = os.path.join(script_directory, "whisper_config.json")
        # Use a context manager so the config file handle is always closed
        # (the previous open() call leaked it).
        with open(config_path) as config_file:
            whisper_config = WhisperConfig(**json.load(config_file))

        with init_empty_weights():
            whisper = WhisperModel(whisper_config).eval()
        whisper.decoder = None  # we only need the encoder

        feature_extractor_config = {
            "chunk_length": 30,
            "feature_extractor_type": "WhisperFeatureExtractor",
            "feature_size": 128,
            "hop_length": 160,
            "n_fft": 400,
            "n_samples": 480000,
            "nb_max_frames": 3000,
            "padding_side": "right",
            "padding_value": 0.0,
            "processor_class": "WhisperProcessor",
            "return_attention_mask": False,
            "sampling_rate": 16000
        }

        feature_extractor = WhisperFeatureExtractor(**feature_extractor_config)

        model_path = folder_paths.get_full_path_or_raise("audio_encoders", model)
        sd = load_torch_file(model_path, device=transformer_load_device, safe_load=True)

        # Materialize each (meta) parameter from the checkpoint; checkpoint
        # keys carry a "model." prefix relative to the module names.
        for name, _ in whisper.named_parameters():
            key = "model." + name
            value = sd[key]
            set_module_tensor_to_device(whisper, name, device=offload_device, dtype=base_dtype, value=value)

        whisper_model = {
            "feature_extractor": feature_extractor,
            "model": whisper,
            "dtype": base_dtype,
        }

        return (whisper_model,)
118
+
119
class HuMoEmbeds:
    """ComfyUI node: build the WANVIDIMAGE_EMBEDS dict for HuMo generation.

    Encodes optional audio (via a Whisper encoder) into per-frame embeddings
    and optional reference images (via the VAE) into latent conditioning,
    then packs them with the target latent shape and audio-CFG settings.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "num_frames": ("INT", {"default": 81, "min": -1, "max": 10000, "step": 1, "tooltip": "The total frame count to generate."}),
            "width": ("INT", {"default": 832, "min": 64, "max": 4096, "step": 16}),
            "height": ("INT", {"default": 480, "min": 64, "max": 4096, "step": 16}),
            "audio_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "Strength of the audio conditioning"}),
            "audio_cfg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "When not 1.0, an extra model pass without audio conditioning is done: slower inference but more motion is allowed"}),
            "audio_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to start applying audio conditioning"}),
            "audio_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to stop applying audio conditioning"})
            },
            "optional" : {
                "whisper_model": ("WHISPERMODEL",),
                "vae": ("WANVAE", ),
                "reference_images": ("IMAGE", {"tooltip": "reference images for the humo model"}),
                "audio": ("AUDIO",),
                "tiled_vae": ("BOOLEAN", {"default": False, "tooltip": "Use tiled VAE encoding for reduced memory use"}),
            }
        }

    RETURN_TYPES = ("WANVIDIMAGE_EMBEDS", )
    RETURN_NAMES = ("image_embeds", )
    FUNCTION = "process"
    CATEGORY = "WanVideoWrapper"

    def process(self, num_frames, width, height, audio_scale, audio_cfg_scale, audio_start_percent, audio_end_percent, whisper_model=None, vae=None, reference_images=None, audio=None, tiled_vae=False):
        # --- input validation: fail fast with clear messages ---
        # The VAE is used unconditionally below (zero-latent encoding), not
        # only when reference images are provided.
        if vae is None:
            raise ValueError("VAE is required")
        if whisper_model is None and audio is not None:
            raise ValueError("Whisper model is required when audio is provided")
        # With no audio there is nothing to derive the length from.
        if audio is None and num_frames == -1:
            raise ValueError("num_frames must be set explicitly when no audio is provided")

        sampling_rate = 16000  # Whisper expects 16 kHz mono input

        if audio is not None:
            # Only unpack the whisper model inside the audio branch:
            # whisper_model may legitimately be None when there is no audio
            # (previously this raised TypeError on the subscript).
            model = whisper_model["model"]
            feature_extractor = whisper_model["feature_extractor"]
            dtype = whisper_model["dtype"]

            audio_input = audio["waveform"][0]  # (channels, samples)
            sample_rate = audio["sample_rate"]

            if sample_rate != sampling_rate:
                audio_input = torchaudio.functional.resample(audio_input, sample_rate, sampling_rate)
            # Downmix stereo to mono; check the channel axis (dim 0), the
            # previous shape[1] test looked at the sample count instead.
            if audio_input.shape[0] == 2:
                audio_input = audio_input.mean(dim=0, keepdim=False)
            else:
                audio_input = audio_input[0]

            model.to(device)
            audio_len = len(audio_input) // 640  # 640 samples per video frame at 25 fps

            # Log-mel feature extraction in 30 s chunks (750 video frames).
            audio_features = []
            window = 750 * 640
            for i in range(0, len(audio_input), window):
                audio_feature = feature_extractor(audio_input[i:i+window], sampling_rate=sampling_rate, return_tensors="pt").input_features
                audio_features.append(audio_feature)
            audio_features = torch.cat(audio_features, dim=-1).to(device, dtype)

            # Encode in 3000-mel-frame windows, keeping all hidden states.
            window = 3000
            audio_prompts = []
            for i in range(0, audio_features.shape[-1], window):
                audio_prompt = model.encoder(audio_features[:,:,i:i+window], output_hidden_states=True).hidden_states
                audio_prompt = torch.stack(audio_prompt, dim=2)
                audio_prompts.append(audio_prompt)

            model.to(offload_device)

            audio_prompts = torch.cat(audio_prompts, dim=1)
            audio_prompts = audio_prompts[:, :audio_len * 2]

            # Average groups of encoder layers and resample 50 Hz -> 25 fps.
            feat0 = linear_interpolation_fps(audio_prompts[:, :, 0: 8].mean(dim=2), 50, 25)
            feat1 = linear_interpolation_fps(audio_prompts[:, :, 8: 16].mean(dim=2), 50, 25)
            feat2 = linear_interpolation_fps(audio_prompts[:, :, 16: 24].mean(dim=2), 50, 25)
            feat3 = linear_interpolation_fps(audio_prompts[:, :, 24: 32].mean(dim=2), 50, 25)
            feat4 = linear_interpolation_fps(audio_prompts[:, :, 32], 50, 25)
            audio_emb = torch.stack([feat0, feat1, feat2, feat3, feat4], dim=2)[0]  # [T, 5, 1280]
        else:
            # No audio: zero embeddings so downstream code sees a uniform shape.
            audio_emb = torch.zeros(num_frames, 5, 1280, device=device)
            audio_len = num_frames

        # Clamp the frame count to 4k+1 (latent temporal compression is 4x).
        pixel_frame_num = num_frames if num_frames != -1 else audio_len
        pixel_frame_num = 4 * ((pixel_frame_num - 1) // 4) + 1
        latent_frame_num = (pixel_frame_num - 1) // 4 + 1

        log.info(f"HuMo set to generate {pixel_frame_num} frames")

        #audio_emb, _ = get_audio_emb_window(audio_emb, pixel_frame_num, frame0_idx=0)

        num_refs = 0
        if reference_images is not None:
            # Resize references to the target resolution if needed, then
            # VAE-encode them into per-image latents.
            if reference_images.shape[1] != height or reference_images.shape[2] != width:
                reference_images_in = common_upscale(reference_images.movedim(-1, 1), width, height, "lanczos", "disabled").movedim(1, -1)
            else:
                reference_images_in = reference_images
            samples, = WanVideoEncodeLatentBatch.encode(self, vae, reference_images_in, tiled_vae, None, None, None, None)
            samples = samples["samples"].transpose(0, 2).squeeze(0)
            num_refs = samples.shape[1]

        # Encode all-zero frames to get the "empty" latent conditioning.
        vae.to(device)
        zero_frames = torch.zeros(1, 3, pixel_frame_num + 4 * num_refs, height, width, device=device, dtype=vae.dtype)
        zero_latents = vae.encode(zero_frames, device=device, tiled=tiled_vae)[0].to(offload_device)

        vae.to(offload_device)
        mm.soft_empty_cache()

        target_shape = (16, latent_frame_num + num_refs, height // 8, width // 8)

        # Mask channels: 1 marks fixed (reference) latent frames, 0 generated ones.
        mask = torch.ones(4, target_shape[1], target_shape[2], target_shape[3], device=offload_device, dtype=vae.dtype)
        if reference_images is not None:
            mask[:, :-num_refs] = 0
            # Zero latents for the generated span, reference latents appended.
            image_cond = torch.cat([zero_latents[:, :(target_shape[1] - num_refs)], samples], dim=1)
        else:
            image_cond = zero_latents
            mask = torch.zeros_like(mask)
        image_cond = torch.cat([mask, image_cond], dim=0)
        # Negative conditioning uses the zero latents (no references).
        image_cond_neg = torch.cat([mask, zero_latents], dim=0)

        embeds = {
            "humo_audio_emb": audio_emb,
            "humo_audio_emb_neg": torch.zeros_like(audio_emb, dtype=audio_emb.dtype, device=audio_emb.device),
            "humo_image_cond": image_cond,
            "humo_image_cond_neg": image_cond_neg,
            "humo_reference_count": num_refs,
            "target_shape": target_shape,
            "num_frames": pixel_frame_num,
            "humo_audio_scale": audio_scale,
            "humo_audio_cfg_scale": audio_cfg_scale,
            "humo_start_percent": audio_start_percent,
            "humo_end_percent": audio_end_percent,
        }

        return (embeds, )
255
+
256
class WanVideoCombineEmbeds:
    """ComfyUI node: merge two WANVIDIMAGE_EMBEDS dicts into one.

    On key collisions the entry from embeds_2 wins.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "embeds_1": ("WANVIDIMAGE_EMBEDS",),
            "embeds_2": ("WANVIDIMAGE_EMBEDS",),
            }
        }

    RETURN_TYPES = ("WANVIDIMAGE_EMBEDS",)
    RETURN_NAMES = ("image_embeds",)
    FUNCTION = "add"
    CATEGORY = "WanVideoWrapper"
    EXPERIMENTAL = True

    def add(self, embeds_1, embeds_2):
        # Copy-then-update: neither input dict is mutated, embeds_2 overrides.
        merged = dict(embeds_1)
        merged.update(embeds_2)
        return (merged,)
275
+
276
+
277
# Node registration tables picked up by ComfyUI when this module is imported:
# internal node id -> implementing class.
NODE_CLASS_MAPPINGS = {
    "WhisperModelLoader": WhisperModelLoader,
    "HuMoEmbeds": HuMoEmbeds,
    "WanVideoCombineEmbeds": WanVideoCombineEmbeds,
}

# Internal node id -> human-readable name shown in the ComfyUI node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "WhisperModelLoader": "Whisper Model Loader",
    "HuMoEmbeds": "HuMo Embeds",
    "WanVideoCombineEmbeds": "WanVideo Combine Embeds",
}
HuMo/whisper_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-large-v3",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1280,
17
+ "decoder_attention_heads": 20,
18
+ "decoder_ffn_dim": 5120,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 32,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 20,
24
+ "encoder_ffn_dim": 5120,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 32,
27
+ "eos_token_id": 50257,
28
+ "init_std": 0.02,
29
+ "is_encoder_decoder": true,
30
+ "mask_feature_length": 10,
31
+ "mask_feature_min_masks": 0,
32
+ "mask_feature_prob": 0.0,
33
+ "mask_time_length": 10,
34
+ "mask_time_min_masks": 2,
35
+ "mask_time_prob": 0.05,
36
+ "max_length": 448,
37
+ "max_source_positions": 1500,
38
+ "max_target_positions": 448,
39
+ "median_filter_width": 7,
40
+ "model_type": "whisper",
41
+ "num_hidden_layers": 32,
42
+ "num_mel_bins": 128,
43
+ "pad_token_id": 50256,
44
+ "scale_embedding": false,
45
+ "torch_dtype": "float16",
46
+ "transformers_version": "4.36.0.dev0",
47
+ "use_cache": true,
48
+ "use_weighted_layer_sum": false,
49
+ "vocab_size": 51866
50
+ }
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
MTV/data/mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ababeaabf5ac096ce7c7714ada14aa1de8355c0016de25695be611d51285141
3
+ size 416
MTV/data/std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:650e46902a0878e6947be401e4e1995e54a8fd407f2be3ded0dda62bda99a9b3
3
+ size 416
MTV/draw_pose.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import math
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ from torchvision import transforms
7
+
8
+
9
def intrinsic_matrix_from_field_of_view(imshape, fov_degrees: float = 55):
    """Build a 3x3 pinhole intrinsic matrix from an image shape and a FOV.

    The focal length is chosen so that fov_degrees spans the larger image
    side; the principal point sits at the image center. 55 degrees is the
    nlf default.
    """
    shape_arr = np.asarray(imshape)
    half_fov = np.deg2rad(fov_degrees) / 2
    focal = np.max(shape_arr) / (2 * np.tan(half_fov))
    cx = shape_arr[1] / 2  # principal point x (width / 2)
    cy = shape_arr[0] / 2  # principal point y (height / 2)
    return np.array([
        [focal, 0, cx],
        [0, focal, cy],
        [0, 0, 1],
    ])
20
+
21
+
22
def p3d_to_p2d(point_3d, height, width):
    """Project camera-space 3D points (n, m, 3) into pixel coordinates.

    Returns an (n, m, 3) array: channels 0/1 are pixel x/y after the
    perspective divide, channel 2 keeps the original depth.
    """
    K = intrinsic_matrix_from_field_of_view((height, width))[None, None]  # (1, 1, 3, 3)
    projected = (K @ point_3d[..., None]).squeeze(-1)  # (n, m, 3)
    # Perspective divide for x/y only; the depth channel is left untouched.
    projected[:, :, :2] = projected[:, :, :2] / projected[:, :, 2:3]
    return projected
30
+
31
+
32
def get_pose_images(smpl_data, offset):
    """Render each SMPL joint set in smpl_data to a PIL pose image.

    smpl_data: iterable of per-frame joint arrays; numpy arrays are used
        as-is, anything else is converted via .numpy()
        (assumes CPU torch tensors — TODO confirm with callers).
    offset: (height, width) of the output canvases.
    Returns a list of PIL.Image objects, one per entry.
    """
    pose_images = []
    for data in smpl_data:
        if isinstance(data, np.ndarray):
            joints3d = data
        else:
            joints3d = data.numpy()
        canvas = np.zeros(shape=(offset[0], offset[1], 3), dtype=np.uint8)
        # Project 3D joints to pixel space, then draw points and limbs.
        joints3d = p3d_to_p2d(joints3d, offset[0], offset[1])
        canvas = draw_3d_points(canvas, joints3d[0], stickwidth=int(offset[1]/350))
        pose_images.append(Image.fromarray(canvas))
    return pose_images
44
+
45
+
46
def get_control_conditions(poses, h, w):
    """Render a sequence of poses into a control-video tensor.

    poses: iterable of (n, m, 3) camera-space joint arrays.
    Returns a float tensor of shape (T, h, w, 3) with values in [0, 1].
    A pose that fails to render contributes a blank (black) frame.
    """
    # Normalization is currently disabled (see the commented line below);
    # the transform is kept so it can be re-enabled easily.
    video_transforms = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True)
    control_images = []
    for idx, pose in enumerate(poses):
        canvas = np.zeros(shape=(h, w, 3), dtype=np.uint8)
        try:
            joints3d = p3d_to_p2d(pose, h, w)
            canvas = draw_3d_points(
                canvas,
                joints3d[0],
                stickwidth=int(h / 350),
            )
            resized_canvas = cv2.resize(canvas, (w, h))
            # Image.fromarray(resized_canvas).save(f'tmp/{idx}_pose.jpg')
            control_images.append(resized_canvas)
        except Exception as e:
            print("wrong:", e)
            # Fall back to the blank canvas. This must stay an ndarray:
            # appending a PIL Image here (as before) made np.array() below
            # build an object array, which breaks torch.from_numpy.
            control_images.append(canvas)
    control_pixel_values = np.array(control_images)
    control_pixel_values = torch.from_numpy(control_pixel_values).contiguous() / 255.
    print("control_pixel_values.shape", control_pixel_values.shape)
    #control_pixel_values = video_transforms(control_pixel_values)
    return control_pixel_values
69
+
70
+
71
def draw_3d_points(canvas, points, stickwidth=2, r=2, draw_line=True):
    """Draw projected skeleton joints and limbs onto a BGR canvas.

    canvas: (H, W, 3) uint8 image, modified in place and returned.
    points: (num_joints, >=2) array of pixel coordinates (x, y, ...).
    stickwidth: half-thickness of limb ellipses; r: joint circle radius.
    draw_line: when False, only the joint circles are drawn.
    """
    # Per-joint circle colors (index modulo 17 at draw time).
    colors = [
        [255, 0, 0], # 0
        [0, 255, 0], # 1
        [0, 0, 255], # 2
        [255, 0, 255], # 3
        [255, 255, 0], # 4
        [85, 255, 0], # 5
        [0, 75, 255], # 6
        [0, 255, 85], # 7
        [0, 255, 170], # 8
        [170, 0, 255], # 9
        [85, 0, 255], # 10
        [0, 85, 255], # 11
        [0, 255, 255], # 12
        [85, 0, 255], # 13
        [170, 0, 255], # 14
        [255, 0, 255], # 15
        [255, 0, 170], # 16
        [255, 0, 85], # 17
    ]
    # Joint-index pairs forming the limbs (SMPL-style joint ordering —
    # TODO confirm exact skeleton convention against the pose source).
    connetions = [
        [15,12],[12, 16],[16, 18],[18, 20],[20, 22],
        [12,17],[17,19],[19,21],
        [21,23],[12,9],[9,6],
        [6,3],[3,0],[0,1],
        [1,4],[4,7],[7,10],[0,2],[2,5],[5,8],[8,11]
    ]
    # Per-limb fill colors (index modulo 17 at draw time).
    connection_colors = [
        [255, 0, 0], # 0
        [0, 255, 0], # 1
        [0, 0, 255], # 2
        [255, 255, 0], # 3
        [255, 0, 255], # 4
        [0, 255, 0], # 5
        [0, 85, 255], # 6
        [255, 175, 0], # 7
        [0, 0, 255], # 8
        [255, 85, 0], # 9
        [0, 255, 85], # 10
        [255, 0, 255], # 11
        [255, 0, 0], # 12
        [0, 175, 255], # 13
        [255, 255, 0], # 14
        [0, 0, 255], # 15
        [0, 255, 0], # 16
    ]

    # draw point
    for i in range(len(points)):
        x,y = points[i][0:2]
        x,y = int(x),int(y)
        # Joints 13 and 14 are intentionally not drawn.
        if i==13 or i == 14:
            continue
        cv2.circle(canvas, (x, y), r, colors[i%17], thickness=-1)

    # draw line
    if draw_line:
        for i in range(len(connetions)):
            point1_idx,point2_idx = connetions[i][0:2]
            point1 = points[point1_idx]
            point2 = points[point2_idx]
            # Each limb is rendered as a filled ellipse centered on the
            # segment midpoint, oriented along the segment.
            Y = [point2[0],point1[0]]
            X = [point2[1],point1[1]]
            mX = int(np.mean(X))
            mY = int(np.mean(Y))
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((mY, mX), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(canvas, polygon, connection_colors[i%17])

    return canvas
MTV/motion4d/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .vqvae import SMPL_VQVAE, VectorQuantizer, Encoder, Decoder
MTV/motion4d/vqvae.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+
6
+
7
class Encoder(nn.Module):
    """Spatio-temporal encoder mapping SMPL joint sequences to latent features.

    Input/output layout is (batch, channels, frames, joints); two ResBlock
    stages with interleaved temporal/joint down-sampling widen channels from
    ``in_channels`` to ``out_channels``.
    """

    def __init__(
        self,
        in_channels=3,
        mid_channels=[128, 512],
        out_channels=3072,
        downsample_time=[1, 1],
        downsample_joint=[1, 1],
        num_attention_heads=8,
        attention_head_dim=64,
        dim=3072,
    ):
        super(Encoder, self).__init__()

        # Attribute names must stay stable so checkpoints keep loading.
        self.conv_in = nn.Conv2d(in_channels, mid_channels[0], kernel_size=3, stride=1, padding=1)
        self.resnet1 = nn.ModuleList(ResBlock(mid_channels[0], mid_channels[0]) for _ in range(3))
        self.downsample1 = Downsample(mid_channels[0], mid_channels[0], downsample_time[0], downsample_joint[0])
        self.resnet2 = ResBlock(mid_channels[0], mid_channels[1])
        self.resnet3 = nn.ModuleList(ResBlock(mid_channels[1], mid_channels[1]) for _ in range(3))
        self.downsample2 = Downsample(mid_channels[1], mid_channels[1], downsample_time[1], downsample_joint[1])
        self.conv_out = nn.Conv2d(mid_channels[-1], out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Encode ``x`` of shape (B, C, F, J) to (B, out_channels, F', J')."""
        x = self.conv_in(x)
        for block in self.resnet1:
            x = block(x)
        x = self.downsample1(x)

        x = self.resnet2(x)
        for block in self.resnet3:
            x = block(x)
        x = self.downsample2(x)

        return self.conv_out(x)
43
+
44
+
45
+
46
class VectorQuantizer(nn.Module):
    """Nearest-neighbour codebook quantizer with straight-through gradients.

    Holds ``nb_code`` codes of dimension ``code_dim``. ``forward`` maps each
    feature vector of a (B, C, F, J) tensor to its closest code and returns the
    straight-through-estimated result plus a commitment loss.
    """

    def __init__(self, nb_code, code_dim):
        super().__init__()
        self.nb_code = nb_code
        self.code_dim = code_dim
        self.mu = 0.99  # EMA decay factor (not used in the inference path here)
        self.reset_codebook()
        self.reset_count = 0
        # NOTE(review): plain tensor, not a registered buffer -- it will not
        # follow .to(device) or appear in state_dict; confirm this is intended.
        self.usage = torch.zeros((self.nb_code, 1))

    def reset_codebook(self):
        """(Re)initialize the codebook buffer and bookkeeping state."""
        self.init = False
        self.code_sum = None
        self.code_count = None
        # Fix: previously forced .cuda() on the buffer, which crashed on
        # CPU-only machines at construction time. Create it on CPU; nn.Module's
        # .to()/load_state_dict relocate registered buffers as needed.
        self.register_buffer('codebook', torch.zeros(self.nb_code, self.code_dim))

    def _tile(self, x):
        """Repeat (and jitter) rows of ``x`` until there are >= nb_code rows."""
        nb_code_x, code_dim = x.shape
        if nb_code_x < self.nb_code:
            n_repeats = (self.nb_code + nb_code_x - 1) // nb_code_x
            std = 0.01 / np.sqrt(code_dim)
            out = x.repeat(n_repeats, 1)
            out = out + torch.randn_like(out) * std
        else:
            out = x
        return out

    def preprocess(self, x):
        # [bs, c, f, j] -> [bs * f * j, c]
        x = x.permute(0, 2, 3, 1).contiguous()
        x = x.view(-1, x.shape[-1])
        return x

    def quantize(self, x):
        """Return the index of the nearest code for each row of ``x``.

        Uses the expanded form ||x||^2 - 2 x.k + ||k||^2 of the squared
        Euclidean distance against the whole codebook at once.
        """
        k_w = self.codebook.t()
        distance = torch.sum(x ** 2, dim=-1, keepdim=True) - 2 * torch.matmul(x, k_w) + torch.sum(k_w ** 2, dim=0, keepdim=True)
        _, code_idx = torch.min(distance, dim=-1)
        return code_idx

    def dequantize(self, code_idx):
        """Look up code vectors for ``code_idx``: [N] -> [N, code_dim]."""
        x = F.embedding(code_idx, self.codebook)
        return x

    def forward(self, x, return_vq=False):
        """Quantize ``x`` (B, C, F, J).

        Returns (quantized, commit_loss). With ``return_vq=True`` the quantized
        tensor is flattened to (B, F*J, C); otherwise it is reshaped back to
        (B, C, F, J).
        """
        bs, c, f, j = x.shape

        x = self.preprocess(x)
        assert x.shape[-1] == self.code_dim

        # Quantize and dequantize through the bottleneck.
        code_idx = self.quantize(x)
        x_d = self.dequantize(code_idx)

        # Commitment loss pulls encoder outputs toward their codes.
        commit_loss = F.mse_loss(x, x_d.detach())

        # Straight-through estimator: forward uses x_d, gradient flows to x.
        x_d = x + (x_d - x).detach()

        if return_vq:
            return x_d.view(bs, f*j, c).contiguous(), commit_loss

        x_d = x_d.view(bs, f, j, c).permute(0, 3, 1, 2).contiguous()
        return x_d, commit_loss
117
+
118
+
119
+
120
+
121
class Decoder(nn.Module):
    """Mirror of ``Encoder``: maps latent (B, in_channels, F, J) features back
    to joint space, narrowing channels through two ResBlock stages with
    interleaved up-sampling.
    """

    def __init__(
        self,
        in_channels=3072,
        mid_channels=[512, 128],
        out_channels=3,
        upsample_rate=None,
        frame_upsample_rate=[1.0, 1.0],
        joint_upsample_rate=[1.0, 1.0],
        dim=128,
        attention_head_dim=64,
        num_attention_heads=8,
    ):
        super(Decoder, self).__init__()

        # Attribute names must stay stable so checkpoints keep loading.
        self.conv_in = nn.Conv2d(in_channels, mid_channels[0], kernel_size=3, stride=1, padding=1)
        self.resnet1 = nn.ModuleList(ResBlock(mid_channels[0], mid_channels[0]) for _ in range(3))
        self.upsample1 = Upsample(mid_channels[0], mid_channels[0], frame_upsample_rate=frame_upsample_rate[0], joint_upsample_rate=joint_upsample_rate[0])
        self.resnet2 = ResBlock(mid_channels[0], mid_channels[1])
        self.resnet3 = nn.ModuleList(ResBlock(mid_channels[1], mid_channels[1]) for _ in range(3))
        self.upsample2 = Upsample(mid_channels[1], mid_channels[1], frame_upsample_rate=frame_upsample_rate[1], joint_upsample_rate=joint_upsample_rate[1])
        self.conv_out = nn.Conv2d(mid_channels[-1], out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Decode latent ``x`` (B, C, F, J) to (B, out_channels, F', J')."""
        x = self.conv_in(x)
        for block in self.resnet1:
            x = block(x)
        x = self.upsample1(x)

        x = self.resnet2(x)
        for block in self.resnet3:
            x = block(x)
        x = self.upsample2(x)

        return self.conv_out(x)
158
+
159
+
160
class Upsample(nn.Module):
    # Up-samples (batch, channels, frames, joints) features via F.interpolate,
    # then applies a joint-axis Conv1d. Despite its name, `upsampler` does not
    # change resolution (kernel_size=3, stride=1, padding=1).
    def __init__(
        self,
        in_channels,
        out_channels,
        upsample_rate=None,
        frame_upsample_rate=None,
        joint_upsample_rate=None,
    ):
        super(Upsample, self).__init__()

        self.upsampler = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.upsample_rate = upsample_rate
        self.frame_upsample_rate = frame_upsample_rate
        self.joint_upsample_rate = joint_upsample_rate
        # NOTE(review): duplicate assignment of self.upsample_rate (harmless).
        self.upsample_rate = upsample_rate

    def forward(self, inputs):
        # Odd frame count (>1): keep the first frame untouched, interpolate the rest.
        if inputs.shape[2] > 1 and inputs.shape[2] % 2 == 1:
            # split first frame
            x_first, x_rest = inputs[:, :, 0], inputs[:, :, 1:]

            if self.upsample_rate is not None:
                x_first = F.interpolate(x_first, scale_factor=self.upsample_rate)
                x_rest = F.interpolate(x_rest, scale_factor=self.upsample_rate)
            else:
                # NOTE(review): x_first is 3D (B, C, J) here, so its bilinear
                # interpolation line was disabled; only x_rest is resized.
                x_rest = F.interpolate(x_rest, scale_factor=(self.frame_upsample_rate, self.joint_upsample_rate), mode="bilinear", align_corners=True)
            # Restore the frame axis on the kept first frame before concatenating.
            x_first = x_first[:, :, None, :]
            inputs = torch.cat([x_first, x_rest], dim=2)
        elif inputs.shape[2] > 1:
            # Even frame count: interpolate the whole tensor.
            if self.upsample_rate is not None:
                inputs = F.interpolate(inputs, scale_factor=self.upsample_rate)
            else:
                inputs = F.interpolate(inputs, scale_factor=(self.frame_upsample_rate, self.joint_upsample_rate), mode="bilinear", align_corners=True)
        else:
            # Single frame: drop the frame axis and interpolate joints only.
            inputs = inputs.squeeze(2)
            if self.upsample_rate is not None:
                inputs = F.interpolate(inputs, scale_factor=self.upsample_rate)
            else:
                # NOTE(review): a tuple scale_factor with mode="linear" (1D)
                # looks inconsistent -- TODO confirm this branch is exercised.
                inputs = F.interpolate(inputs, scale_factor=(self.frame_upsample_rate, self.joint_upsample_rate), mode="linear", align_corners=True)
            # NOTE(review): five indices on a 3D tensor would raise IndexError;
            # this single-frame path appears unreachable/untested -- verify.
            inputs = inputs[:, :, None, :, :]

        # Joint-axis convolution: (B, C, T, J) -> (B*T, C, J) -> conv -> back.
        b, c, t, j = inputs.shape
        inputs = inputs.permute(0, 2, 1, 3).reshape(b * t, c, j)
        inputs = self.upsampler(inputs)
        inputs = inputs.reshape(b, t, *inputs.shape[1:]).permute(0, 2, 1, 3)

        return inputs
211
+
212
+
213
class Downsample(nn.Module):
    """Down-sample (B, C, F, J): frames via average pooling, joints via a
    strided Conv1d."""

    def __init__(
        self,
        in_channels,
        out_channels,
        frame_downsample_rate,
        joint_downsample_rate
    ):
        super(Downsample, self).__init__()

        self.frame_downsample_rate = frame_downsample_rate
        self.joint_downsample_rate = joint_downsample_rate
        self.joint_downsample = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=self.joint_downsample_rate, padding=1)

    def forward(self, x):
        """Pool frames (if rate > 1), then down-sample the joint axis."""
        if self.frame_downsample_rate > 1:
            # (B, C, F, J) -> (B*J, C, F) so pooling runs over frames.
            batch_size, channels, frames, joints = x.shape
            x = x.permute(0, 3, 1, 2).reshape(batch_size * joints, channels, frames)
            if x.shape[-1] % 2 == 1:
                # Odd frame count: keep the first frame untouched, pool the rest.
                x_first, x_rest = x[..., 0], x[..., 1:]
                if x_rest.shape[-1] > 0:
                    x_rest = F.avg_pool1d(x_rest, kernel_size=self.frame_downsample_rate, stride=self.frame_downsample_rate)
                x = torch.cat([x_first[..., None], x_rest], dim=-1)
            else:
                # Fix: this branch previously hard-coded kernel_size=2/stride=2
                # while the odd-frame branch used self.frame_downsample_rate;
                # use the configured rate consistently (identical behavior for
                # the shipped rate of 2).
                x = F.avg_pool1d(x, kernel_size=self.frame_downsample_rate, stride=self.frame_downsample_rate)
            # (B*J, C, F') -> (B, C, F', J)
            x = x.reshape(batch_size, joints, channels, x.shape[-1]).permute(0, 2, 3, 1)

        # Joint down-sampling: (B, C, F, J) -> (B*F, C, J) -> strided conv -> back.
        batch_size, channels, frames, joints = x.shape
        x = x.permute(0, 2, 1, 3).reshape(batch_size * frames, channels, joints)
        x = self.joint_downsample(x)
        x = x.reshape(batch_size, frames, x.shape[1], x.shape[2]).permute(0, 2, 1, 3)
        return x
257
+
258
+
259
+
260
class ResBlock(nn.Module):
    """GroupNorm-SiLU-Conv residual block whose 3x3 conv dilation grows as the
    channel width shrinks relative to ``max_channels``."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 group_num=32,
                 max_channels=512):
        super(ResBlock, self).__init__()
        # Narrower blocks get a larger dilation; never below 1.
        skip = max(1, max_channels // out_channels - 1)
        layers = [
            nn.GroupNorm(group_num, in_channels, eps=1e-06, affine=True),
            nn.SiLU(),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=skip, dilation=skip),
            nn.GroupNorm(group_num, out_channels, eps=1e-06, affine=True),
            nn.SiLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1, padding=0),
        ]
        self.block = nn.Sequential(*layers)
        # 1x1 projection only when channel counts differ, otherwise identity.
        if in_channels != out_channels:
            self.conv_short = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        else:
            self.conv_short = nn.Identity()

    def forward(self, x):
        """Residual forward; projects the skip path only on a shape mismatch."""
        residual = self.block(x)
        if residual.shape != x.shape:
            x = self.conv_short(x)
        return x + residual
284
+
285
+
286
+
287
class SMPL_VQVAE(nn.Module):
    """Motion VQ-VAE tying together an Encoder, a VectorQuantizer and a Decoder."""

    def __init__(self, encoder, decoder, vq):
        super(SMPL_VQVAE, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.vq = vq

    def to(self, device):
        """Move all submodules to ``device`` and remember it."""
        self.encoder = self.encoder.to(device)
        self.decoder = self.decoder.to(device)
        self.vq = self.vq.to(device)
        self.device = device
        return self

    def encdec_slice_frames(self, x, frame_batch_size, encdec, return_vq):
        """Run ``encdec`` over the frame axis of ``x`` in chunks.

        The first chunk absorbs the remainder frames so every later chunk has
        exactly ``frame_batch_size`` frames. When ``encdec`` is the encoder and
        a quantizer exists, the concatenated result is quantized and
        (tokens, loss) is returned; otherwise (frames, None, None).
        """
        num_frames = x.shape[2]
        remainder = num_frames % frame_batch_size
        chunks = []

        for i in range(num_frames // frame_batch_size):
            begin = frame_batch_size * i + (0 if i == 0 else remainder)
            end = frame_batch_size * (i + 1) + remainder
            chunks.append(encdec(x[:, :, begin:end]))

        if encdec == self.encoder and self.vq is not None:
            quantized, loss = self.vq(torch.cat(chunks, dim=2), return_vq=return_vq)
            return quantized, loss
        return torch.cat(chunks, dim=2), None, None

    def forward(self, x, return_vq=False):
        """Encode (+quantize) a pose tensor; decode back unless ``return_vq``."""
        x = x.permute(0, 3, 1, 2)
        x, loss = self.encdec_slice_frames(x, frame_batch_size=8, encdec=self.encoder, return_vq=return_vq)

        if return_vq:
            return x, loss
        x, _, _ = self.encdec_slice_frames(x, frame_batch_size=2, encdec=self.decoder, return_vq=return_vq)
        return x.permute(0, 2, 3, 1), loss
MTV/mtv.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from typing import Union, Tuple
4
+
5
+
6
def get_1d_rotary_pos_embed(
    dim: int,
    pos: Union[np.ndarray, int],
    theta: float = 10000.0,
    use_real=False,
    linear_factor=1.0,
    ntk_factor=1.0,
    repeat_interleave_real=True,
    freqs_dtype=torch.float32,  # torch.float32, torch.float64 (flux)
):
    """Precompute 1D rotary (RoPE) frequencies.

    Args:
        dim: Embedding dimension; must be even.
        pos: Position indices ([S] numpy array) or a scalar length S.
        theta: Base for the frequency geometric progression.
        use_real: Return (cos, sin) tensors instead of complex exponentials.
        linear_factor: Context-extrapolation scaling applied to all frequencies.
        ntk_factor: NTK-aware scaling applied to ``theta``.
        repeat_interleave_real: With ``use_real``, interleave each frequency
            with itself (vs. concatenating two half-copies) to reach ``dim``.
        freqs_dtype: dtype used for the frequency computation.

    Returns:
        (cos, sin) each [S, dim] when ``use_real``; otherwise a complex
        tensor of shape [S, dim/2].
    """
    assert dim % 2 == 0

    # Accept either a scalar length or an explicit numpy index array.
    if isinstance(pos, int):
        pos = torch.arange(pos)
    if isinstance(pos, np.ndarray):
        pos = torch.from_numpy(pos)  # type: ignore # [S]

    scaled_theta = theta * ntk_factor
    exponents = torch.arange(0, dim, 2, dtype=freqs_dtype, device=pos.device)[: (dim // 2)] / dim
    freqs = 1.0 / (scaled_theta ** exponents) / linear_factor  # [D/2]
    freqs = torch.outer(pos, freqs)  # type: ignore # [S, D/2]

    if not use_real:
        # Complex exponentials e^{i*freq}: complex64, [S, D/2]
        return torch.polar(torch.ones_like(freqs), freqs)

    if repeat_interleave_real:
        # Each frequency interleaved with itself -> [S, D]
        return (
            freqs.cos().repeat_interleave(2, dim=1).float(),
            freqs.sin().repeat_interleave(2, dim=1).float(),
        )
    # Two half-copies concatenated -> [S, D]
    return (
        torch.cat([freqs.cos(), freqs.cos()], dim=-1).float(),
        torch.cat([freqs.sin(), freqs.sin()], dim=-1).float(),
    )
67
+
68
+
69
def get_3d_rotary_pos_embed(
    embed_dim, crops_coords, grid_size, temporal_size, theta: int = 10000, use_real: bool = True
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """RoPE for video tokens with a 3D (time, height, width) structure.

    Args:
        embed_dim: Head embedding size; split 1/4 temporal, 3/8 height,
            3/8 width.
        crops_coords: ((top, left), (bottom, right)) crop coordinates.
        grid_size: Spatial grid (height, width).
        temporal_size: Number of temporal positions.
        theta: Frequency base.
        use_real: Must be True; only (cos, sin) output is supported.

    Returns:
        (cos, sin), each of shape
        (temporal_size * grid_size[0] * grid_size[1], embed_dim).
    """
    if use_real is not True:
        raise ValueError(" `use_real = False` is not currently supported for get_3d_rotary_pos_embed")
    start, stop = crops_coords
    grid_size_h, grid_size_w = grid_size
    grid_h = np.linspace(start[0], stop[0], grid_size_h, endpoint=False, dtype=np.float32)
    grid_w = np.linspace(start[1], stop[1], grid_size_w, endpoint=False, dtype=np.float32)
    grid_t = np.linspace(0, temporal_size, temporal_size, endpoint=False, dtype=np.float32)

    # Per-axis share of the embedding dimension.
    dim_t = embed_dim // 4
    dim_h = embed_dim // 8 * 3
    dim_w = embed_dim // 8 * 3

    # 1D tables per axis, each as (cos, sin).
    t_cos, t_sin = get_1d_rotary_pos_embed(dim_t, grid_t, use_real=True)
    h_cos, h_sin = get_1d_rotary_pos_embed(dim_h, grid_h, use_real=True)
    w_cos, w_sin = get_1d_rotary_pos_embed(dim_w, grid_w, use_real=True)

    def broadcast_concat(freqs_t, freqs_h, freqs_w):
        # Broadcast each axis table over the other two axes, then flatten.
        freqs_t = freqs_t[:, None, None, :].expand(-1, grid_size_h, grid_size_w, -1)
        freqs_h = freqs_h[None, :, None, :].expand(temporal_size, -1, grid_size_w, -1)
        freqs_w = freqs_w[None, None, :, :].expand(temporal_size, grid_size_h, -1, -1)
        merged = torch.cat([freqs_t, freqs_h, freqs_w], dim=-1)
        return merged.view(temporal_size * grid_size_h * grid_size_w, -1)

    cos = broadcast_concat(t_cos, h_cos, w_cos)
    sin = broadcast_concat(t_sin, h_sin, w_sin)
    return cos, sin
135
+
136
+
137
def get_3d_motion_spatial_embed(
    embed_dim: int, num_joints: int, joints_mean: np.ndarray, joints_std: np.ndarray, theta: float = 10000.0
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """Rotary embedding over mean 3D joint positions.

    Each of the x/y/z columns of ``joints_mean`` is mean-centered and drives
    one third of ``embed_dim``. ``num_joints`` and ``joints_std`` are accepted
    for signature compatibility but not used here.

    Returns:
        (cos, sin), each of shape (len(joints_mean), embed_dim).
    """
    assert embed_dim % 2 == 0 and embed_dim % 3 == 0

    def _axis_rope(dim, pos, freqs_dtype=torch.float32):
        # Standard 1D RoPE table with interleaved duplication to width `dim`.
        if isinstance(pos, np.ndarray):
            pos = torch.from_numpy(pos)
        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=freqs_dtype, device=pos.device)[: (dim // 2)] / dim))
        angles = torch.outer(pos, inv_freq)  # [S, D/2]
        return (
            angles.cos().repeat_interleave(2, dim=1).float(),
            angles.sin().repeat_interleave(2, dim=1).float(),
        )

    axis_dim = embed_dim // 3
    per_axis = []
    for axis in range(3):
        coords = joints_mean[:, axis]
        # Center each coordinate axis around its own mean.
        per_axis.append(_axis_rope(axis_dim, coords - coords.mean()))

    freqs_cos = torch.cat([c for c, _ in per_axis], dim=-1)
    freqs_sin = torch.cat([s for _, s in per_axis], dim=-1)

    return freqs_cos, freqs_sin
170
+
171
def prepare_motion_embeddings(num_frames, num_joints, joints_mean, joints_std, theta=10000, device='cuda'):
    """Build the combined temporal (44-dim) + spatial (84-dim) RoPE tables.

    Returns:
        (cos, sin), each of shape (num_frames * num_joints, 128), on ``device``.
    """
    # Temporal table, broadcast over joints.
    t_cos, t_sin = get_1d_rotary_pos_embed(44, num_frames, theta, use_real=True)
    t_cos = t_cos[:, None, :].expand(-1, num_joints, -1).reshape(num_frames * num_joints, -1)
    t_sin = t_sin[:, None, :].expand(-1, num_joints, -1).reshape(num_frames * num_joints, -1)

    # Spatial table, broadcast over frames.
    s_cos, s_sin = get_3d_motion_spatial_embed(84, num_joints, joints_mean, joints_std, theta)
    s_cos = s_cos[None, :, :].expand(num_frames, -1, -1).reshape(num_frames * num_joints, -1)
    s_sin = s_sin[None, :, :].expand(num_frames, -1, -1).reshape(num_frames * num_joints, -1)

    motion_embed_cos = torch.cat([t_cos, s_cos], dim=-1).to(device=device)
    motion_embed_sin = torch.cat([t_sin, s_sin], dim=-1).to(device=device)
    return motion_embed_cos, motion_embed_sin
181
+
182
def apply_rotary_emb(x, freqs_cis):
    """Apply rotary position embedding to ``x`` given ``freqs_cis=(cos, sin)``.

    ``x`` has shape (B, H, S, D); cos/sin are [S, D] and are broadcast over
    the leading batch/head axes.
    """
    cos, sin = freqs_cis  # [S, D]
    cos = cos[None, None].to(x.device)
    sin = sin[None, None].to(x.device)

    # Pairwise rotation: interleaved (a, b) pairs become (-b, a).
    real, imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1)  # [B, S, H, D//2]
    rotated = torch.stack([-imag, real], dim=-1).flatten(3)

    return (x.float() * cos + rotated.float() * sin).to(x.dtype)
MTV/nlf.py ADDED
File without changes
MTV/nodes.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gc
4
+ from ..utils import log, dict_to_device
5
+ import numpy as np
6
+ from accelerate import init_empty_weights
7
+ from accelerate.utils import set_module_tensor_to_device
8
+
9
+ import comfy.model_management as mm
10
+ from comfy.utils import load_torch_file
11
+ import folder_paths
12
+
13
+ script_directory = os.path.dirname(os.path.abspath(__file__))
14
+ device = mm.get_torch_device()
15
+ offload_device = mm.unet_offload_device()
16
+
17
+ local_model_path = os.path.join(folder_paths.models_dir, "nlf", "nlf_l_multi_0.3.2.torchscript")
18
+
19
+ from .motion4d import SMPL_VQVAE, VectorQuantizer, Encoder, Decoder
20
+ from .mtv import prepare_motion_embeddings
21
+
22
class DownloadAndLoadNLFModel:
    """ComfyUI node: downloads (if missing) and loads the NLF TorchScript model."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "url": (
                    [
                        "https://github.com/isarandi/nlf/releases/download/v0.3.2/nlf_l_multi_0.3.2.torchscript"
                    ],
                )
            },
        }

    RETURN_TYPES = ("NLFMODEL",)
    RETURN_NAMES = ("nlf_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, url):
        """Fetch the TorchScript file to the local models dir and jit-load it.

        Raises on a failed download (the old code only printed the status code
        and then attempted to load a file that was never written). Streams the
        response instead of buffering the whole checkpoint in memory.
        """
        if not os.path.exists(local_model_path):
            log.info(f"Downloading NLF model to: {local_model_path}")
            import requests
            os.makedirs(os.path.dirname(local_model_path), exist_ok=True)
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()
                with open(local_model_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        f.write(chunk)

        model = torch.jit.load(local_model_path).eval()

        return (model,)
56
+
57
class LoadNLFModel:
    """ComfyUI node: loads an NLF TorchScript model from an explicit path."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "path": ("STRING", {"default": local_model_path}),
            },
        }

    RETURN_TYPES = ("NLFMODEL",)
    RETURN_NAMES = ("nlf_model", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, path):
        """Jit-load the TorchScript module at ``path`` in eval mode."""
        return (torch.jit.load(path).eval(),)
75
+
76
class LoadVQVAE:
    """ComfyUI node: builds the MTV-Crafter motion VQ-VAE and loads its weights."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model_name": (folder_paths.get_filename_list("vae"), {"tooltip": "These models are loaded from 'ComfyUI/models/vae'"}),
            },
        }

    RETURN_TYPES = ("VQVAE",)
    RETURN_NAMES = ("vqvae", )
    FUNCTION = "loadmodel"
    CATEGORY = "WanVideoWrapper"

    def loadmodel(self, model_name):
        """Instantiate encoder/quantizer/decoder, then strictly load the checkpoint."""
        state_dict = load_torch_file(folder_paths.get_full_path("vae", model_name), safe_load=True)

        # Fixed architecture matching the published MTV-Crafter checkpoint.
        motion_encoder = Encoder(
            in_channels=3,
            mid_channels=[128, 512],
            out_channels=3072,
            downsample_time=[2, 2],
            downsample_joint=[1, 1],
        )
        motion_decoder = Decoder(
            in_channels=3072,
            mid_channels=[512, 128],
            out_channels=3,
            upsample_rate=2.0,
            frame_upsample_rate=[2.0, 2.0],
            joint_upsample_rate=[1.0, 1.0],
        )
        motion_quant = VectorQuantizer(nb_code=8192, code_dim=3072)

        vqvae = SMPL_VQVAE(motion_encoder, motion_decoder, motion_quant).to(device)
        vqvae.load_state_dict(state_dict, strict=True)

        return (vqvae,)
116
+
117
class MTVCrafterEncodePoses:
    """ComfyUI node: normalizes NLF joint predictions and encodes them with the
    motion VQ-VAE, returning motion tokens and a reconstructed pose preview."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "vqvae": ("VQVAE", {"tooltip": "VQVAE model"}),
                "poses": ("NLFPRED", {"tooltip": "Input poses for the model"}),
            },
        }

    RETURN_TYPES = ("MTVCRAFTERMOTION", "NLFPRED")
    RETURN_NAMES = ("mtvcrafter_motion", "pose_results")
    FUNCTION = "encode"
    CATEGORY = "WanVideoWrapper"

    def encode(self, vqvae, poses):
        """Return ({tokens, mean, std}, de-normalized reconstructed joints).

        Fix: removed the stray debug print of norm_poses and the dead
        commented-out pickle-loading scaffold.
        """
        # Dataset-level joint statistics; shape (24, 3) per the checked-in files.
        global_mean = np.load(os.path.join(script_directory, "data", "mean.npy"))
        global_std = np.load(os.path.join(script_directory, "data", "std.npy"))

        # One (J, 3) array per frame; index 0 takes the first detected person.
        smpl_poses = np.array([pose[0].cpu().numpy() for pose in poses['joints3d_nonparam'][0]])

        norm_poses = torch.tensor((smpl_poses - global_mean) / global_std).unsqueeze(0)

        vqvae.to(device)
        # Quantized motion tokens used for conditioning downstream.
        motion_tokens, vq_loss = vqvae(norm_poses.to(device), return_vq=True)
        # Full encode/decode round-trip, de-normalized for visualization.
        recon_motion = vqvae(norm_poses.to(device))[0][0].to(dtype=torch.float32).cpu().detach() * global_std + global_mean
        vqvae.to(offload_device)

        poses_dict = {
            'mtv_motion_tokens': motion_tokens,
            'global_mean': global_mean,
            'global_std': global_std
        }

        return poses_dict, recon_motion
167
+
168
+
169
class NLFPredict:
    """ComfyUI node: runs the NLF TorchScript model on a batch of images."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("NLFMODEL",),
            "images": ("IMAGE", {"tooltip": "Input images for the model"}),
            },
        }

    RETURN_TYPES = ("NLFPRED", )
    RETURN_NAMES = ("pose_results",)
    FUNCTION = "predict"
    CATEGORY = "WanVideoWrapper"

    def predict(self, model, images):
        """Return a dict of pose outputs (currently only 'joints3d_nonparam')."""
        model.to(device)
        # NHWC -> NCHW for the TorchScript model.
        pred = model.detect_smpl_batched(images.permute(0, 3, 1, 2).to(device))
        model.to(offload_device)

        pred = dict_to_device(pred, offload_device)

        # Keep only the keys downstream nodes consume; absent keys become None.
        pose_results = {key: [pred.get(key)] for key in ('joints3d_nonparam',)}

        return (pose_results,)
202
+
203
class DrawNLFPoses:
    """ComfyUI node: renders NLF pose predictions onto control images."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "poses": ("NLFPRED", {"tooltip": "Input poses for the model"}),
            "width": ("INT", {"default": 512}),
            "height": ("INT", {"default": 512}),
            },
        }

    RETURN_TYPES = ("IMAGE", )
    RETURN_NAMES = ("image",)
    FUNCTION = "predict"
    CATEGORY = "WanVideoWrapper"

    def predict(self, poses, width, height):
        """Draw each frame's joints onto a (height, width) canvas batch.

        Fix: removed stray debug ``print(type(poses))`` that spammed stdout.
        """
        from .draw_pose import get_control_conditions
        # Accept either the raw prediction dict or an already-unwrapped list.
        if isinstance(poses, dict):
            pose_input = poses['joints3d_nonparam'][0] if 'joints3d_nonparam' in poses else poses
        else:
            pose_input = poses
        control_conditions = get_control_conditions(pose_input, height, width)

        return (control_conditions,)
228
+
229
# Registration tables consumed by ComfyUI: node key -> class, and key -> UI label.
NODE_CLASS_MAPPINGS = {
    "DownloadAndLoadNLFModel": DownloadAndLoadNLFModel,
    "NLFPredict": NLFPredict,
    "DrawNLFPoses": DrawNLFPoses,
    "LoadVQVAE": LoadVQVAE,
    "MTVCrafterEncodePoses": MTVCrafterEncodePoses
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "DownloadAndLoadNLFModel": "(Download)Load NLF Model",
    "NLFPredict": "NLF Predict",
    "DrawNLFPoses": "Draw NLF Poses",
    "LoadVQVAE": "Load VQVAE",
    "MTVCrafterEncodePoses": "MTV Crafter Encode Poses"
}
__init__.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""WanVideoWrapper package init: aggregate node mappings from all submodules.

Required submodules are imported directly (a failure there should fail the
whole package import); optional feature modules are loaded best-effort so a
broken optional dependency does not take down the entire node pack.
"""
import importlib
import logging

try:
    from .utils import check_duplicate_nodes, log
    duplicate_dirs = check_duplicate_nodes()
    if duplicate_dirs:
        warning_msg = f"WARNING: Found {len(duplicate_dirs)} other WanVideoWrapper directories:\n"
        for dir_path in duplicate_dirs:
            warning_msg += f" - {dir_path}\n"
        log.warning(warning_msg + "Please remove duplicates to avoid possible conflicts.")
except Exception:
    # Best-effort check only. Fall back to a stdlib logger so the optional
    # imports below can still warn even if .utils itself failed to load
    # (previously `log` could be undefined here, turning any optional-import
    # failure into a NameError).
    log = logging.getLogger(__name__)

from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
from .recammaster.nodes import NODE_CLASS_MAPPINGS as RECAM_MASTER_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as RECAM_MASTER_NODE_DISPLAY_NAME_MAPPINGS
from .skyreels.nodes import NODE_CLASS_MAPPINGS as SKYREELS_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as SKYREELS_NODE_DISPLAY_NAME_MAPPINGS
from .fantasytalking.nodes import NODE_CLASS_MAPPINGS as FANTASYTALKING_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as FANTASYTALKING_NODE_DISPLAY_NAME_MAPPINGS
from .nodes_sampler import NODE_CLASS_MAPPINGS as SAMPLER_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as SAMPLER_NODE_DISPLAY_NAME_MAPPINGS
from .fun_camera.nodes import NODE_CLASS_MAPPINGS as FUN_CAMERA_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as FUN_CAMERA_NODE_DISPLAY_NAME_MAPPINGS
from .uni3c.nodes import NODE_CLASS_MAPPINGS as UNI3C_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as UNI3C_NODE_DISPLAY_NAME_MAPPINGS
from .controlnet.nodes import NODE_CLASS_MAPPINGS as CONTROLNET_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as CONTROLNET_NODE_DISPLAY_NAME_MAPPINGS
from .ATI.nodes import NODE_CLASS_MAPPINGS as ATI_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as ATI_NODE_DISPLAY_NAME_MAPPINGS
from .multitalk.nodes import NODE_CLASS_MAPPINGS as MULTITALK_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as MULTITALK_NODE_DISPLAY_NAME_MAPPINGS
from .nodes_model_loading import NODE_CLASS_MAPPINGS as MODEL_LOADING_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as MODEL_LOADING_NODE_DISPLAY_NAME_MAPPINGS
from .nodes_utility import NODE_CLASS_MAPPINGS as UTILITY_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as UTILITY_NODE_DISPLAY_NAME_MAPPINGS
from .cache_methods.nodes_cache import NODE_CLASS_MAPPINGS as NODE_CACHE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as NODE_CACHE_DISPLAY_NAME_MAPPINGS
from .nodes_deprecated import NODE_CLASS_MAPPINGS as DEPRECATED_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS
from .s2v.nodes import NODE_CLASS_MAPPINGS as S2V_NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as S2V_NODE_DISPLAY_NAME_MAPPINGS


def _load_optional_nodes(module_name, label):
    """Import an optional node module relative to this package.

    Returns its (class_mappings, display_name_mappings); on any failure a
    warning is logged and two empty dicts are returned so the rest of the
    pack still loads.
    """
    try:
        mod = importlib.import_module(module_name, __package__)
        return mod.NODE_CLASS_MAPPINGS, mod.NODE_DISPLAY_NAME_MAPPINGS
    except Exception as e:
        log.warning(f"WanVideoWrapper WARNING: {label} nodes not available due to error in importing them: {e}")
        return {}, {}


QWEN_NODE_CLASS_MAPPINGS, QWEN_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".qwen.qwen", "Qwen")
FANTASYPORTRAIT_NODE_CLASS_MAPPINGS, FANTASYPORTRAIT_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".fantasyportrait.nodes", "FantasyPortrait")
UNIANIMATE_NODE_CLASS_MAPPINGS, UNIANIMATE_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".unianimate.nodes", "UniAnimate")
MTV_NODE_CLASS_MAPPINGS, MTV_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".MTV.nodes", "MTV")
HUMO_NODE_CLASS_MAPPINGS, HUMO_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".HuMo.nodes", "HuMo")
LYNX_NODE_CLASS_MAPPINGS, LYNX_NODE_DISPLAY_NAME_MAPPINGS = _load_optional_nodes(".lynx.nodes", "Lynx")

# Merge every submodule's mappings in the same order as before, so later
# entries keep overriding earlier ones on key collisions.
for _class_map, _display_map in (
    (RECAM_MASTER_NODE_CLASS_MAPPINGS, RECAM_MASTER_NODE_DISPLAY_NAME_MAPPINGS),
    (UNIANIMATE_NODE_CLASS_MAPPINGS, UNIANIMATE_NODE_DISPLAY_NAME_MAPPINGS),
    (SKYREELS_NODE_CLASS_MAPPINGS, SKYREELS_NODE_DISPLAY_NAME_MAPPINGS),
    (FANTASYTALKING_NODE_CLASS_MAPPINGS, FANTASYTALKING_NODE_DISPLAY_NAME_MAPPINGS),
    (FANTASYPORTRAIT_NODE_CLASS_MAPPINGS, FANTASYPORTRAIT_NODE_DISPLAY_NAME_MAPPINGS),
    (FUN_CAMERA_NODE_CLASS_MAPPINGS, FUN_CAMERA_NODE_DISPLAY_NAME_MAPPINGS),
    (UNI3C_NODE_CLASS_MAPPINGS, UNI3C_NODE_DISPLAY_NAME_MAPPINGS),
    (CONTROLNET_NODE_CLASS_MAPPINGS, CONTROLNET_NODE_DISPLAY_NAME_MAPPINGS),
    (ATI_NODE_CLASS_MAPPINGS, ATI_NODE_DISPLAY_NAME_MAPPINGS),
    (MULTITALK_NODE_CLASS_MAPPINGS, MULTITALK_NODE_DISPLAY_NAME_MAPPINGS),
    (MODEL_LOADING_NODE_CLASS_MAPPINGS, MODEL_LOADING_NODE_DISPLAY_NAME_MAPPINGS),
    (UTILITY_NODE_CLASS_MAPPINGS, UTILITY_NODE_DISPLAY_NAME_MAPPINGS),
    (NODE_CACHE_CLASS_MAPPINGS, NODE_CACHE_DISPLAY_NAME_MAPPINGS),
    (DEPRECATED_NODE_CLASS_MAPPINGS, DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS),
    (QWEN_NODE_CLASS_MAPPINGS, QWEN_NODE_DISPLAY_NAME_MAPPINGS),
    (MTV_NODE_CLASS_MAPPINGS, MTV_NODE_DISPLAY_NAME_MAPPINGS),
    (S2V_NODE_CLASS_MAPPINGS, S2V_NODE_DISPLAY_NAME_MAPPINGS),
    (HUMO_NODE_CLASS_MAPPINGS, HUMO_NODE_DISPLAY_NAME_MAPPINGS),
    (SAMPLER_NODE_CLASS_MAPPINGS, SAMPLER_NODE_DISPLAY_NAME_MAPPINGS),
    (LYNX_NODE_CLASS_MAPPINGS, LYNX_NODE_DISPLAY_NAME_MAPPINGS),
):
    NODE_CLASS_MAPPINGS.update(_class_map)
    NODE_DISPLAY_NAME_MAPPINGS.update(_display_map)

__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
cache_methods/cache_methods.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..utils import log
2
+ import torch
3
+
4
def set_transformer_cache_method(transformer, timesteps, cache_args=None):
    """Configure one of the supported step-caching methods on a transformer.

    Args:
        transformer: model to patch; cache settings are stored as attributes.
        timesteps: sampling timesteps, used to resolve end_step == -1.
        cache_args: dict produced by one of the cache nodes (TeaCache,
            MagCache or EasyCache). When None the transformer is returned
            unchanged (previously this crashed on subscripting None).

    Returns:
        The same transformer instance, configured in place.
    """
    if cache_args is None:
        return transformer

    # end_step == -1 means "run until the last sampling step".
    end_step = len(timesteps) - 1 if cache_args["end_step"] == -1 else cache_args["end_step"]
    cache_type = cache_args["cache_type"]
    transformer.cache_device = cache_args["cache_device"]

    if cache_type == "TeaCache":
        log.info(f"TeaCache: Using cache device: {transformer.cache_device}")
        transformer.teacache_state.clear_all()
        transformer.enable_teacache = True
        transformer.rel_l1_thresh = cache_args["rel_l1_thresh"]
        transformer.teacache_start_step = cache_args["start_step"]
        transformer.teacache_end_step = end_step
        transformer.teacache_use_coefficients = cache_args["use_coefficients"]
        transformer.teacache_mode = cache_args["mode"]
    elif cache_type == "MagCache":
        log.info(f"MagCache: Using cache device: {transformer.cache_device}")
        transformer.magcache_state.clear_all()
        transformer.enable_magcache = True
        transformer.magcache_start_step = cache_args["start_step"]
        transformer.magcache_end_step = end_step
        transformer.magcache_thresh = cache_args["magcache_thresh"]
        transformer.magcache_K = cache_args["magcache_K"]
    elif cache_type == "EasyCache":
        log.info(f"EasyCache: Using cache device: {transformer.cache_device}")
        transformer.easycache_state.clear_all()
        transformer.enable_easycache = True
        transformer.easycache_start_step = cache_args["start_step"]
        transformer.easycache_end_step = end_step
        transformer.easycache_thresh = cache_args["easycache_thresh"]
    return transformer
31
+
32
class TeaCacheState:
    """Per-prediction bookkeeping for TeaCache step skipping."""

    def __init__(self, cache_device='cpu'):
        self.cache_device = cache_device
        self.clear_all()

    def new_prediction(self, cache_device='cpu'):
        """Create new prediction state and return its ID"""
        self.cache_device = cache_device
        pred_id = self._next_pred_id
        self._next_pred_id = pred_id + 1
        self.states[pred_id] = {
            'previous_residual': None,
            'accumulated_rel_l1_distance': 0,
            'previous_modulated_input': None,
            'skipped_steps': [],
        }
        return pred_id

    def update(self, pred_id, **kwargs):
        """Update state for specific prediction"""
        if pred_id not in self.states:
            return None
        self.states[pred_id].update(kwargs)

    def get(self, pred_id):
        """Return the state dict for *pred_id*, or an empty dict if unknown."""
        return self.states.get(pred_id, {})

    def clear_all(self):
        """Drop every stored prediction and restart ID numbering."""
        self.states = {}
        self._next_pred_id = 0
64
+
65
class MagCacheState:
    """Per-prediction bookkeeping for MagCache step skipping."""

    def __init__(self, cache_device='cpu'):
        self.cache_device = cache_device
        self.clear_all()

    def new_prediction(self, cache_device='cpu'):
        """Create new prediction state and return its ID"""
        self.cache_device = cache_device
        pred_id = self._next_pred_id
        self._next_pred_id = pred_id + 1
        self.states[pred_id] = {
            'residual_cache': None,
            'accumulated_ratio': 1.0,
            'accumulated_steps': 0,
            'accumulated_err': 0,
            'skipped_steps': [],
        }
        return pred_id

    def update(self, pred_id, **kwargs):
        """Update state for specific prediction"""
        if pred_id not in self.states:
            return None
        self.states[pred_id].update(kwargs)

    def get(self, pred_id):
        """Return the state dict for *pred_id*, or an empty dict if unknown."""
        return self.states.get(pred_id, {})

    def clear_all(self):
        """Drop every stored prediction and restart ID numbering."""
        self.states = {}
        self._next_pred_id = 0
98
+
99
class EasyCacheState:
    """Per-prediction bookkeeping for EasyCache step skipping."""

    def __init__(self, cache_device='cpu'):
        self.cache_device = cache_device
        self.clear_all()

    def new_prediction(self, cache_device='cpu'):
        """Create a new prediction state and return its ID."""
        self.cache_device = cache_device
        pred_id = self._next_pred_id
        self._next_pred_id = pred_id + 1
        self.states[pred_id] = {
            'previous_raw_input': None,
            'previous_raw_output': None,
            'cache': None,
            'accumulated_error': 0.0,
            'skipped_steps': [],
        }
        return pred_id

    def update(self, pred_id, **kwargs):
        """Update state for a specific prediction."""
        if pred_id not in self.states:
            return None
        self.states[pred_id].update(kwargs)

    def get(self, pred_id):
        """Return the state dict for *pred_id*, or an empty dict if unknown."""
        return self.states.get(pred_id, {})

    def clear_all(self):
        """Drop every stored prediction and restart ID numbering."""
        self.states = {}
        self._next_pred_id = 0
132
+
133
def relative_l1_distance(last_tensor, current_tensor):
    """Mean absolute difference between the tensors, normalized by the mean
    magnitude of *last_tensor*; returned as float32 on current_tensor's device."""
    previous = last_tensor.to(current_tensor.device)
    numerator = (previous - current_tensor).abs().mean()
    denominator = last_tensor.abs().mean()
    ratio = numerator / denominator
    return ratio.to(torch.float32).to(current_tensor.device)
138
+
139
def cache_report(transformer, cache_args):
    """Log how many steps each prediction skipped, then reset all cache state.

    Args:
        transformer: model carrying the teacache/magcache/easycache state
            objects populated during sampling.
        cache_args: dict with a "cache_type" key selecting which state to
            report. Unknown cache types are skipped instead of crashing
            (previously `states` was None and `.items()` raised).
    """
    cache_type = cache_args["cache_type"]
    containers = {
        "TeaCache": transformer.teacache_state,
        "MagCache": transformer.magcache_state,
        "EasyCache": transformer.easycache_state,
    }
    container = containers.get(cache_type)
    if container is not None:
        # pred_id 0/1 correspond to the cond/uncond passes by convention.
        state_names = {
            0: "conditional",
            1: "unconditional"
        }
        for pred_id, state in container.states.items():
            name = state_names.get(pred_id, f"prediction_{pred_id}")
            if 'skipped_steps' in state:
                log.info(f"{cache_type} skipped: {len(state['skipped_steps'])} {name} steps: {state['skipped_steps']}")
    # Always reset every cache state so nothing leaks into the next run.
    transformer.teacache_state.clear_all()
    transformer.magcache_state.clear_all()
    transformer.easycache_state.clear_all()
cache_methods/nodes_cache.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from comfy import model_management as mm
2
+
3
class WanVideoTeaCache:
    """Node producing TeaCache configuration for the WanVideo sampler."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "rel_l1_thresh": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.001,
                                            "tooltip": "Higher values will make TeaCache more aggressive, faster, but may cause artifacts. Good value range for 1.3B: 0.05 - 0.08, for other models 0.15-0.30"}),
                "start_step": ("INT", {"default": 1, "min": 0, "max": 9999, "step": 1, "tooltip": "Start percentage of the steps to apply TeaCache"}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 9999, "step": 1, "tooltip": "End steps to apply TeaCache"}),
                "cache_device": (["main_device", "offload_device"], {"default": "offload_device", "tooltip": "Device to cache to"}),
                "use_coefficients": ("BOOLEAN", {"default": True, "tooltip": "Use calculated coefficients for more accuracy. When enabled the rel_l1_thresh should be about 10 times higher than without"}),
            },
            "optional": {
                "mode": (["e", "e0"], {"default": "e", "tooltip": "Choice between using e (time embeds, default) or e0 (modulated time embeds)"}),
            },
        }
    RETURN_TYPES = ("CACHEARGS",)
    RETURN_NAMES = ("cache_args",)
    FUNCTION = "process"
    CATEGORY = "WanVideoWrapper"
    DESCRIPTION = """
Patch WanVideo model to use TeaCache. Speeds up inference by caching the output and
applying it instead of doing the step.  Best results are achieved by choosing the
appropriate coefficients for the model. Early steps should never be skipped, with too
aggressive values this can happen and the motion suffers. Starting later can help with that too.
When NOT using coefficients, the threshold value should be
about 10 times smaller than the value used with coefficients.

Official recommended values https://github.com/ali-vilab/TeaCache/tree/main/TeaCache4Wan2.1:


<pre style='font-family:monospace'>
+-------------------+--------+---------+--------+
|       Model       |  Low   | Medium  |  High  |
+-------------------+--------+---------+--------+
| Wan2.1 t2v 1.3B   |  0.05  |  0.07   |  0.08  |
| Wan2.1 t2v 14B    |  0.14  |  0.15   |  0.20  |
| Wan2.1 i2v 480P   |  0.13  |  0.19   |  0.26  |
| Wan2.1 i2v 720P   |  0.18  |  0.20   |  0.30  |
+-------------------+--------+---------+--------+
</pre>
"""

    def process(self, rel_l1_thresh, start_step, end_step, cache_device, use_coefficients, mode="e"):
        """Resolve the cache device and bundle the TeaCache settings into cache_args."""
        if cache_device == "main_device":
            cache_device = mm.get_torch_device()
        else:
            cache_device = mm.unet_offload_device()
        cache_args = {
            "cache_type": "TeaCache",
            "rel_l1_thresh": rel_l1_thresh,
            "start_step": start_step,
            "end_step": end_step,
            "cache_device": cache_device,
            "use_coefficients": use_coefficients,
            "mode": mode,
        }
        return (cache_args,)
61
+
62
class WanVideoMagCache:
    """Node producing MagCache configuration for the WanVideo sampler."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "magcache_thresh": ("FLOAT", {"default": 0.02, "min": 0.0, "max": 0.3, "step": 0.001, "tooltip": "How strongly to cache the output of diffusion model. This value must be non-negative."}),
                "magcache_K": ("INT", {"default": 4, "min": 0, "max": 6, "step": 1, "tooltip": "The maximum skip steps of MagCache."}),
                "start_step": ("INT", {"default": 1, "min": 0, "max": 9999, "step": 1, "tooltip": "Step to start applying MagCache"}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 9999, "step": 1, "tooltip": "Step to end applying MagCache"}),
                "cache_device": (["main_device", "offload_device"], {"default": "offload_device", "tooltip": "Device to cache to"}),
            },
        }
    RETURN_TYPES = ("CACHEARGS",)
    RETURN_NAMES = ("cache_args",)
    FUNCTION = "setargs"
    CATEGORY = "WanVideoWrapper"
    EXPERIMENTAL = True
    DESCRIPTION = "MagCache for WanVideoWrapper, source https://github.com/Zehong-Ma/MagCache"

    def setargs(self, magcache_thresh, magcache_K, start_step, end_step, cache_device):
        """Resolve the cache device and bundle the MagCache settings into cache_args."""
        if cache_device == "main_device":
            cache_device = mm.get_torch_device()
        else:
            cache_device = mm.unet_offload_device()

        cache_args = {
            "cache_type": "MagCache",
            "magcache_thresh": magcache_thresh,
            "magcache_K": magcache_K,
            "start_step": start_step,
            "end_step": end_step,
            "cache_device": cache_device,
        }
        return (cache_args,)
96
+
97
class WanVideoEasyCache:
    """Node producing EasyCache configuration for the WanVideo sampler."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "easycache_thresh": ("FLOAT", {"default": 0.015, "min": 0.0, "max": 1.0, "step": 0.001, "tooltip": "How strongly to cache the output of diffusion model. This value must be non-negative."}),
                "start_step": ("INT", {"default": 10, "min": 0, "max": 9999, "step": 1, "tooltip": "Step to start applying EasyCache"}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 9999, "step": 1, "tooltip": "Step to end applying EasyCache"}),
                "cache_device": (["main_device", "offload_device"], {"default": "offload_device", "tooltip": "Device to cache to"}),
            },
        }
    RETURN_TYPES = ("CACHEARGS",)
    RETURN_NAMES = ("cache_args",)
    FUNCTION = "setargs"
    CATEGORY = "WanVideoWrapper"
    EXPERIMENTAL = True
    DESCRIPTION = "EasyCache for WanVideoWrapper, source https://github.com/H-EmbodVis/EasyCache"

    def setargs(self, easycache_thresh, start_step, end_step, cache_device):
        """Resolve the cache device and bundle the EasyCache settings into cache_args."""
        resolved_device = mm.get_torch_device() if cache_device == "main_device" else mm.unet_offload_device()
        return ({
            "cache_type": "EasyCache",
            "easycache_thresh": easycache_thresh,
            "start_step": start_step,
            "end_step": end_step,
            "cache_device": resolved_device,
        },)
129
+
130
+
131
# (node class, display title) keyed by registration name.
_CACHE_NODE_ENTRIES = {
    "WanVideoTeaCache": (WanVideoTeaCache, "WanVideo TeaCache"),
    "WanVideoMagCache": (WanVideoMagCache, "WanVideo MagCache"),
    "WanVideoEasyCache": (WanVideoEasyCache, "WanVideo EasyCache"),
}

NODE_CLASS_MAPPINGS = {key: cls for key, (cls, _) in _CACHE_NODE_ENTRIES.items()}
NODE_DISPLAY_NAME_MAPPINGS = {key: title for key, (_, title) in _CACHE_NODE_ENTRIES.items()}
configs/T5_tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>",
103
+ "<extra_id_100>",
104
+ "<extra_id_101>",
105
+ "<extra_id_102>",
106
+ "<extra_id_103>",
107
+ "<extra_id_104>",
108
+ "<extra_id_105>",
109
+ "<extra_id_106>",
110
+ "<extra_id_107>",
111
+ "<extra_id_108>",
112
+ "<extra_id_109>",
113
+ "<extra_id_110>",
114
+ "<extra_id_111>",
115
+ "<extra_id_112>",
116
+ "<extra_id_113>",
117
+ "<extra_id_114>",
118
+ "<extra_id_115>",
119
+ "<extra_id_116>",
120
+ "<extra_id_117>",
121
+ "<extra_id_118>",
122
+ "<extra_id_119>",
123
+ "<extra_id_120>",
124
+ "<extra_id_121>",
125
+ "<extra_id_122>",
126
+ "<extra_id_123>",
127
+ "<extra_id_124>",
128
+ "<extra_id_125>",
129
+ "<extra_id_126>",
130
+ "<extra_id_127>",
131
+ "<extra_id_128>",
132
+ "<extra_id_129>",
133
+ "<extra_id_130>",
134
+ "<extra_id_131>",
135
+ "<extra_id_132>",
136
+ "<extra_id_133>",
137
+ "<extra_id_134>",
138
+ "<extra_id_135>",
139
+ "<extra_id_136>",
140
+ "<extra_id_137>",
141
+ "<extra_id_138>",
142
+ "<extra_id_139>",
143
+ "<extra_id_140>",
144
+ "<extra_id_141>",
145
+ "<extra_id_142>",
146
+ "<extra_id_143>",
147
+ "<extra_id_144>",
148
+ "<extra_id_145>",
149
+ "<extra_id_146>",
150
+ "<extra_id_147>",
151
+ "<extra_id_148>",
152
+ "<extra_id_149>",
153
+ "<extra_id_150>",
154
+ "<extra_id_151>",
155
+ "<extra_id_152>",
156
+ "<extra_id_153>",
157
+ "<extra_id_154>",
158
+ "<extra_id_155>",
159
+ "<extra_id_156>",
160
+ "<extra_id_157>",
161
+ "<extra_id_158>",
162
+ "<extra_id_159>",
163
+ "<extra_id_160>",
164
+ "<extra_id_161>",
165
+ "<extra_id_162>",
166
+ "<extra_id_163>",
167
+ "<extra_id_164>",
168
+ "<extra_id_165>",
169
+ "<extra_id_166>",
170
+ "<extra_id_167>",
171
+ "<extra_id_168>",
172
+ "<extra_id_169>",
173
+ "<extra_id_170>",
174
+ "<extra_id_171>",
175
+ "<extra_id_172>",
176
+ "<extra_id_173>",
177
+ "<extra_id_174>",
178
+ "<extra_id_175>",
179
+ "<extra_id_176>",
180
+ "<extra_id_177>",
181
+ "<extra_id_178>",
182
+ "<extra_id_179>",
183
+ "<extra_id_180>",
184
+ "<extra_id_181>",
185
+ "<extra_id_182>",
186
+ "<extra_id_183>",
187
+ "<extra_id_184>",
188
+ "<extra_id_185>",
189
+ "<extra_id_186>",
190
+ "<extra_id_187>",
191
+ "<extra_id_188>",
192
+ "<extra_id_189>",
193
+ "<extra_id_190>",
194
+ "<extra_id_191>",
195
+ "<extra_id_192>",
196
+ "<extra_id_193>",
197
+ "<extra_id_194>",
198
+ "<extra_id_195>",
199
+ "<extra_id_196>",
200
+ "<extra_id_197>",
201
+ "<extra_id_198>",
202
+ "<extra_id_199>",
203
+ "<extra_id_200>",
204
+ "<extra_id_201>",
205
+ "<extra_id_202>",
206
+ "<extra_id_203>",
207
+ "<extra_id_204>",
208
+ "<extra_id_205>",
209
+ "<extra_id_206>",
210
+ "<extra_id_207>",
211
+ "<extra_id_208>",
212
+ "<extra_id_209>",
213
+ "<extra_id_210>",
214
+ "<extra_id_211>",
215
+ "<extra_id_212>",
216
+ "<extra_id_213>",
217
+ "<extra_id_214>",
218
+ "<extra_id_215>",
219
+ "<extra_id_216>",
220
+ "<extra_id_217>",
221
+ "<extra_id_218>",
222
+ "<extra_id_219>",
223
+ "<extra_id_220>",
224
+ "<extra_id_221>",
225
+ "<extra_id_222>",
226
+ "<extra_id_223>",
227
+ "<extra_id_224>",
228
+ "<extra_id_225>",
229
+ "<extra_id_226>",
230
+ "<extra_id_227>",
231
+ "<extra_id_228>",
232
+ "<extra_id_229>",
233
+ "<extra_id_230>",
234
+ "<extra_id_231>",
235
+ "<extra_id_232>",
236
+ "<extra_id_233>",
237
+ "<extra_id_234>",
238
+ "<extra_id_235>",
239
+ "<extra_id_236>",
240
+ "<extra_id_237>",
241
+ "<extra_id_238>",
242
+ "<extra_id_239>",
243
+ "<extra_id_240>",
244
+ "<extra_id_241>",
245
+ "<extra_id_242>",
246
+ "<extra_id_243>",
247
+ "<extra_id_244>",
248
+ "<extra_id_245>",
249
+ "<extra_id_246>",
250
+ "<extra_id_247>",
251
+ "<extra_id_248>",
252
+ "<extra_id_249>",
253
+ "<extra_id_250>",
254
+ "<extra_id_251>",
255
+ "<extra_id_252>",
256
+ "<extra_id_253>",
257
+ "<extra_id_254>",
258
+ "<extra_id_255>",
259
+ "<extra_id_256>",
260
+ "<extra_id_257>",
261
+ "<extra_id_258>",
262
+ "<extra_id_259>",
263
+ "<extra_id_260>",
264
+ "<extra_id_261>",
265
+ "<extra_id_262>",
266
+ "<extra_id_263>",
267
+ "<extra_id_264>",
268
+ "<extra_id_265>",
269
+ "<extra_id_266>",
270
+ "<extra_id_267>",
271
+ "<extra_id_268>",
272
+ "<extra_id_269>",
273
+ "<extra_id_270>",
274
+ "<extra_id_271>",
275
+ "<extra_id_272>",
276
+ "<extra_id_273>",
277
+ "<extra_id_274>",
278
+ "<extra_id_275>",
279
+ "<extra_id_276>",
280
+ "<extra_id_277>",
281
+ "<extra_id_278>",
282
+ "<extra_id_279>",
283
+ "<extra_id_280>",
284
+ "<extra_id_281>",
285
+ "<extra_id_282>",
286
+ "<extra_id_283>",
287
+ "<extra_id_284>",
288
+ "<extra_id_285>",
289
+ "<extra_id_286>",
290
+ "<extra_id_287>",
291
+ "<extra_id_288>",
292
+ "<extra_id_289>",
293
+ "<extra_id_290>",
294
+ "<extra_id_291>",
295
+ "<extra_id_292>",
296
+ "<extra_id_293>",
297
+ "<extra_id_294>",
298
+ "<extra_id_295>",
299
+ "<extra_id_296>",
300
+ "<extra_id_297>",
301
+ "<extra_id_298>",
302
+ "<extra_id_299>"
303
+ ],
304
+ "bos_token": "<s>",
305
+ "eos_token": "</s>",
306
+ "pad_token": "<pad>",
307
+ "unk_token": "<unk>"
308
+ }
configs/T5_tokenizer/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3909a67b780650b35cf529ac782ad2b6b26e6d1f849d3fbb6a872905f452458
3
+ size 4548313
configs/T5_tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e197b4d3dbd71da14b4eb255f4fa91c9c1f2068b20a2de2472967ca3d22602b
3
+ size 16837417
configs/T5_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,2748 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "256000": {
36
+ "content": "<extra_id_299>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "256001": {
44
+ "content": "<extra_id_298>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "256002": {
52
+ "content": "<extra_id_297>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "256003": {
60
+ "content": "<extra_id_296>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "256004": {
68
+ "content": "<extra_id_295>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "256005": {
76
+ "content": "<extra_id_294>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "256006": {
84
+ "content": "<extra_id_293>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "256007": {
92
+ "content": "<extra_id_292>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "256008": {
100
+ "content": "<extra_id_291>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "256009": {
108
+ "content": "<extra_id_290>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "256010": {
116
+ "content": "<extra_id_289>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "256011": {
124
+ "content": "<extra_id_288>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "256012": {
132
+ "content": "<extra_id_287>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "256013": {
140
+ "content": "<extra_id_286>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "256014": {
148
+ "content": "<extra_id_285>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "256015": {
156
+ "content": "<extra_id_284>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "256016": {
164
+ "content": "<extra_id_283>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "256017": {
172
+ "content": "<extra_id_282>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "256018": {
180
+ "content": "<extra_id_281>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "256019": {
188
+ "content": "<extra_id_280>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "256020": {
196
+ "content": "<extra_id_279>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "256021": {
204
+ "content": "<extra_id_278>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "256022": {
212
+ "content": "<extra_id_277>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "256023": {
220
+ "content": "<extra_id_276>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "256024": {
228
+ "content": "<extra_id_275>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "256025": {
236
+ "content": "<extra_id_274>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "256026": {
244
+ "content": "<extra_id_273>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "256027": {
252
+ "content": "<extra_id_272>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "256028": {
260
+ "content": "<extra_id_271>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "256029": {
268
+ "content": "<extra_id_270>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "256030": {
276
+ "content": "<extra_id_269>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "256031": {
284
+ "content": "<extra_id_268>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "256032": {
292
+ "content": "<extra_id_267>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "256033": {
300
+ "content": "<extra_id_266>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "256034": {
308
+ "content": "<extra_id_265>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "256035": {
316
+ "content": "<extra_id_264>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "256036": {
324
+ "content": "<extra_id_263>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "256037": {
332
+ "content": "<extra_id_262>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "256038": {
340
+ "content": "<extra_id_261>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "256039": {
348
+ "content": "<extra_id_260>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "256040": {
356
+ "content": "<extra_id_259>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "256041": {
364
+ "content": "<extra_id_258>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "256042": {
372
+ "content": "<extra_id_257>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "256043": {
380
+ "content": "<extra_id_256>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "256044": {
388
+ "content": "<extra_id_255>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "256045": {
396
+ "content": "<extra_id_254>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "256046": {
404
+ "content": "<extra_id_253>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "256047": {
412
+ "content": "<extra_id_252>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "256048": {
420
+ "content": "<extra_id_251>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "256049": {
428
+ "content": "<extra_id_250>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "256050": {
436
+ "content": "<extra_id_249>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "256051": {
444
+ "content": "<extra_id_248>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "256052": {
452
+ "content": "<extra_id_247>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "256053": {
460
+ "content": "<extra_id_246>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "256054": {
468
+ "content": "<extra_id_245>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "256055": {
476
+ "content": "<extra_id_244>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "256056": {
484
+ "content": "<extra_id_243>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "256057": {
492
+ "content": "<extra_id_242>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "256058": {
500
+ "content": "<extra_id_241>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "256059": {
508
+ "content": "<extra_id_240>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "256060": {
516
+ "content": "<extra_id_239>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "256061": {
524
+ "content": "<extra_id_238>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "256062": {
532
+ "content": "<extra_id_237>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "256063": {
540
+ "content": "<extra_id_236>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "256064": {
548
+ "content": "<extra_id_235>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "256065": {
556
+ "content": "<extra_id_234>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "256066": {
564
+ "content": "<extra_id_233>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "256067": {
572
+ "content": "<extra_id_232>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "256068": {
580
+ "content": "<extra_id_231>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "256069": {
588
+ "content": "<extra_id_230>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "256070": {
596
+ "content": "<extra_id_229>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "256071": {
604
+ "content": "<extra_id_228>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "256072": {
612
+ "content": "<extra_id_227>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "256073": {
620
+ "content": "<extra_id_226>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "256074": {
628
+ "content": "<extra_id_225>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "256075": {
636
+ "content": "<extra_id_224>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "256076": {
644
+ "content": "<extra_id_223>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "256077": {
652
+ "content": "<extra_id_222>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "256078": {
660
+ "content": "<extra_id_221>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "256079": {
668
+ "content": "<extra_id_220>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "256080": {
676
+ "content": "<extra_id_219>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "256081": {
684
+ "content": "<extra_id_218>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "256082": {
692
+ "content": "<extra_id_217>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "256083": {
700
+ "content": "<extra_id_216>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "256084": {
708
+ "content": "<extra_id_215>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "256085": {
716
+ "content": "<extra_id_214>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "256086": {
724
+ "content": "<extra_id_213>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "256087": {
732
+ "content": "<extra_id_212>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "256088": {
740
+ "content": "<extra_id_211>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "256089": {
748
+ "content": "<extra_id_210>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "256090": {
756
+ "content": "<extra_id_209>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "256091": {
764
+ "content": "<extra_id_208>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "256092": {
772
+ "content": "<extra_id_207>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "256093": {
780
+ "content": "<extra_id_206>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "256094": {
788
+ "content": "<extra_id_205>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "256095": {
796
+ "content": "<extra_id_204>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "256096": {
804
+ "content": "<extra_id_203>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "256097": {
812
+ "content": "<extra_id_202>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "256098": {
820
+ "content": "<extra_id_201>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "256099": {
828
+ "content": "<extra_id_200>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "256100": {
836
+ "content": "<extra_id_199>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "256101": {
844
+ "content": "<extra_id_198>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "256102": {
852
+ "content": "<extra_id_197>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "256103": {
860
+ "content": "<extra_id_196>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "256104": {
868
+ "content": "<extra_id_195>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "256105": {
876
+ "content": "<extra_id_194>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "256106": {
884
+ "content": "<extra_id_193>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "256107": {
892
+ "content": "<extra_id_192>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "256108": {
900
+ "content": "<extra_id_191>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "256109": {
908
+ "content": "<extra_id_190>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "256110": {
916
+ "content": "<extra_id_189>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "256111": {
924
+ "content": "<extra_id_188>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "256112": {
932
+ "content": "<extra_id_187>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "256113": {
940
+ "content": "<extra_id_186>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "256114": {
948
+ "content": "<extra_id_185>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "256115": {
956
+ "content": "<extra_id_184>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "256116": {
964
+ "content": "<extra_id_183>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "256117": {
972
+ "content": "<extra_id_182>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "256118": {
980
+ "content": "<extra_id_181>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "256119": {
988
+ "content": "<extra_id_180>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "256120": {
996
+ "content": "<extra_id_179>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "256121": {
1004
+ "content": "<extra_id_178>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "256122": {
1012
+ "content": "<extra_id_177>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "256123": {
1020
+ "content": "<extra_id_176>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "256124": {
1028
+ "content": "<extra_id_175>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "256125": {
1036
+ "content": "<extra_id_174>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "256126": {
1044
+ "content": "<extra_id_173>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "256127": {
1052
+ "content": "<extra_id_172>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "256128": {
1060
+ "content": "<extra_id_171>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "256129": {
1068
+ "content": "<extra_id_170>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "256130": {
1076
+ "content": "<extra_id_169>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "256131": {
1084
+ "content": "<extra_id_168>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "256132": {
1092
+ "content": "<extra_id_167>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "256133": {
1100
+ "content": "<extra_id_166>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "256134": {
1108
+ "content": "<extra_id_165>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "256135": {
1116
+ "content": "<extra_id_164>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "256136": {
1124
+ "content": "<extra_id_163>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "256137": {
1132
+ "content": "<extra_id_162>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "256138": {
1140
+ "content": "<extra_id_161>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "256139": {
1148
+ "content": "<extra_id_160>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "256140": {
1156
+ "content": "<extra_id_159>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "256141": {
1164
+ "content": "<extra_id_158>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "256142": {
1172
+ "content": "<extra_id_157>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "256143": {
1180
+ "content": "<extra_id_156>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "256144": {
1188
+ "content": "<extra_id_155>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "256145": {
1196
+ "content": "<extra_id_154>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "256146": {
1204
+ "content": "<extra_id_153>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "256147": {
1212
+ "content": "<extra_id_152>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "256148": {
1220
+ "content": "<extra_id_151>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "256149": {
1228
+ "content": "<extra_id_150>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "256150": {
1236
+ "content": "<extra_id_149>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "256151": {
1244
+ "content": "<extra_id_148>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "256152": {
1252
+ "content": "<extra_id_147>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "256153": {
1260
+ "content": "<extra_id_146>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "256154": {
1268
+ "content": "<extra_id_145>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "256155": {
1276
+ "content": "<extra_id_144>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "256156": {
1284
+ "content": "<extra_id_143>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "256157": {
1292
+ "content": "<extra_id_142>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "256158": {
1300
+ "content": "<extra_id_141>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "256159": {
1308
+ "content": "<extra_id_140>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "256160": {
1316
+ "content": "<extra_id_139>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "256161": {
1324
+ "content": "<extra_id_138>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "256162": {
1332
+ "content": "<extra_id_137>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "256163": {
1340
+ "content": "<extra_id_136>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "256164": {
1348
+ "content": "<extra_id_135>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "256165": {
1356
+ "content": "<extra_id_134>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "256166": {
1364
+ "content": "<extra_id_133>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "256167": {
1372
+ "content": "<extra_id_132>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "256168": {
1380
+ "content": "<extra_id_131>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "256169": {
1388
+ "content": "<extra_id_130>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "256170": {
1396
+ "content": "<extra_id_129>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "256171": {
1404
+ "content": "<extra_id_128>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "256172": {
1412
+ "content": "<extra_id_127>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "256173": {
1420
+ "content": "<extra_id_126>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "256174": {
1428
+ "content": "<extra_id_125>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "256175": {
1436
+ "content": "<extra_id_124>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "256176": {
1444
+ "content": "<extra_id_123>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "256177": {
1452
+ "content": "<extra_id_122>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "256178": {
1460
+ "content": "<extra_id_121>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "256179": {
1468
+ "content": "<extra_id_120>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "256180": {
1476
+ "content": "<extra_id_119>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "256181": {
1484
+ "content": "<extra_id_118>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "256182": {
1492
+ "content": "<extra_id_117>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "256183": {
1500
+ "content": "<extra_id_116>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "256184": {
1508
+ "content": "<extra_id_115>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "256185": {
1516
+ "content": "<extra_id_114>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "256186": {
1524
+ "content": "<extra_id_113>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "256187": {
1532
+ "content": "<extra_id_112>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "256188": {
1540
+ "content": "<extra_id_111>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "256189": {
1548
+ "content": "<extra_id_110>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "256190": {
1556
+ "content": "<extra_id_109>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "256191": {
1564
+ "content": "<extra_id_108>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "256192": {
1572
+ "content": "<extra_id_107>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "256193": {
1580
+ "content": "<extra_id_106>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "256194": {
1588
+ "content": "<extra_id_105>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "256195": {
1596
+ "content": "<extra_id_104>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "256196": {
1604
+ "content": "<extra_id_103>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "256197": {
1612
+ "content": "<extra_id_102>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "256198": {
1620
+ "content": "<extra_id_101>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "256199": {
1628
+ "content": "<extra_id_100>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "256200": {
1636
+ "content": "<extra_id_99>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "256201": {
1644
+ "content": "<extra_id_98>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "256202": {
1652
+ "content": "<extra_id_97>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "256203": {
1660
+ "content": "<extra_id_96>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "256204": {
1668
+ "content": "<extra_id_95>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "256205": {
1676
+ "content": "<extra_id_94>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "256206": {
1684
+ "content": "<extra_id_93>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "256207": {
1692
+ "content": "<extra_id_92>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "256208": {
1700
+ "content": "<extra_id_91>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "256209": {
1708
+ "content": "<extra_id_90>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "256210": {
1716
+ "content": "<extra_id_89>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "256211": {
1724
+ "content": "<extra_id_88>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "256212": {
1732
+ "content": "<extra_id_87>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "256213": {
1740
+ "content": "<extra_id_86>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "256214": {
1748
+ "content": "<extra_id_85>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "256215": {
1756
+ "content": "<extra_id_84>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "256216": {
1764
+ "content": "<extra_id_83>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "256217": {
1772
+ "content": "<extra_id_82>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "256218": {
1780
+ "content": "<extra_id_81>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "256219": {
1788
+ "content": "<extra_id_80>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "256220": {
1796
+ "content": "<extra_id_79>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "256221": {
1804
+ "content": "<extra_id_78>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "256222": {
1812
+ "content": "<extra_id_77>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "256223": {
1820
+ "content": "<extra_id_76>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "256224": {
1828
+ "content": "<extra_id_75>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "256225": {
1836
+ "content": "<extra_id_74>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "256226": {
1844
+ "content": "<extra_id_73>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "256227": {
1852
+ "content": "<extra_id_72>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "256228": {
1860
+ "content": "<extra_id_71>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "256229": {
1868
+ "content": "<extra_id_70>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "256230": {
1876
+ "content": "<extra_id_69>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "256231": {
1884
+ "content": "<extra_id_68>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "256232": {
1892
+ "content": "<extra_id_67>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "256233": {
1900
+ "content": "<extra_id_66>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "256234": {
1908
+ "content": "<extra_id_65>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "256235": {
1916
+ "content": "<extra_id_64>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "256236": {
1924
+ "content": "<extra_id_63>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "256237": {
1932
+ "content": "<extra_id_62>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "256238": {
1940
+ "content": "<extra_id_61>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "256239": {
1948
+ "content": "<extra_id_60>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "256240": {
1956
+ "content": "<extra_id_59>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "256241": {
1964
+ "content": "<extra_id_58>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "256242": {
1972
+ "content": "<extra_id_57>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "256243": {
1980
+ "content": "<extra_id_56>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "256244": {
1988
+ "content": "<extra_id_55>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "256245": {
1996
+ "content": "<extra_id_54>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "256246": {
2004
+ "content": "<extra_id_53>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "256247": {
2012
+ "content": "<extra_id_52>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "256248": {
2020
+ "content": "<extra_id_51>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "256249": {
2028
+ "content": "<extra_id_50>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "256250": {
2036
+ "content": "<extra_id_49>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "256251": {
2044
+ "content": "<extra_id_48>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ },
2051
+ "256252": {
2052
+ "content": "<extra_id_47>",
2053
+ "lstrip": false,
2054
+ "normalized": false,
2055
+ "rstrip": false,
2056
+ "single_word": false,
2057
+ "special": true
2058
+ },
2059
+ "256253": {
2060
+ "content": "<extra_id_46>",
2061
+ "lstrip": false,
2062
+ "normalized": false,
2063
+ "rstrip": false,
2064
+ "single_word": false,
2065
+ "special": true
2066
+ },
2067
+ "256254": {
2068
+ "content": "<extra_id_45>",
2069
+ "lstrip": false,
2070
+ "normalized": false,
2071
+ "rstrip": false,
2072
+ "single_word": false,
2073
+ "special": true
2074
+ },
2075
+ "256255": {
2076
+ "content": "<extra_id_44>",
2077
+ "lstrip": false,
2078
+ "normalized": false,
2079
+ "rstrip": false,
2080
+ "single_word": false,
2081
+ "special": true
2082
+ },
2083
+ "256256": {
2084
+ "content": "<extra_id_43>",
2085
+ "lstrip": false,
2086
+ "normalized": false,
2087
+ "rstrip": false,
2088
+ "single_word": false,
2089
+ "special": true
2090
+ },
2091
+ "256257": {
2092
+ "content": "<extra_id_42>",
2093
+ "lstrip": false,
2094
+ "normalized": false,
2095
+ "rstrip": false,
2096
+ "single_word": false,
2097
+ "special": true
2098
+ },
2099
+ "256258": {
2100
+ "content": "<extra_id_41>",
2101
+ "lstrip": false,
2102
+ "normalized": false,
2103
+ "rstrip": false,
2104
+ "single_word": false,
2105
+ "special": true
2106
+ },
2107
+ "256259": {
2108
+ "content": "<extra_id_40>",
2109
+ "lstrip": false,
2110
+ "normalized": false,
2111
+ "rstrip": false,
2112
+ "single_word": false,
2113
+ "special": true
2114
+ },
2115
+ "256260": {
2116
+ "content": "<extra_id_39>",
2117
+ "lstrip": false,
2118
+ "normalized": false,
2119
+ "rstrip": false,
2120
+ "single_word": false,
2121
+ "special": true
2122
+ },
2123
+ "256261": {
2124
+ "content": "<extra_id_38>",
2125
+ "lstrip": false,
2126
+ "normalized": false,
2127
+ "rstrip": false,
2128
+ "single_word": false,
2129
+ "special": true
2130
+ },
2131
+ "256262": {
2132
+ "content": "<extra_id_37>",
2133
+ "lstrip": false,
2134
+ "normalized": false,
2135
+ "rstrip": false,
2136
+ "single_word": false,
2137
+ "special": true
2138
+ },
2139
+ "256263": {
2140
+ "content": "<extra_id_36>",
2141
+ "lstrip": false,
2142
+ "normalized": false,
2143
+ "rstrip": false,
2144
+ "single_word": false,
2145
+ "special": true
2146
+ },
2147
+ "256264": {
2148
+ "content": "<extra_id_35>",
2149
+ "lstrip": false,
2150
+ "normalized": false,
2151
+ "rstrip": false,
2152
+ "single_word": false,
2153
+ "special": true
2154
+ },
2155
+ "256265": {
2156
+ "content": "<extra_id_34>",
2157
+ "lstrip": false,
2158
+ "normalized": false,
2159
+ "rstrip": false,
2160
+ "single_word": false,
2161
+ "special": true
2162
+ },
2163
+ "256266": {
2164
+ "content": "<extra_id_33>",
2165
+ "lstrip": false,
2166
+ "normalized": false,
2167
+ "rstrip": false,
2168
+ "single_word": false,
2169
+ "special": true
2170
+ },
2171
+ "256267": {
2172
+ "content": "<extra_id_32>",
2173
+ "lstrip": false,
2174
+ "normalized": false,
2175
+ "rstrip": false,
2176
+ "single_word": false,
2177
+ "special": true
2178
+ },
2179
+ "256268": {
2180
+ "content": "<extra_id_31>",
2181
+ "lstrip": false,
2182
+ "normalized": false,
2183
+ "rstrip": false,
2184
+ "single_word": false,
2185
+ "special": true
2186
+ },
2187
+ "256269": {
2188
+ "content": "<extra_id_30>",
2189
+ "lstrip": false,
2190
+ "normalized": false,
2191
+ "rstrip": false,
2192
+ "single_word": false,
2193
+ "special": true
2194
+ },
2195
+ "256270": {
2196
+ "content": "<extra_id_29>",
2197
+ "lstrip": false,
2198
+ "normalized": false,
2199
+ "rstrip": false,
2200
+ "single_word": false,
2201
+ "special": true
2202
+ },
2203
+ "256271": {
2204
+ "content": "<extra_id_28>",
2205
+ "lstrip": false,
2206
+ "normalized": false,
2207
+ "rstrip": false,
2208
+ "single_word": false,
2209
+ "special": true
2210
+ },
2211
+ "256272": {
2212
+ "content": "<extra_id_27>",
2213
+ "lstrip": false,
2214
+ "normalized": false,
2215
+ "rstrip": false,
2216
+ "single_word": false,
2217
+ "special": true
2218
+ },
2219
+ "256273": {
2220
+ "content": "<extra_id_26>",
2221
+ "lstrip": false,
2222
+ "normalized": false,
2223
+ "rstrip": false,
2224
+ "single_word": false,
2225
+ "special": true
2226
+ },
2227
+ "256274": {
2228
+ "content": "<extra_id_25>",
2229
+ "lstrip": false,
2230
+ "normalized": false,
2231
+ "rstrip": false,
2232
+ "single_word": false,
2233
+ "special": true
2234
+ },
2235
+ "256275": {
2236
+ "content": "<extra_id_24>",
2237
+ "lstrip": false,
2238
+ "normalized": false,
2239
+ "rstrip": false,
2240
+ "single_word": false,
2241
+ "special": true
2242
+ },
2243
+ "256276": {
2244
+ "content": "<extra_id_23>",
2245
+ "lstrip": false,
2246
+ "normalized": false,
2247
+ "rstrip": false,
2248
+ "single_word": false,
2249
+ "special": true
2250
+ },
2251
+ "256277": {
2252
+ "content": "<extra_id_22>",
2253
+ "lstrip": false,
2254
+ "normalized": false,
2255
+ "rstrip": false,
2256
+ "single_word": false,
2257
+ "special": true
2258
+ },
2259
+ "256278": {
2260
+ "content": "<extra_id_21>",
2261
+ "lstrip": false,
2262
+ "normalized": false,
2263
+ "rstrip": false,
2264
+ "single_word": false,
2265
+ "special": true
2266
+ },
2267
+ "256279": {
2268
+ "content": "<extra_id_20>",
2269
+ "lstrip": false,
2270
+ "normalized": false,
2271
+ "rstrip": false,
2272
+ "single_word": false,
2273
+ "special": true
2274
+ },
2275
+ "256280": {
2276
+ "content": "<extra_id_19>",
2277
+ "lstrip": false,
2278
+ "normalized": false,
2279
+ "rstrip": false,
2280
+ "single_word": false,
2281
+ "special": true
2282
+ },
2283
+ "256281": {
2284
+ "content": "<extra_id_18>",
2285
+ "lstrip": false,
2286
+ "normalized": false,
2287
+ "rstrip": false,
2288
+ "single_word": false,
2289
+ "special": true
2290
+ },
2291
+ "256282": {
2292
+ "content": "<extra_id_17>",
2293
+ "lstrip": false,
2294
+ "normalized": false,
2295
+ "rstrip": false,
2296
+ "single_word": false,
2297
+ "special": true
2298
+ },
2299
+ "256283": {
2300
+ "content": "<extra_id_16>",
2301
+ "lstrip": false,
2302
+ "normalized": false,
2303
+ "rstrip": false,
2304
+ "single_word": false,
2305
+ "special": true
2306
+ },
2307
+ "256284": {
2308
+ "content": "<extra_id_15>",
2309
+ "lstrip": false,
2310
+ "normalized": false,
2311
+ "rstrip": false,
2312
+ "single_word": false,
2313
+ "special": true
2314
+ },
2315
+ "256285": {
2316
+ "content": "<extra_id_14>",
2317
+ "lstrip": false,
2318
+ "normalized": false,
2319
+ "rstrip": false,
2320
+ "single_word": false,
2321
+ "special": true
2322
+ },
2323
+ "256286": {
2324
+ "content": "<extra_id_13>",
2325
+ "lstrip": false,
2326
+ "normalized": false,
2327
+ "rstrip": false,
2328
+ "single_word": false,
2329
+ "special": true
2330
+ },
2331
+ "256287": {
2332
+ "content": "<extra_id_12>",
2333
+ "lstrip": false,
2334
+ "normalized": false,
2335
+ "rstrip": false,
2336
+ "single_word": false,
2337
+ "special": true
2338
+ },
2339
+ "256288": {
2340
+ "content": "<extra_id_11>",
2341
+ "lstrip": false,
2342
+ "normalized": false,
2343
+ "rstrip": false,
2344
+ "single_word": false,
2345
+ "special": true
2346
+ },
2347
+ "256289": {
2348
+ "content": "<extra_id_10>",
2349
+ "lstrip": false,
2350
+ "normalized": false,
2351
+ "rstrip": false,
2352
+ "single_word": false,
2353
+ "special": true
2354
+ },
2355
+ "256290": {
2356
+ "content": "<extra_id_9>",
2357
+ "lstrip": false,
2358
+ "normalized": false,
2359
+ "rstrip": false,
2360
+ "single_word": false,
2361
+ "special": true
2362
+ },
2363
+ "256291": {
2364
+ "content": "<extra_id_8>",
2365
+ "lstrip": false,
2366
+ "normalized": false,
2367
+ "rstrip": false,
2368
+ "single_word": false,
2369
+ "special": true
2370
+ },
2371
+ "256292": {
2372
+ "content": "<extra_id_7>",
2373
+ "lstrip": false,
2374
+ "normalized": false,
2375
+ "rstrip": false,
2376
+ "single_word": false,
2377
+ "special": true
2378
+ },
2379
+ "256293": {
2380
+ "content": "<extra_id_6>",
2381
+ "lstrip": false,
2382
+ "normalized": false,
2383
+ "rstrip": false,
2384
+ "single_word": false,
2385
+ "special": true
2386
+ },
2387
+ "256294": {
2388
+ "content": "<extra_id_5>",
2389
+ "lstrip": false,
2390
+ "normalized": false,
2391
+ "rstrip": false,
2392
+ "single_word": false,
2393
+ "special": true
2394
+ },
2395
+ "256295": {
2396
+ "content": "<extra_id_4>",
2397
+ "lstrip": false,
2398
+ "normalized": false,
2399
+ "rstrip": false,
2400
+ "single_word": false,
2401
+ "special": true
2402
+ },
2403
+ "256296": {
2404
+ "content": "<extra_id_3>",
2405
+ "lstrip": false,
2406
+ "normalized": false,
2407
+ "rstrip": false,
2408
+ "single_word": false,
2409
+ "special": true
2410
+ },
2411
+ "256297": {
2412
+ "content": "<extra_id_2>",
2413
+ "lstrip": false,
2414
+ "normalized": false,
2415
+ "rstrip": false,
2416
+ "single_word": false,
2417
+ "special": true
2418
+ },
2419
+ "256298": {
2420
+ "content": "<extra_id_1>",
2421
+ "lstrip": false,
2422
+ "normalized": false,
2423
+ "rstrip": false,
2424
+ "single_word": false,
2425
+ "special": true
2426
+ },
2427
+ "256299": {
2428
+ "content": "<extra_id_0>",
2429
+ "lstrip": false,
2430
+ "normalized": false,
2431
+ "rstrip": false,
2432
+ "single_word": false,
2433
+ "special": true
2434
+ }
2435
+ },
2436
+ "additional_special_tokens": [
2437
+ "<extra_id_0>",
2438
+ "<extra_id_1>",
2439
+ "<extra_id_2>",
2440
+ "<extra_id_3>",
2441
+ "<extra_id_4>",
2442
+ "<extra_id_5>",
2443
+ "<extra_id_6>",
2444
+ "<extra_id_7>",
2445
+ "<extra_id_8>",
2446
+ "<extra_id_9>",
2447
+ "<extra_id_10>",
2448
+ "<extra_id_11>",
2449
+ "<extra_id_12>",
2450
+ "<extra_id_13>",
2451
+ "<extra_id_14>",
2452
+ "<extra_id_15>",
2453
+ "<extra_id_16>",
2454
+ "<extra_id_17>",
2455
+ "<extra_id_18>",
2456
+ "<extra_id_19>",
2457
+ "<extra_id_20>",
2458
+ "<extra_id_21>",
2459
+ "<extra_id_22>",
2460
+ "<extra_id_23>",
2461
+ "<extra_id_24>",
2462
+ "<extra_id_25>",
2463
+ "<extra_id_26>",
2464
+ "<extra_id_27>",
2465
+ "<extra_id_28>",
2466
+ "<extra_id_29>",
2467
+ "<extra_id_30>",
2468
+ "<extra_id_31>",
2469
+ "<extra_id_32>",
2470
+ "<extra_id_33>",
2471
+ "<extra_id_34>",
2472
+ "<extra_id_35>",
2473
+ "<extra_id_36>",
2474
+ "<extra_id_37>",
2475
+ "<extra_id_38>",
2476
+ "<extra_id_39>",
2477
+ "<extra_id_40>",
2478
+ "<extra_id_41>",
2479
+ "<extra_id_42>",
2480
+ "<extra_id_43>",
2481
+ "<extra_id_44>",
2482
+ "<extra_id_45>",
2483
+ "<extra_id_46>",
2484
+ "<extra_id_47>",
2485
+ "<extra_id_48>",
2486
+ "<extra_id_49>",
2487
+ "<extra_id_50>",
2488
+ "<extra_id_51>",
2489
+ "<extra_id_52>",
2490
+ "<extra_id_53>",
2491
+ "<extra_id_54>",
2492
+ "<extra_id_55>",
2493
+ "<extra_id_56>",
2494
+ "<extra_id_57>",
2495
+ "<extra_id_58>",
2496
+ "<extra_id_59>",
2497
+ "<extra_id_60>",
2498
+ "<extra_id_61>",
2499
+ "<extra_id_62>",
2500
+ "<extra_id_63>",
2501
+ "<extra_id_64>",
2502
+ "<extra_id_65>",
2503
+ "<extra_id_66>",
2504
+ "<extra_id_67>",
2505
+ "<extra_id_68>",
2506
+ "<extra_id_69>",
2507
+ "<extra_id_70>",
2508
+ "<extra_id_71>",
2509
+ "<extra_id_72>",
2510
+ "<extra_id_73>",
2511
+ "<extra_id_74>",
2512
+ "<extra_id_75>",
2513
+ "<extra_id_76>",
2514
+ "<extra_id_77>",
2515
+ "<extra_id_78>",
2516
+ "<extra_id_79>",
2517
+ "<extra_id_80>",
2518
+ "<extra_id_81>",
2519
+ "<extra_id_82>",
2520
+ "<extra_id_83>",
2521
+ "<extra_id_84>",
2522
+ "<extra_id_85>",
2523
+ "<extra_id_86>",
2524
+ "<extra_id_87>",
2525
+ "<extra_id_88>",
2526
+ "<extra_id_89>",
2527
+ "<extra_id_90>",
2528
+ "<extra_id_91>",
2529
+ "<extra_id_92>",
2530
+ "<extra_id_93>",
2531
+ "<extra_id_94>",
2532
+ "<extra_id_95>",
2533
+ "<extra_id_96>",
2534
+ "<extra_id_97>",
2535
+ "<extra_id_98>",
2536
+ "<extra_id_99>",
2537
+ "<extra_id_100>",
2538
+ "<extra_id_101>",
2539
+ "<extra_id_102>",
2540
+ "<extra_id_103>",
2541
+ "<extra_id_104>",
2542
+ "<extra_id_105>",
2543
+ "<extra_id_106>",
2544
+ "<extra_id_107>",
2545
+ "<extra_id_108>",
2546
+ "<extra_id_109>",
2547
+ "<extra_id_110>",
2548
+ "<extra_id_111>",
2549
+ "<extra_id_112>",
2550
+ "<extra_id_113>",
2551
+ "<extra_id_114>",
2552
+ "<extra_id_115>",
2553
+ "<extra_id_116>",
2554
+ "<extra_id_117>",
2555
+ "<extra_id_118>",
2556
+ "<extra_id_119>",
2557
+ "<extra_id_120>",
2558
+ "<extra_id_121>",
2559
+ "<extra_id_122>",
2560
+ "<extra_id_123>",
2561
+ "<extra_id_124>",
2562
+ "<extra_id_125>",
2563
+ "<extra_id_126>",
2564
+ "<extra_id_127>",
2565
+ "<extra_id_128>",
2566
+ "<extra_id_129>",
2567
+ "<extra_id_130>",
2568
+ "<extra_id_131>",
2569
+ "<extra_id_132>",
2570
+ "<extra_id_133>",
2571
+ "<extra_id_134>",
2572
+ "<extra_id_135>",
2573
+ "<extra_id_136>",
2574
+ "<extra_id_137>",
2575
+ "<extra_id_138>",
2576
+ "<extra_id_139>",
2577
+ "<extra_id_140>",
2578
+ "<extra_id_141>",
2579
+ "<extra_id_142>",
2580
+ "<extra_id_143>",
2581
+ "<extra_id_144>",
2582
+ "<extra_id_145>",
2583
+ "<extra_id_146>",
2584
+ "<extra_id_147>",
2585
+ "<extra_id_148>",
2586
+ "<extra_id_149>",
2587
+ "<extra_id_150>",
2588
+ "<extra_id_151>",
2589
+ "<extra_id_152>",
2590
+ "<extra_id_153>",
2591
+ "<extra_id_154>",
2592
+ "<extra_id_155>",
2593
+ "<extra_id_156>",
2594
+ "<extra_id_157>",
2595
+ "<extra_id_158>",
2596
+ "<extra_id_159>",
2597
+ "<extra_id_160>",
2598
+ "<extra_id_161>",
2599
+ "<extra_id_162>",
2600
+ "<extra_id_163>",
2601
+ "<extra_id_164>",
2602
+ "<extra_id_165>",
2603
+ "<extra_id_166>",
2604
+ "<extra_id_167>",
2605
+ "<extra_id_168>",
2606
+ "<extra_id_169>",
2607
+ "<extra_id_170>",
2608
+ "<extra_id_171>",
2609
+ "<extra_id_172>",
2610
+ "<extra_id_173>",
2611
+ "<extra_id_174>",
2612
+ "<extra_id_175>",
2613
+ "<extra_id_176>",
2614
+ "<extra_id_177>",
2615
+ "<extra_id_178>",
2616
+ "<extra_id_179>",
2617
+ "<extra_id_180>",
2618
+ "<extra_id_181>",
2619
+ "<extra_id_182>",
2620
+ "<extra_id_183>",
2621
+ "<extra_id_184>",
2622
+ "<extra_id_185>",
2623
+ "<extra_id_186>",
2624
+ "<extra_id_187>",
2625
+ "<extra_id_188>",
2626
+ "<extra_id_189>",
2627
+ "<extra_id_190>",
2628
+ "<extra_id_191>",
2629
+ "<extra_id_192>",
2630
+ "<extra_id_193>",
2631
+ "<extra_id_194>",
2632
+ "<extra_id_195>",
2633
+ "<extra_id_196>",
2634
+ "<extra_id_197>",
2635
+ "<extra_id_198>",
2636
+ "<extra_id_199>",
2637
+ "<extra_id_200>",
2638
+ "<extra_id_201>",
2639
+ "<extra_id_202>",
2640
+ "<extra_id_203>",
2641
+ "<extra_id_204>",
2642
+ "<extra_id_205>",
2643
+ "<extra_id_206>",
2644
+ "<extra_id_207>",
2645
+ "<extra_id_208>",
2646
+ "<extra_id_209>",
2647
+ "<extra_id_210>",
2648
+ "<extra_id_211>",
2649
+ "<extra_id_212>",
2650
+ "<extra_id_213>",
2651
+ "<extra_id_214>",
2652
+ "<extra_id_215>",
2653
+ "<extra_id_216>",
2654
+ "<extra_id_217>",
2655
+ "<extra_id_218>",
2656
+ "<extra_id_219>",
2657
+ "<extra_id_220>",
2658
+ "<extra_id_221>",
2659
+ "<extra_id_222>",
2660
+ "<extra_id_223>",
2661
+ "<extra_id_224>",
2662
+ "<extra_id_225>",
2663
+ "<extra_id_226>",
2664
+ "<extra_id_227>",
2665
+ "<extra_id_228>",
2666
+ "<extra_id_229>",
2667
+ "<extra_id_230>",
2668
+ "<extra_id_231>",
2669
+ "<extra_id_232>",
2670
+ "<extra_id_233>",
2671
+ "<extra_id_234>",
2672
+ "<extra_id_235>",
2673
+ "<extra_id_236>",
2674
+ "<extra_id_237>",
2675
+ "<extra_id_238>",
2676
+ "<extra_id_239>",
2677
+ "<extra_id_240>",
2678
+ "<extra_id_241>",
2679
+ "<extra_id_242>",
2680
+ "<extra_id_243>",
2681
+ "<extra_id_244>",
2682
+ "<extra_id_245>",
2683
+ "<extra_id_246>",
2684
+ "<extra_id_247>",
2685
+ "<extra_id_248>",
2686
+ "<extra_id_249>",
2687
+ "<extra_id_250>",
2688
+ "<extra_id_251>",
2689
+ "<extra_id_252>",
2690
+ "<extra_id_253>",
2691
+ "<extra_id_254>",
2692
+ "<extra_id_255>",
2693
+ "<extra_id_256>",
2694
+ "<extra_id_257>",
2695
+ "<extra_id_258>",
2696
+ "<extra_id_259>",
2697
+ "<extra_id_260>",
2698
+ "<extra_id_261>",
2699
+ "<extra_id_262>",
2700
+ "<extra_id_263>",
2701
+ "<extra_id_264>",
2702
+ "<extra_id_265>",
2703
+ "<extra_id_266>",
2704
+ "<extra_id_267>",
2705
+ "<extra_id_268>",
2706
+ "<extra_id_269>",
2707
+ "<extra_id_270>",
2708
+ "<extra_id_271>",
2709
+ "<extra_id_272>",
2710
+ "<extra_id_273>",
2711
+ "<extra_id_274>",
2712
+ "<extra_id_275>",
2713
+ "<extra_id_276>",
2714
+ "<extra_id_277>",
2715
+ "<extra_id_278>",
2716
+ "<extra_id_279>",
2717
+ "<extra_id_280>",
2718
+ "<extra_id_281>",
2719
+ "<extra_id_282>",
2720
+ "<extra_id_283>",
2721
+ "<extra_id_284>",
2722
+ "<extra_id_285>",
2723
+ "<extra_id_286>",
2724
+ "<extra_id_287>",
2725
+ "<extra_id_288>",
2726
+ "<extra_id_289>",
2727
+ "<extra_id_290>",
2728
+ "<extra_id_291>",
2729
+ "<extra_id_292>",
2730
+ "<extra_id_293>",
2731
+ "<extra_id_294>",
2732
+ "<extra_id_295>",
2733
+ "<extra_id_296>",
2734
+ "<extra_id_297>",
2735
+ "<extra_id_298>",
2736
+ "<extra_id_299>"
2737
+ ],
2738
+ "bos_token": "<s>",
2739
+ "clean_up_tokenization_spaces": true,
2740
+ "eos_token": "</s>",
2741
+ "extra_ids": 300,
2742
+ "model_max_length": 1000000000000000019884624838656,
2743
+ "pad_token": "<pad>",
2744
+ "sp_model_kwargs": {},
2745
+ "spaces_between_special_tokens": false,
2746
+ "tokenizer_class": "T5Tokenizer",
2747
+ "unk_token": "<unk>"
2748
+ }
configs/transformer_config_i2v.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "WanModel",
3
+ "_diffusers_version": "0.30.0",
4
+ "dim": 5120,
5
+ "eps": 1e-06,
6
+ "ffn_dim": 13824,
7
+ "freq_dim": 256,
8
+ "in_dim": 36,
9
+ "model_type": "i2v",
10
+ "num_heads": 40,
11
+ "num_layers": 40,
12
+ "out_dim": 16,
13
+ "text_len": 512
14
+ }
context_windows/context.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from typing import Callable, Optional, List
3
+ import torch
4
+ from ..utils import log
5
+
6
+ def ordered_halving(val):
7
+ bin_str = f"{val:064b}"
8
+ bin_flip = bin_str[::-1]
9
+ as_int = int(bin_flip, 2)
10
+
11
+ return as_int / (1 << 64)
12
+
13
+ def does_window_roll_over(window: list[int], num_frames: int) -> tuple[bool, int]:
14
+ prev_val = -1
15
+ for i, val in enumerate(window):
16
+ val = val % num_frames
17
+ if val < prev_val:
18
+ return True, i
19
+ prev_val = val
20
+ return False, -1
21
+
22
+ def shift_window_to_start(window: list[int], num_frames: int):
23
+ start_val = window[0]
24
+ for i in range(len(window)):
25
+ # 1) subtract each element by start_val to move vals relative to the start of all frames
26
+ # 2) add num_frames and take modulus to get adjusted vals
27
+ window[i] = ((window[i] - start_val) + num_frames) % num_frames
28
+
29
+ def shift_window_to_end(window: list[int], num_frames: int):
30
+ # 1) shift window to start
31
+ shift_window_to_start(window, num_frames)
32
+ end_val = window[-1]
33
+ end_delta = num_frames - end_val - 1
34
+ for i in range(len(window)):
35
+ # 2) add end_delta to each val to slide windows to end
36
+ window[i] = window[i] + end_delta
37
+
38
+ def get_missing_indexes(windows: list[list[int]], num_frames: int) -> list[int]:
39
+ all_indexes = list(range(num_frames))
40
+ for w in windows:
41
+ for val in w:
42
+ try:
43
+ all_indexes.remove(val)
44
+ except ValueError:
45
+ pass
46
+ return all_indexes
47
+
48
+ def uniform_looped(
49
+ step: int = ...,
50
+ num_steps: Optional[int] = None,
51
+ num_frames: int = ...,
52
+ context_size: Optional[int] = None,
53
+ context_stride: int = 3,
54
+ context_overlap: int = 4,
55
+ closed_loop: bool = True,
56
+ ):
57
+ if num_frames <= context_size:
58
+ yield list(range(num_frames))
59
+ return
60
+
61
+ context_stride = min(context_stride, int(np.ceil(np.log2(num_frames / context_size))) + 1)
62
+
63
+ for context_step in 1 << np.arange(context_stride):
64
+ pad = int(round(num_frames * ordered_halving(step)))
65
+ for j in range(
66
+ int(ordered_halving(step) * context_step) + pad,
67
+ num_frames + pad + (0 if closed_loop else -context_overlap),
68
+ (context_size * context_step - context_overlap),
69
+ ):
70
+ yield [e % num_frames for e in range(j, j + context_size * context_step, context_step)]
71
+
72
+ #from AnimateDiff-Evolved by Kosinkadink (https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved)
73
+ def uniform_standard(
74
+ step: int = ...,
75
+ num_steps: Optional[int] = None,
76
+ num_frames: int = ...,
77
+ context_size: Optional[int] = None,
78
+ context_stride: int = 3,
79
+ context_overlap: int = 4,
80
+ closed_loop: bool = True,
81
+ ):
82
+ windows = []
83
+ if num_frames <= context_size:
84
+ windows.append(list(range(num_frames)))
85
+ return windows
86
+
87
+ context_stride = min(context_stride, int(np.ceil(np.log2(num_frames / context_size))) + 1)
88
+
89
+ for context_step in 1 << np.arange(context_stride):
90
+ pad = int(round(num_frames * ordered_halving(step)))
91
+ for j in range(
92
+ int(ordered_halving(step) * context_step) + pad,
93
+ num_frames + pad + (0 if closed_loop else -context_overlap),
94
+ (context_size * context_step - context_overlap),
95
+ ):
96
+ windows.append([e % num_frames for e in range(j, j + context_size * context_step, context_step)])
97
+
98
+ # now that windows are created, shift any windows that loop, and delete duplicate windows
99
+ delete_idxs = []
100
+ win_i = 0
101
+ while win_i < len(windows):
102
+ # if window is rolls over itself, need to shift it
103
+ is_roll, roll_idx = does_window_roll_over(windows[win_i], num_frames)
104
+ if is_roll:
105
+ roll_val = windows[win_i][roll_idx] # roll_val might not be 0 for windows of higher strides
106
+ shift_window_to_end(windows[win_i], num_frames=num_frames)
107
+ # check if next window (cyclical) is missing roll_val
108
+ if roll_val not in windows[(win_i+1) % len(windows)]:
109
+ # need to insert new window here - just insert window starting at roll_val
110
+ windows.insert(win_i+1, list(range(roll_val, roll_val + context_size)))
111
+ # delete window if it's not unique
112
+ for pre_i in range(0, win_i):
113
+ if windows[win_i] == windows[pre_i]:
114
+ delete_idxs.append(win_i)
115
+ break
116
+ win_i += 1
117
+
118
+ # reverse delete_idxs so that they will be deleted in an order that doesn't break idx correlation
119
+ delete_idxs.reverse()
120
+ for i in delete_idxs:
121
+ windows.pop(i)
122
+ return windows
123
+
124
+ def static_standard(
125
+ step: int = ...,
126
+ num_steps: Optional[int] = None,
127
+ num_frames: int = ...,
128
+ context_size: Optional[int] = None,
129
+ context_stride: int = 3,
130
+ context_overlap: int = 4,
131
+ closed_loop: bool = True,
132
+ ):
133
+ windows = []
134
+ if num_frames <= context_size:
135
+ windows.append(list(range(num_frames)))
136
+ return windows
137
+ # always return the same set of windows
138
+ delta = context_size - context_overlap
139
+ for start_idx in range(0, num_frames, delta):
140
+ # if past the end of frames, move start_idx back to allow same context_length
141
+ ending = start_idx + context_size
142
+ if ending >= num_frames:
143
+ final_delta = ending - num_frames
144
+ final_start_idx = start_idx - final_delta
145
+ windows.append(list(range(final_start_idx, final_start_idx + context_size)))
146
+ break
147
+ windows.append(list(range(start_idx, start_idx + context_size)))
148
+ return windows
149
+
150
+ def get_context_scheduler(name: str) -> Callable:
151
+ if name == "uniform_looped":
152
+ return uniform_looped
153
+ elif name == "uniform_standard":
154
+ return uniform_standard
155
+ elif name == "static_standard":
156
+ return static_standard
157
+ else:
158
+ raise ValueError(f"Unknown context_overlap policy {name}")
159
+
160
+
161
+ def get_total_steps(
162
+ scheduler,
163
+ timesteps: List[int],
164
+ num_steps: Optional[int] = None,
165
+ num_frames: int = ...,
166
+ context_size: Optional[int] = None,
167
+ context_stride: int = 3,
168
+ context_overlap: int = 4,
169
+ closed_loop: bool = True,
170
+ ):
171
+ return sum(
172
+ len(
173
+ list(
174
+ scheduler(
175
+ i,
176
+ num_steps,
177
+ num_frames,
178
+ context_size,
179
+ context_stride,
180
+ context_overlap,
181
+ )
182
+ )
183
+ )
184
+ for i in range(len(timesteps))
185
+ )
186
+
187
+ def create_window_mask(noise_pred_context, c, latent_video_length, context_overlap, looped=False, window_type="linear"):
188
+ window_mask = torch.ones_like(noise_pred_context)
189
+
190
+ if window_type == "pyramid":
191
+ # Create pyramid weights that peak in the middle
192
+ length = noise_pred_context.shape[1]
193
+ if length % 2 == 0:
194
+ max_weight = length // 2
195
+ weight_sequence = list(range(1, max_weight + 1, 1)) + list(range(max_weight, 0, -1))
196
+ else:
197
+ max_weight = (length + 1) // 2
198
+ weight_sequence = list(range(1, max_weight, 1)) + [max_weight] + list(range(max_weight - 1, 0, -1))
199
+
200
+ # Normalize weights to range from 0 to 1
201
+ max_val = max(weight_sequence)
202
+ weight_sequence = [w / max_val for w in weight_sequence]
203
+
204
+ # Apply the weights to create the mask
205
+ weights_tensor = torch.tensor(weight_sequence, device=noise_pred_context.device)
206
+ weights_tensor = weights_tensor.view(1, -1, 1, 1)
207
+ window_mask = weights_tensor.expand_as(window_mask).clone()
208
+
209
+ # Adjust for position in sequence if needed
210
+ if not looped:
211
+ if min(c) == 0: # First chunk
212
+ left_ramp = torch.linspace(0, 1, context_overlap, device=noise_pred_context.device).view(1, -1, 1, 1)
213
+ # Clone to avoid in-place memory conflict
214
+ left_section = window_mask[:, :context_overlap].clone()
215
+ window_mask[:, :context_overlap] = torch.maximum(left_section, left_ramp)
216
+
217
+ if max(c) == latent_video_length - 1: # Last chunk
218
+ right_ramp = torch.linspace(1, 0, context_overlap, device=noise_pred_context.device).view(1, -1, 1, 1)
219
+ # Clone to avoid in-place memory conflict
220
+ right_section = window_mask[:, -context_overlap:].clone()
221
+ window_mask[:, -context_overlap:] = torch.maximum(right_section, right_ramp)
222
+ else: # Original "linear" window masking
223
+ # Apply left-side blending for all except first chunk (or always in loop mode)
224
+ if min(c) > 0 or (looped and max(c) == latent_video_length - 1):
225
+ ramp_up = torch.linspace(0, 1, context_overlap, device=noise_pred_context.device)
226
+ ramp_up = ramp_up.view(1, -1, 1, 1)
227
+ window_mask[:, :context_overlap] = ramp_up
228
+
229
+ # Apply right-side blending for all except last chunk (or always in loop mode)
230
+ if max(c) < latent_video_length - 1 or (looped and min(c) == 0):
231
+ ramp_down = torch.linspace(1, 0, context_overlap, device=noise_pred_context.device)
232
+ ramp_down = ramp_down.view(1, -1, 1, 1)
233
+ window_mask[:, -context_overlap:] = ramp_down
234
+
235
+ return window_mask
236
+
237
+ class WindowTracker:
238
+ def __init__(self, verbose=False):
239
+ self.window_map = {} # Maps frame sequence to persistent ID
240
+ self.next_id = 0
241
+ self.cache_states = {} # Maps persistent ID to teacache state
242
+ self.verbose = verbose
243
+
244
+ def get_window_id(self, frames):
245
+ key = tuple(sorted(frames)) # Order-independent frame sequence
246
+ if key not in self.window_map:
247
+ self.window_map[key] = self.next_id
248
+ if self.verbose:
249
+ log.info(f"New window pattern {key} -> ID {self.next_id}")
250
+ self.next_id += 1
251
+ return self.window_map[key]
252
+
253
+ def get_teacache(self, window_id, base_state):
254
+ if window_id not in self.cache_states:
255
+ if self.verbose:
256
+ log.info(f"Initializing persistent teacache for window {window_id}")
257
+ self.cache_states[window_id] = base_state.copy()
258
+ return self.cache_states[window_id]
controlnet/nodes.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ from ..utils import log
4
+ import comfy.model_management as mm
5
+ from comfy.utils import load_torch_file
6
+ from tqdm import tqdm
7
+ import gc
8
+
9
+ from accelerate import init_empty_weights
10
+ from accelerate.utils import set_module_tensor_to_device
11
+ import folder_paths
12
+
13
+ class WanVideoControlnetLoader:
14
+ @classmethod
15
+ def INPUT_TYPES(s):
16
+ return {
17
+ "required": {
18
+ "model": (folder_paths.get_filename_list("controlnet"), {"tooltip": "These models are loaded from the 'ComfyUI/models/controlnet' -folder",}),
19
+
20
+ "base_precision": (["fp32", "bf16", "fp16"], {"default": "bf16"}),
21
+ "quantization": (['disabled', 'fp8_e4m3fn', 'fp8_e4m3fn_fast', 'fp8_e5m2', 'fp8_e4m3fn_fast_no_ffn'], {"default": 'disabled', "tooltip": "optional quantization method"}),
22
+ "load_device": (["main_device", "offload_device"], {"default": "main_device", "tooltip": "Initial device to load the model to, NOT recommended with the larger models unless you have 48GB+ VRAM"}),
23
+ },
24
+ }
25
+
26
+ RETURN_TYPES = ("WANVIDEOCONTROLNET",)
27
+ RETURN_NAMES = ("controlnet", )
28
+ FUNCTION = "loadmodel"
29
+ CATEGORY = "WanVideoWrapper"
30
+ DESCRIPTION = "Loads ControlNet model from 'https://huggingface.co/collections/TheDenk/wan21-controlnets-68302b430411dafc0d74d2fc'"
31
+
32
+ def loadmodel(self, model, base_precision, load_device, quantization):
33
+
34
+ device = mm.get_torch_device()
35
+ offload_device = mm.unet_offload_device()
36
+
37
+ transformer_load_device = device if load_device == "main_device" else offload_device
38
+
39
+ base_dtype = {"fp8_e4m3fn": torch.float8_e4m3fn, "fp8_e4m3fn_fast": torch.float8_e4m3fn, "bf16": torch.bfloat16, "fp16": torch.float16, "fp16_fast": torch.float16, "fp32": torch.float32}[base_precision]
40
+
41
+ model_path = folder_paths.get_full_path_or_raise("controlnet", model)
42
+
43
+ sd = load_torch_file(model_path, device=transformer_load_device, safe_load=True)
44
+
45
+ num_layers = 8 if "blocks.7.scale_shift_table" in sd else 6
46
+ out_proj_dim = sd["controlnet_blocks.0.bias"].shape[0]
47
+ downscale_coef = 16 if out_proj_dim == 3072 else 8
48
+ vae_channels = 48 if out_proj_dim == 3072 else 16
49
+
50
+ if not "control_encoder.0.0.weight" in sd:
51
+ raise ValueError("Invalid ControlNet model")
52
+
53
+ controlnet_cfg = {
54
+ "added_kv_proj_dim": None,
55
+ "attention_head_dim": 128,
56
+ "cross_attn_norm": None,
57
+ "downscale_coef": downscale_coef,
58
+ "eps": 1e-06,
59
+ "ffn_dim": 8960,
60
+ "freq_dim": 256,
61
+ "image_dim": None,
62
+ "in_channels": 3,
63
+ "num_attention_heads": 12,
64
+ "num_layers": num_layers,
65
+ "out_proj_dim": out_proj_dim,
66
+ "patch_size": [
67
+ 1,
68
+ 2,
69
+ 2
70
+ ],
71
+ "qk_norm": "rms_norm_across_heads",
72
+ "rope_max_seq_len": 1024,
73
+ "text_dim": 4096,
74
+ "vae_channels": vae_channels
75
+ }
76
+ print(f"Loading WanControlnet with config: {controlnet_cfg}")
77
+
78
+ from .wan_controlnet import WanControlnet
79
+
80
+ with init_empty_weights():
81
+ controlnet = WanControlnet(**controlnet_cfg)
82
+ controlnet.eval()
83
+
84
+ if quantization == "disabled":
85
+ for k, v in sd.items():
86
+ if isinstance(v, torch.Tensor):
87
+ if v.dtype == torch.float8_e4m3fn:
88
+ quantization = "fp8_e4m3fn"
89
+ break
90
+ elif v.dtype == torch.float8_e5m2:
91
+ quantization = "fp8_e5m2"
92
+ break
93
+
94
+ if "fp8_e4m3fn" in quantization:
95
+ dtype = torch.float8_e4m3fn
96
+ elif quantization == "fp8_e5m2":
97
+ dtype = torch.float8_e5m2
98
+ else:
99
+ dtype = base_dtype
100
+ params_to_keep = {"norm", "head", "time_in", "vector_in", "controlnet_patch_embedding", "time_", "img_emb", "modulation", "text_embedding", "adapter"}
101
+
102
+ log.info("Using accelerate to load and assign controlnet model weights to device...")
103
+ param_count = sum(1 for _ in controlnet.named_parameters())
104
+ for name, param in tqdm(controlnet.named_parameters(),
105
+ desc=f"Loading transformer parameters to {transformer_load_device}",
106
+ total=param_count,
107
+ leave=True):
108
+ dtype_to_use = base_dtype if any(keyword in name for keyword in params_to_keep) else dtype
109
+ if "controlnet_patch_embedding" in name:
110
+ dtype_to_use = torch.float32
111
+ set_module_tensor_to_device(controlnet, name, device=transformer_load_device, dtype=dtype_to_use, value=sd[name])
112
+
113
+ del sd
114
+
115
+ if load_device == "offload_device" and controlnet.device != offload_device:
116
+ log.info(f"Moving controlnet model from {controlnet.device} to {offload_device}")
117
+ controlnet.to(offload_device)
118
+ gc.collect()
119
+ mm.soft_empty_cache()
120
+
121
+ return (controlnet,)
122
+
123
+ class WanVideoControlnetApply:
124
+ @classmethod
125
+ def INPUT_TYPES(s):
126
+ return {
127
+ "required": {
128
+ "model": ("WANVIDEOMODEL", ),
129
+ "controlnet": ("WANVIDEOCONTROLNET", ),
130
+ "control_images": ("IMAGE", ),
131
+ "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.0001, "tooltip": "controlnet strength"}),
132
+ "control_stride": ("INT", {"default": 3, "min": 1, "max": 8, "step": 1, "tooltip": "controlnet stride"}),
133
+ "control_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Start percent of the steps to apply controlnet"}),
134
+ "control_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "End percent of the steps to apply controlnet"}),
135
+ }
136
+ }
137
+
138
+ RETURN_TYPES = ("WANVIDEOMODEL",)
139
+ RETURN_NAMES = ("model", )
140
+ FUNCTION = "loadmodel"
141
+ CATEGORY = "WanVideoWrapper"
142
+
143
+ def loadmodel(self, model, controlnet, control_images, strength, control_stride, control_start_percent, control_end_percent):
144
+
145
+ patcher = model.clone()
146
+ if 'transformer_options' not in patcher.model_options:
147
+ patcher.model_options['transformer_options'] = {}
148
+
149
+ control_input = control_images.permute(3, 0, 1, 2).unsqueeze(0).contiguous()
150
+ control_input = control_input * 2.0 - 1.0
151
+
152
+ controlnet = {
153
+ "controlnet": controlnet,
154
+ "control_latents": control_input,
155
+ "controlnet_strength": strength,
156
+ "control_stride": control_stride,
157
+ "controlnet_start": control_start_percent,
158
+ "controlnet_end": control_end_percent
159
+ }
160
+ patcher.model_options["transformer_options"]["controlnet"] = controlnet
161
+
162
+ return (patcher,)
163
+
164
+ NODE_CLASS_MAPPINGS = {
165
+ "WanVideoControlnetLoader": WanVideoControlnetLoader,
166
+ "WanVideoControlnet": WanVideoControlnetApply,
167
+ }
168
+ NODE_DISPLAY_NAME_MAPPINGS = {
169
+ "WanVideoControlnetLoader": "WanVideo Controlnet Loader",
170
+ "WanVideoControlnet": "WanVideo Controlnet Apply",
171
+ }
172
+
173
+
controlnet/wan_controlnet.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # source https://github.com/TheDenk/wan2.1-dilated-controlnet/blob/main/wan_controlnet.py
2
+ from typing import Any, Dict, Optional, Tuple, Union
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
8
+ from diffusers.loaders import FromOriginalModelMixin, PeftAdapterMixin
9
+ from diffusers.utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
10
+ from diffusers.models.modeling_outputs import Transformer2DModelOutput
11
+ from diffusers.models.modeling_utils import ModelMixin
12
+ from diffusers.models.transformers.transformer_wan import (
13
+ WanTimeTextImageEmbedding,
14
+ WanRotaryPosEmbed,
15
+ WanTransformerBlock
16
+ )
17
+
18
+ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
19
+
20
+ def zero_module(module):
21
+ for p in module.parameters():
22
+ nn.init.zeros_(p)
23
+ return module
24
+
25
+
26
+ class WanControlnet(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
27
+ r"""
28
+ A Controlnet Transformer model for video-like data used in the Wan model.
29
+
30
+ Args:
31
+ patch_size (`Tuple[int]`, defaults to `(1, 2, 2)`):
32
+ 3D patch dimensions for video embedding (t_patch, h_patch, w_patch).
33
+ num_attention_heads (`int`, defaults to `40`):
34
+ Fixed length for text embeddings.
35
+ attention_head_dim (`int`, defaults to `128`):
36
+ The number of channels in each head.
37
+ vae_channels (`int`, defaults to `16`):
38
+ The number of channels in the vae input.
39
+ in_channels (`int`, defaults to `16`):
40
+ The number of channels in the controlnet input.
41
+ text_dim (`int`, defaults to `512`):
42
+ Input dimension for text embeddings.
43
+ freq_dim (`int`, defaults to `256`):
44
+ Dimension for sinusoidal time embeddings.
45
+ ffn_dim (`int`, defaults to `13824`):
46
+ Intermediate dimension in feed-forward network.
47
+ num_layers (`int`, defaults to `40`):
48
+ The number of layers of transformer blocks to use.
49
+ window_size (`Tuple[int]`, defaults to `(-1, -1)`):
50
+ Window size for local attention (-1 indicates global attention).
51
+ cross_attn_norm (`bool`, defaults to `True`):
52
+ Enable cross-attention normalization.
53
+ qk_norm (`bool`, defaults to `True`):
54
+ Enable query/key normalization.
55
+ eps (`float`, defaults to `1e-6`):
56
+ Epsilon value for normalization layers.
57
+ add_img_emb (`bool`, defaults to `False`):
58
+ Whether to use img_emb.
59
+ added_kv_proj_dim (`int`, *optional*, defaults to `None`):
60
+ The number of channels to use for the added key and value projections. If `None`, no projection is used.
61
+ downscale_coef (`int`, *optional*, defaults to `8`):
62
+ Coeficient for downscale controlnet input video.
63
+ out_proj_dim (`int`, *optional*, defaults to `128 * 12`):
64
+ Output projection dimention for last linear layers.
65
+ """
66
+
67
+ _supports_gradient_checkpointing = True
68
+ _skip_layerwise_casting_patterns = ["patch_embedding", "condition_embedder", "norm"]
69
+ _no_split_modules = ["WanTransformerBlock"]
70
+ _keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"]
71
+ _keys_to_ignore_on_load_unexpected = ["norm_added_q"]
72
+
73
+ @register_to_config
74
+ def __init__(
75
+ self,
76
+ patch_size: Tuple[int] = (1, 2, 2),
77
+ num_attention_heads: int = 40,
78
+ attention_head_dim: int = 128,
79
+ in_channels: int = 3,
80
+ vae_channels: int = 16,
81
+ text_dim: int = 4096,
82
+ freq_dim: int = 256,
83
+ ffn_dim: int = 13824,
84
+ num_layers: int = 20,
85
+ cross_attn_norm: bool = True,
86
+ qk_norm: Optional[str] = "rms_norm_across_heads",
87
+ eps: float = 1e-6,
88
+ image_dim: Optional[int] = None,
89
+ added_kv_proj_dim: Optional[int] = None,
90
+ rope_max_seq_len: int = 1024,
91
+ downscale_coef: int = 8,
92
+ out_proj_dim: int = 128 * 12,
93
+ ) -> None:
94
+ super().__init__()
95
+
96
+ start_channels = in_channels * (downscale_coef ** 2)
97
+ input_channels = [start_channels, start_channels // 2, start_channels // 4]
98
+
99
+ self.control_encoder = nn.ModuleList([
100
+ ## Spatial compression with time awareness
101
+ nn.Sequential(
102
+ nn.Conv3d(
103
+ in_channels,
104
+ input_channels[0],
105
+ kernel_size=(3, downscale_coef + 1, downscale_coef + 1),
106
+ stride=(1, downscale_coef, downscale_coef),
107
+ padding=(1, downscale_coef // 2, downscale_coef // 2)
108
+ ),
109
+ nn.GELU(approximate="tanh"),
110
+ nn.GroupNorm(2, input_channels[0]),
111
+ ),
112
+ ## Spatio-Temporal compression with spatial awareness
113
+ nn.Sequential(
114
+ nn.Conv3d(input_channels[0], input_channels[1], kernel_size=3, stride=(2, 1, 1), padding=1),
115
+ nn.GELU(approximate="tanh"),
116
+ nn.GroupNorm(2, input_channels[1]),
117
+ ),
118
+ ## Temporal compression with spatial awareness
119
+ nn.Sequential(
120
+ nn.Conv3d(input_channels[1], input_channels[2], kernel_size=3, stride=(2, 1, 1), padding=1),
121
+ nn.GELU(approximate="tanh"),
122
+ nn.GroupNorm(2, input_channels[2]),
123
+ )
124
+ ])
125
+
126
+ inner_dim = num_attention_heads * attention_head_dim
127
+
128
+ # 1. Patch & position embedding
129
+ self.rope = WanRotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len)
130
+ self.patch_embedding = nn.Conv3d(vae_channels + input_channels[2], inner_dim, kernel_size=patch_size, stride=patch_size)
131
+
132
+ # 2. Condition embeddings
133
+ # image_embedding_dim=1280 for I2V model
134
+ self.condition_embedder = WanTimeTextImageEmbedding(
135
+ dim=inner_dim,
136
+ time_freq_dim=freq_dim,
137
+ time_proj_dim=inner_dim * 6,
138
+ text_embed_dim=text_dim,
139
+ image_embed_dim=image_dim,
140
+ )
141
+ # 3. Transformer blocks
142
+ self.blocks = nn.ModuleList(
143
+ [
144
+ WanTransformerBlock(
145
+ inner_dim, ffn_dim, num_attention_heads, qk_norm, cross_attn_norm, eps, added_kv_proj_dim
146
+ )
147
+ for _ in range(num_layers)
148
+ ]
149
+ )
150
+
151
+ # 4 Controlnet modules
152
+ self.controlnet_blocks = nn.ModuleList([])
153
+
154
+ for _ in range(len(self.blocks)):
155
+ controlnet_block = nn.Linear(inner_dim, out_proj_dim)
156
+ controlnet_block = zero_module(controlnet_block)
157
+ self.controlnet_blocks.append(controlnet_block)
158
+
159
+ self.gradient_checkpointing = False
160
+
161
+ def forward(
162
+ self,
163
+ hidden_states: torch.Tensor,
164
+ timestep: torch.LongTensor,
165
+ encoder_hidden_states: torch.Tensor,
166
+ controlnet_states: torch.Tensor,
167
+ encoder_hidden_states_image: Optional[torch.Tensor] = None,
168
+ return_dict: bool = True,
169
+ attention_kwargs: Optional[Dict[str, Any]] = None,
170
+ ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
171
+ if attention_kwargs is not None:
172
+ attention_kwargs = attention_kwargs.copy()
173
+ lora_scale = attention_kwargs.pop("scale", 1.0)
174
+ else:
175
+ lora_scale = 1.0
176
+
177
+ if USE_PEFT_BACKEND:
178
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
179
+ scale_lora_layers(self, lora_scale)
180
+ else:
181
+ if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None:
182
+ logger.warning(
183
+ "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective."
184
+ )
185
+ rotary_emb = self.rope(hidden_states)
186
+
187
+ # 0. Controlnet encoder
188
+ for control_encoder_block in self.control_encoder:
189
+ controlnet_states = control_encoder_block(controlnet_states)
190
+
191
+ hidden_states = torch.cat([hidden_states, controlnet_states], dim=1)
192
+
193
+ ## 1. Patch embedding and stack
194
+ hidden_states = self.patch_embedding(hidden_states)
195
+ hidden_states = hidden_states.flatten(2).transpose(1, 2)
196
+
197
+ # timestep shape: batch_size, or batch_size, seq_len (wan 2.2 ti2v)
198
+ if timestep.ndim == 2:
199
+ ## for ComfyUI workflow
200
+ if hidden_states.shape[1] != timestep.shape[1]:
201
+ timestep = timestep.repeat_interleave(hidden_states.shape[1] // timestep.shape[1], dim=1)
202
+ ts_seq_len = timestep.shape[1]
203
+ timestep = timestep.flatten() # batch_size * seq_len
204
+ else:
205
+ ts_seq_len = None
206
+
207
+ temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder(
208
+ timestep, encoder_hidden_states, encoder_hidden_states_image, timestep_seq_len=ts_seq_len
209
+ )
210
+ if ts_seq_len is not None:
211
+ # batch_size, seq_len, 6, inner_dim
212
+ timestep_proj = timestep_proj.unflatten(2, (6, -1))
213
+ else:
214
+ # batch_size, 6, inner_dim
215
+ timestep_proj = timestep_proj.unflatten(1, (6, -1))
216
+
217
+ if encoder_hidden_states_image is not None:
218
+ encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1)
219
+
220
+ # 4. Transformer blocks
221
+ controlnet_hidden_states = ()
222
+ if torch.is_grad_enabled() and self.gradient_checkpointing:
223
+ for block, controlnet_block in zip(self.blocks, self.controlnet_blocks):
224
+ hidden_states = self._gradient_checkpointing_func(
225
+ block, hidden_states, encoder_hidden_states, timestep_proj, rotary_emb
226
+ )
227
+ controlnet_hidden_states += (controlnet_block(hidden_states),)
228
+ else:
229
+ for block, controlnet_block in zip(self.blocks, self.controlnet_blocks):
230
+ hidden_states = block(hidden_states, encoder_hidden_states, timestep_proj, rotary_emb)
231
+ controlnet_hidden_states += (controlnet_block(hidden_states),)
232
+
233
+
234
+ if USE_PEFT_BACKEND:
235
+ # remove `lora_scale` from each PEFT layer
236
+ unscale_lora_layers(self, lora_scale)
237
+
238
+ if not return_dict:
239
+ return (controlnet_hidden_states,)
240
+
241
+ return Transformer2DModelOutput(sample=controlnet_hidden_states)
242
+
243
+
244
+ if __name__ == "__main__":
245
+ parameters = {
246
+ "added_kv_proj_dim": None,
247
+ "attention_head_dim": 128,
248
+ "cross_attn_norm": True,
249
+ "eps": 1e-06,
250
+ "ffn_dim": 8960,
251
+ "freq_dim": 256,
252
+ "image_dim": None,
253
+ "in_channels": 3,
254
+ "num_attention_heads": 12,
255
+ "num_layers": 2,
256
+ "patch_size": [1, 2, 2],
257
+ "qk_norm": "rms_norm_across_heads",
258
+ "rope_max_seq_len": 1024,
259
+ "text_dim": 4096,
260
+ "downscale_coef": 8,
261
+ "out_proj_dim": 12 * 128,
262
+ "vae_channels": 16
263
+ }
264
+ controlnet = WanControlnet(**parameters)
265
+
266
+ hidden_states = torch.rand(1, 16, 13, 60, 90)
267
+ timestep = torch.tensor([1000]).repeat(17550).unsqueeze(0) #torch.randint(low=0, high=1000, size=(1,), dtype=torch.long)
268
+ encoder_hidden_states = torch.rand(1, 512, 4096)
269
+ controlnet_states = torch.rand(1, 3, 49, 480, 720)
270
+
271
+ controlnet_hidden_states = controlnet(
272
+ hidden_states=hidden_states,
273
+ timestep=timestep,
274
+ encoder_hidden_states=encoder_hidden_states,
275
+ controlnet_states=controlnet_states,
276
+ return_dict=False
277
+ )
278
+ print("Output states count", len(controlnet_hidden_states[0]))
279
+ for out_hidden_states in controlnet_hidden_states[0]:
280
+ print(out_hidden_states.shape)
281
+
custom_linear.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from accelerate import init_empty_weights
4
+ from comfy.ops import cast_bias_weight
5
+
6
+ #based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/quantizers/gguf/utils.py
7
+ def _replace_linear(model, compute_dtype, state_dict, prefix="", patches=None, scale_weights=None):
8
+
9
+ has_children = list(model.children())
10
+ if not has_children:
11
+ return
12
+ for name, module in model.named_children():
13
+ module_prefix = prefix + name + "."
14
+ _replace_linear(module, compute_dtype, state_dict, module_prefix, patches, scale_weights)
15
+
16
+ if isinstance(module, nn.Linear) and "loras" not in module_prefix:
17
+ in_features = state_dict[module_prefix + "weight"].shape[1]
18
+ out_features = state_dict[module_prefix + "weight"].shape[0]
19
+ if scale_weights is not None:
20
+ scale_key = f"{module_prefix}scale_weight"
21
+
22
+ with init_empty_weights():
23
+ model._modules[name] = CustomLinear(
24
+ in_features,
25
+ out_features,
26
+ module.bias is not None,
27
+ compute_dtype=compute_dtype,
28
+ scale_weight=scale_weights.get(scale_key) if scale_weights else None
29
+ )
30
+ #set_lora_params(model._modules[name], patches, module_prefix)
31
+ model._modules[name].source_cls = type(module)
32
+ # Force requires_grad to False to avoid unexpected errors
33
+ model._modules[name].requires_grad_(False)
34
+
35
+ return model
36
+
37
+ def set_lora_params(module, patches, module_prefix=""):
38
+ # Recursively set lora_diffs and lora_strengths for all CustomLinear layers
39
+ for name, child in module.named_children():
40
+ child_prefix = (f"{module_prefix}{name}.")
41
+ set_lora_params(child, patches, child_prefix)
42
+ if isinstance(module, CustomLinear):
43
+ key = f"diffusion_model.{module_prefix}weight"
44
+ patch = patches.get(key, [])
45
+ #print(f"Processing LoRA patches for {key}: {len(patch)} patches found")
46
+ if len(patch) != 0:
47
+ lora_diffs = []
48
+ for p in patch:
49
+ lora_obj = p[1]
50
+ if "head" in key:
51
+ continue # For now skip LoRA for head layers
52
+ elif hasattr(lora_obj, "weights"):
53
+ lora_diffs.append(lora_obj.weights)
54
+ elif isinstance(lora_obj, tuple) and lora_obj[0] == "diff":
55
+ lora_diffs.append(lora_obj[1])
56
+ else:
57
+ continue
58
+ lora_strengths = [p[0] for p in patch]
59
+ module.lora = (lora_diffs, lora_strengths)
60
+ module.step = 0 # Initialize step for LoRA scheduling
61
+
62
+
63
+ class CustomLinear(nn.Linear):
64
+ def __init__(
65
+ self,
66
+ in_features,
67
+ out_features,
68
+ bias=False,
69
+ compute_dtype=None,
70
+ device=None,
71
+ scale_weight=None
72
+ ) -> None:
73
+ super().__init__(in_features, out_features, bias, device)
74
+ self.compute_dtype = compute_dtype
75
+ self.lora = None
76
+ self.step = 0
77
+ self.scale_weight = scale_weight
78
+ self.bias_function = []
79
+ self.weight_function = []
80
+
81
+ def forward(self, input):
82
+ weight, bias = cast_bias_weight(self, input)
83
+
84
+ if self.scale_weight is not None:
85
+ if weight.numel() < input.numel():
86
+ weight = weight * self.scale_weight
87
+ else:
88
+ input = input * self.scale_weight
89
+
90
+ if self.lora is not None:
91
+ weight = self.apply_lora(weight).to(self.compute_dtype)
92
+
93
+ return torch.nn.functional.linear(input, weight, bias)
94
+
95
+ @torch.compiler.disable()
96
+ def apply_lora(self, weight):
97
+ for lora_diff, lora_strength in zip(self.lora[0], self.lora[1]):
98
+ if isinstance(lora_strength, list):
99
+ lora_strength = lora_strength[self.step]
100
+ if lora_strength == 0.0:
101
+ continue
102
+ elif lora_strength == 0.0:
103
+ continue
104
+ patch_diff = torch.mm(
105
+ lora_diff[0].flatten(start_dim=1).to(weight.device),
106
+ lora_diff[1].flatten(start_dim=1).to(weight.device)
107
+ ).reshape(weight.shape)
108
+ alpha = lora_diff[2] / lora_diff[1].shape[0] if lora_diff[2] is not None else 1.0
109
+ scale = lora_strength * alpha
110
+ weight = weight.add(patch_diff, alpha=scale)
111
+ return weight
112
+
113
+ def remove_lora_from_module(module):
114
+ for name, submodule in module.named_modules():
115
+ submodule.lora = None
diffsynth/vram_management/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [2023] [Zhongjie Duan]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
diffsynth/vram_management/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .layers import *
diffsynth/vram_management/layers.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch, copy
2
+ from .utils import init_weights_on_device
3
+
4
+
5
def cast_to(weight, dtype, device):
    """Copy ``weight`` into a freshly allocated tensor with the given dtype/device."""
    buf = torch.empty_like(weight, dtype=dtype, device=device)
    # Tensor.copy_ converts in place and returns the destination buffer.
    return buf.copy_(weight)
9
+
10
+
11
class AutoWrappedModule(torch.nn.Module):
    """Wraps a module and migrates it between offload and onload placements.

    ``state`` is 0 while the wrapped module sits in its offload dtype/device
    and 1 while it sits in its onload dtype/device.
    """

    def __init__(self, module: torch.nn.Module, offload_dtype, offload_device, onload_dtype, onload_device, computation_dtype, computation_device):
        super().__init__()
        self.module = module.to(dtype=offload_dtype, device=offload_device)
        self.offload_dtype = offload_dtype
        self.offload_device = offload_device
        self.onload_dtype = onload_dtype
        self.onload_device = onload_device
        self.computation_dtype = computation_dtype
        self.computation_device = computation_device
        self.state = 0

    def _placements_differ(self):
        # Moving is only worthwhile when offload and onload configs differ.
        return self.offload_dtype != self.onload_dtype or self.offload_device != self.onload_device

    def offload(self):
        """Move the wrapped module back to its offload dtype/device."""
        if self.state == 1 and self._placements_differ():
            self.module.to(dtype=self.offload_dtype, device=self.offload_device)
            self.state = 0

    def onload(self):
        """Move the wrapped module to its onload dtype/device."""
        if self.state == 0 and self._placements_differ():
            self.module.to(dtype=self.onload_dtype, device=self.onload_device)
            self.state = 1

    def forward(self, *args, **kwargs):
        if self.onload_dtype == self.computation_dtype and self.onload_device == self.computation_device:
            runner = self.module
        else:
            # Compute on a throwaway deep copy so the resident module keeps
            # its current (possibly offloaded) placement untouched.
            runner = copy.deepcopy(self.module).to(dtype=self.computation_dtype, device=self.computation_device)
        return runner(*args, **kwargs)
39
+
40
+
41
class AutoWrappedLinear(torch.nn.Linear):
    """Linear layer whose parameters migrate between offload/onload placements.

    Adopts the parameters of an existing ``nn.Linear``; ``state`` is 0 in the
    offload placement and 1 in the onload placement.
    """

    def __init__(self, module: torch.nn.Linear, offload_dtype, offload_device, onload_dtype, onload_device, computation_dtype, computation_device):
        # Build the parent on the meta device so no real storage is allocated;
        # the wrapped module's actual weight/bias are adopted right after.
        with init_weights_on_device(device=torch.device("meta")):
            super().__init__(in_features=module.in_features, out_features=module.out_features, bias=module.bias is not None, dtype=offload_dtype, device=offload_device)
        self.weight = module.weight
        self.bias = module.bias
        self.offload_dtype = offload_dtype
        self.offload_device = offload_device
        self.onload_dtype = onload_dtype
        self.onload_device = onload_device
        self.computation_dtype = computation_dtype
        self.computation_device = computation_device
        self.state = 0

    def offload(self):
        """Move parameters back to the offload dtype/device."""
        if self.state == 1 and (self.offload_dtype != self.onload_dtype or self.offload_device != self.onload_device):
            self.to(dtype=self.offload_dtype, device=self.offload_device)
            self.state = 0

    def onload(self):
        """Move parameters to the onload dtype/device."""
        if self.state == 0 and (self.offload_dtype != self.onload_dtype or self.offload_device != self.onload_device):
            self.to(dtype=self.onload_dtype, device=self.onload_device)
            self.state = 1

    def forward(self, x, *args, **kwargs):
        needs_cast = self.onload_dtype != self.computation_dtype or self.onload_device != self.computation_device
        if needs_cast:
            # Copy parameters to the computation placement for this call only.
            weight = cast_to(self.weight, self.computation_dtype, self.computation_device)
            bias = self.bias if self.bias is None else cast_to(self.bias, self.computation_dtype, self.computation_device)
        else:
            weight, bias = self.weight, self.bias
        return torch.nn.functional.linear(x, weight, bias)
72
+
73
+
74
def enable_vram_management_recursively(model: torch.nn.Module, module_map: dict, module_config: dict, max_num_param=None, overflow_module_config: dict = None, total_num_param=0, compile_args=None):
    """Recursively replace mapped child modules of ``model`` with wrapped versions.

    Children whose type matches a key of ``module_map`` are replaced by
    ``target_module(child, **config)``; unmatched children are recursed into.
    ``overflow_module_config`` is used once the running parameter count exceeds
    ``max_num_param``. Returns the accumulated parameter count.

    Fix: the recursive call previously dropped ``compile_args``, so nested
    submodules were never compiled even when compilation was requested.
    """
    for name, module in model.named_children():
        for source_module, target_module in module_map.items():
            if isinstance(module, source_module):
                # These small embedding helpers are intentionally left unwrapped.
                if "rope_embedder" in name or "patch_embedding" in name or "emb_pos" in name:
                    continue

                num_param = sum(p.numel() for p in module.parameters())
                # Past the parameter budget, switch to the overflow config.
                if max_num_param is not None and total_num_param + num_param > max_num_param:
                    module_config_ = overflow_module_config
                else:
                    module_config_ = module_config
                if compile_args is not None:
                    print("Compiling", name)
                    torch._dynamo.config.cache_size_limit = compile_args["dynamo_cache_size_limit"]
                    torch._dynamo.config.recompile_limit = compile_args["dynamo_cache_size_limit"]
                    module_ = torch.compile(target_module(module, **module_config_), fullgraph=compile_args["fullgraph"], dynamic=compile_args["dynamic"], backend=compile_args["backend"], mode=compile_args["mode"])
                else:
                    module_ = target_module(module, **module_config_)
                setattr(model, name, module_)
                total_num_param += num_param
                break
        else:
            # Not a mapped type: recurse into its children, propagating compile_args.
            total_num_param = enable_vram_management_recursively(module, module_map, module_config, max_num_param, overflow_module_config, total_num_param, compile_args=compile_args)
    return total_num_param
99
+
100
+
101
def enable_vram_management(model: torch.nn.Module, module_map: dict, module_config: dict, max_num_param=None, overflow_module_config: dict = None, compile_args=None):
    """Wrap all eligible submodules of ``model`` and mark it as VRAM-managed."""
    enable_vram_management_recursively(
        model,
        module_map,
        module_config,
        max_num_param,
        overflow_module_config,
        total_num_param=0,
        compile_args=compile_args,
    )
    # Downstream code checks this flag before calling offload()/onload().
    model.vram_management_enabled = True
diffsynth/vram_management/utils.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from contextlib import contextmanager
3
+
4
@contextmanager
def init_weights_on_device(device = torch.device("meta"), include_buffers :bool = False):
    """Context manager that forces parameters (and optionally buffers) created
    inside the block onto ``device`` — typically ``meta`` to skip allocation.

    Works by temporarily monkey-patching ``torch.nn.Module.register_parameter``
    (and, when ``include_buffers`` is True, ``register_buffer`` plus the
    ``torch.empty/zeros/ones/full`` constructors), restoring the originals in
    ``finally``. NOTE(review): this mutates process-global state, so it is not
    safe to use concurrently from multiple threads.
    """

    # Keep the originals so they can be restored on exit.
    old_register_parameter = torch.nn.Module.register_parameter
    if include_buffers:
        old_register_buffer = torch.nn.Module.register_buffer

    def register_empty_parameter(module, name, param):
        # Register normally, then re-wrap the stored parameter on `device`,
        # preserving the parameter subclass, its attributes and requires_grad.
        old_register_parameter(module, name, param)
        if param is not None:
            param_cls = type(module._parameters[name])
            kwargs = module._parameters[name].__dict__
            kwargs["requires_grad"] = param.requires_grad
            module._parameters[name] = param_cls(module._parameters[name].to(device), **kwargs)

    def register_empty_buffer(module, name, buffer, persistent=True):
        # Same idea for buffers: register, then move the stored tensor.
        old_register_buffer(module, name, buffer, persistent=persistent)
        if buffer is not None:
            module._buffers[name] = module._buffers[name].to(device)

    def patch_tensor_constructor(fn):
        # Wrap a torch constructor so it always allocates on `device`.
        def wrapper(*args, **kwargs):
            kwargs["device"] = device
            return fn(*args, **kwargs)

        return wrapper

    if include_buffers:
        tensor_constructors_to_patch = {
            torch_function_name: getattr(torch, torch_function_name)
            for torch_function_name in ["empty", "zeros", "ones", "full"]
        }
    else:
        tensor_constructors_to_patch = {}

    try:
        torch.nn.Module.register_parameter = register_empty_parameter
        if include_buffers:
            torch.nn.Module.register_buffer = register_empty_buffer
        for torch_function_name in tensor_constructors_to_patch.keys():
            setattr(torch, torch_function_name, patch_tensor_constructor(getattr(torch, torch_function_name)))
        yield
    finally:
        # Always restore the original functions, even if the block raised.
        torch.nn.Module.register_parameter = old_register_parameter
        if include_buffers:
            torch.nn.Module.register_buffer = old_register_buffer
        for torch_function_name, old_torch_function in tensor_constructors_to_patch.items():
            setattr(torch, torch_function_name, old_torch_function)
echoshot/echoshot.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from comfy.model_management import get_autocast_device, get_torch_device
3
+
4
@torch.autocast(device_type=get_autocast_device(get_torch_device()), enabled=False)
@torch.compiler.disable()
def rope_apply_z(x, grid_sizes, freqs, inner_t, shift=6):
    """Apply rotary embedding where every frame of a shot shares one frequency row.

    For sample i, ``inner_t[i]`` lists shot lengths; all frames of shot k use
    frequency row ``k * shift``, so position within a shot is not encoded here.

    Args:
        x: tensor indexed as (batch, seq, heads, head_dim); pairs of the last
           dim are treated as complex numbers.
        grid_sizes: per-sample (f, h, w) latent grid sizes.
        freqs: complex frequency table indexed by row.
        inner_t: per-sample list of shot lengths (assumed to sum to f — TODO confirm).
        shift: row stride between consecutive shots.
    """
    n, c = x.size(2), x.size(3) // 2

    # loop over samples
    output = []
    for i, (f, h, w) in enumerate(grid_sizes.tolist()):
        seq_len = f * h * w

        # View the valid part of the sequence as complex pairs (in float64
        # for precision; autocast is disabled above).
        x_i = torch.view_as_complex(
            x[i, :seq_len].to(torch.float64).reshape(seq_len, n, -1, 2)
        )
        # Cumulative shot boundaries: start/end frame index of each shot.
        start_ind = [sum(inner_t[i][:_]) for _ in range(len(inner_t[i]))]
        end_ind = [sum(inner_t[i][:_+1]) for _ in range(len(inner_t[i]))]

        # One identical frequency row (shot_ind * shift) per frame of a shot.
        freq_select = []
        for shot_ind, (s, e) in enumerate(zip(start_ind, end_ind)):
            freq_select += [shot_ind * shift] * (e - s)
        shot_freqs = freqs[freq_select]

        # Broadcast the per-frame rows over the spatial grid.
        freqs_i = shot_freqs.view(f, 1, 1, -1).expand(f, h, w, -1).reshape(seq_len, 1, -1)

        # apply rotary embedding, then reattach the untouched padding tail
        x_i = torch.view_as_real(x_i * freqs_i).flatten(2)
        x_i = torch.cat([x_i, x[i, seq_len:]])

        # append to collection
        output.append(x_i)
    return torch.stack(output).float()
35
+
36
+
37
@torch.autocast(device_type=get_autocast_device(get_torch_device()), enabled=False)
@torch.compiler.disable()
def rope_apply_c(x, freqs, inner_c, shift=6):
    """Apply shot-indexed rotary embedding to context/text tokens.

    Tokens belonging to shot k all use frequency row ``k * shift``; tokens
    past the listed shot lengths (padding/empty tokens) are pushed to a far
    row (``last_shot_ind + 10``) to suppress them.

    Args:
        x: tensor indexed as (batch, seq, heads, head_dim), pairs of the last
           dim treated as complex numbers.
        freqs: complex frequency table indexed by row.
        inner_c: per-sample list of per-shot token counts.
        shift: row stride between consecutive shots.
    """

    b, s, n, c = x.size(0), x.size(1), x.size(2), x.size(3) // 2

    # loop over samples
    output = []
    for i in range(b):

        # View the sequence as complex pairs in float64 (autocast disabled above).
        x_i = torch.view_as_complex(
            x[i].to(torch.float64).reshape(s, n, -1, 2)
        )

        # One frequency row per token: shot_ind * shift for real tokens.
        freq_select = []
        for shot_ind, c_len in enumerate(inner_c[i]):
            freq_select += [shot_ind * shift] * c_len
        # NOTE(review): placed outside the loop, using the last shot index —
        # pads the remaining positions; confirm against upstream EchoShot.
        freq_select += [shot_ind+10] * (s-len(freq_select)) # extra suppression for the empty token
        shot_freqs = freqs[freq_select]

        freqs_i = shot_freqs.view(s, 1, -1)

        # apply rotary embedding
        x_i = torch.view_as_real(x_i * freqs_i).flatten(2)

        # append to collection
        output.append(x_i)
    return torch.stack(output).float()
66
+
67
@torch.autocast(device_type=get_autocast_device(get_torch_device()), enabled=False)
@torch.compiler.disable()
def rope_apply_echoshot(x, grid_sizes, freqs, inner_t, shift=4):
    """Apply full 3D (t, h, w) rotary embedding with per-shot temporal offsets.

    Temporal frequency indices increase frame-by-frame within a shot but each
    shot k is offset by ``k * shift``, separating shots in the temporal RoPE
    space. Spatial (h, w) frequencies are the standard per-row/column ones.

    Args:
        x: tensor indexed as (batch, seq, heads, head_dim); pairs of the last
           dim are treated as complex numbers.
        grid_sizes: per-sample (f, h, w) latent grid sizes.
        freqs: complex frequency table, split below into t/h/w sub-tables.
        inner_t: per-sample list of shot lengths (assumed to sum to f — TODO confirm).
        shift: temporal index offset between consecutive shots.
    """
    n, c = x.size(2), x.size(3) // 2

    # Split the head-dim frequency budget between time, height and width.
    freqs = freqs.split([c - 2 * (c // 3), c // 3, c // 3], dim=1)

    # loop over samples
    output = []
    for i, (f, h, w) in enumerate(grid_sizes.tolist()):
        seq_len = f * h * w

        # View the valid part of the sequence as complex pairs in float64.
        x_i = torch.view_as_complex(
            x[i, :seq_len].to(torch.float64).reshape(seq_len, n, -1, 2)
        )
        # Cumulative shot boundaries: start/end frame index of each shot.
        start_ind = [sum(inner_t[i][:_]) for _ in range(len(inner_t[i]))]
        end_ind = [sum(inner_t[i][:_+1]) for _ in range(len(inner_t[i]))]
        # Per-frame temporal indices, each shot offset by shot_ind * shift.
        freq_select = []
        for shot_ind, (s, e) in enumerate(zip(start_ind, end_ind)):
            freq_select += list(range(shot_ind * shift + s, shot_ind * shift + e))
        t_freqs = freqs[0][freq_select]

        # Combine shifted temporal rows with standard spatial rows/columns.
        freqs_i = torch.cat([
            # freqs[0][:f].view(f, 1, 1, -1).expand(f, h, w, -1),
            t_freqs.view(f, 1, 1, -1).expand(f, h, w, -1), ###
            freqs[1][:h].view(1, h, 1, -1).expand(f, h, w, -1),
            freqs[2][:w].view(1, 1, w, -1).expand(f, h, w, -1)
        ], dim=-1).reshape(seq_len, 1, -1)

        # apply rotary embedding, then reattach the untouched padding tail
        x_i = torch.view_as_real(x_i * freqs_i).flatten(2)
        x_i = torch.cat([x_i, x[i, seq_len:]])

        # append to collection
        output.append(x_i)
    return torch.stack(output).float()
enhance_a_video/LICENSE ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright VideoSys
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
202
+
203
+ ## Some of Enhance-A-Video's model is derived from others projects, which is subject to the following copyright notice:
204
+
205
+ ================================= Diffusers =================================
206
+
207
+ Apache License
208
+ Version 2.0, January 2004
209
+ http://www.apache.org/licenses/
210
+
211
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
212
+
213
+ 1. Definitions.
214
+
215
+ "License" shall mean the terms and conditions for use, reproduction,
216
+ and distribution as defined by Sections 1 through 9 of this document.
217
+
218
+ "Licensor" shall mean the copyright owner or entity authorized by
219
+ the copyright owner that is granting the License.
220
+
221
+ "Legal Entity" shall mean the union of the acting entity and all
222
+ other entities that control, are controlled by, or are under common
223
+ control with that entity. For the purposes of this definition,
224
+ "control" means (i) the power, direct or indirect, to cause the
225
+ direction or management of such entity, whether by contract or
226
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
227
+ outstanding shares, or (iii) beneficial ownership of such entity.
228
+
229
+ "You" (or "Your") shall mean an individual or Legal Entity
230
+ exercising permissions granted by this License.
231
+
232
+ "Source" form shall mean the preferred form for making modifications,
233
+ including but not limited to software source code, documentation
234
+ source, and configuration files.
235
+
236
+ "Object" form shall mean any form resulting from mechanical
237
+ transformation or translation of a Source form, including but
238
+ not limited to compiled object code, generated documentation,
239
+ and conversions to other media types.
240
+
241
+ "Work" shall mean the work of authorship, whether in Source or
242
+ Object form, made available under the License, as indicated by a
243
+ copyright notice that is included in or attached to the work
244
+ (an example is provided in the Appendix below).
245
+
246
+ "Derivative Works" shall mean any work, whether in Source or Object
247
+ form, that is based on (or derived from) the Work and for which the
248
+ editorial revisions, annotations, elaborations, or other modifications
249
+ represent, as a whole, an original work of authorship. For the purposes
250
+ of this License, Derivative Works shall not include works that remain
251
+ separable from, or merely link (or bind by name) to the interfaces of,
252
+ the Work and Derivative Works thereof.
253
+
254
+ "Contribution" shall mean any work of authorship, including
255
+ the original version of the Work and any modifications or additions
256
+ to that Work or Derivative Works thereof, that is intentionally
257
+ submitted to Licensor for inclusion in the Work by the copyright owner
258
+ or by an individual or Legal Entity authorized to submit on behalf of
259
+ the copyright owner. For the purposes of this definition, "submitted"
260
+ means any form of electronic, verbal, or written communication sent
261
+ to the Licensor or its representatives, including but not limited to
262
+ communication on electronic mailing lists, source code control systems,
263
+ and issue tracking systems that are managed by, or on behalf of, the
264
+ Licensor for the purpose of discussing and improving the Work, but
265
+ excluding communication that is conspicuously marked or otherwise
266
+ designated in writing by the copyright owner as "Not a Contribution."
267
+
268
+ "Contributor" shall mean Licensor and any individual or Legal Entity
269
+ on behalf of whom a Contribution has been received by Licensor and
270
+ subsequently incorporated within the Work.
271
+
272
+ 2. Grant of Copyright License. Subject to the terms and conditions of
273
+ this License, each Contributor hereby grants to You a perpetual,
274
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
275
+ copyright license to reproduce, prepare Derivative Works of,
276
+ publicly display, publicly perform, sublicense, and distribute the
277
+ Work and such Derivative Works in Source or Object form.
278
+
279
+ 3. Grant of Patent License. Subject to the terms and conditions of
280
+ this License, each Contributor hereby grants to You a perpetual,
281
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
282
+ (except as stated in this section) patent license to make, have made,
283
+ use, offer to sell, sell, import, and otherwise transfer the Work,
284
+ where such license applies only to those patent claims licensable
285
+ by such Contributor that are necessarily infringed by their
286
+ Contribution(s) alone or by combination of their Contribution(s)
287
+ with the Work to which such Contribution(s) was submitted. If You
288
+ institute patent litigation against any entity (including a
289
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
290
+ or a Contribution incorporated within the Work constitutes direct
291
+ or contributory patent infringement, then any patent licenses
292
+ granted to You under this License for that Work shall terminate
293
+ as of the date such litigation is filed.
294
+
295
+ 4. Redistribution. You may reproduce and distribute copies of the
296
+ Work or Derivative Works thereof in any medium, with or without
297
+ modifications, and in Source or Object form, provided that You
298
+ meet the following conditions:
299
+
300
+ (a) You must give any other recipients of the Work or
301
+ Derivative Works a copy of this License; and
302
+
303
+ (b) You must cause any modified files to carry prominent notices
304
+ stating that You changed the files; and
305
+
306
+ (c) You must retain, in the Source form of any Derivative Works
307
+ that You distribute, all copyright, patent, trademark, and
308
+ attribution notices from the Source form of the Work,
309
+ excluding those notices that do not pertain to any part of
310
+ the Derivative Works; and
311
+
312
+ (d) If the Work includes a "NOTICE" text file as part of its
313
+ distribution, then any Derivative Works that You distribute must
314
+ include a readable copy of the attribution notices contained
315
+ within such NOTICE file, excluding those notices that do not
316
+ pertain to any part of the Derivative Works, in at least one
317
+ of the following places: within a NOTICE text file distributed
318
+ as part of the Derivative Works; within the Source form or
319
+ documentation, if provided along with the Derivative Works; or,
320
+ within a display generated by the Derivative Works, if and
321
+ wherever such third-party notices normally appear. The contents
322
+ of the NOTICE file are for informational purposes only and
323
+ do not modify the License. You may add Your own attribution
324
+ notices within Derivative Works that You distribute, alongside
325
+ or as an addendum to the NOTICE text from the Work, provided
326
+ that such additional attribution notices cannot be construed
327
+ as modifying the License.
328
+
329
+ You may add Your own copyright statement to Your modifications and
330
+ may provide additional or different license terms and conditions
331
+ for use, reproduction, or distribution of Your modifications, or
332
+ for any such Derivative Works as a whole, provided Your use,
333
+ reproduction, and distribution of the Work otherwise complies with
334
+ the conditions stated in this License.
335
+
336
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
337
+ any Contribution intentionally submitted for inclusion in the Work
338
+ by You to the Licensor shall be under the terms and conditions of
339
+ this License, without any additional terms or conditions.
340
+ Notwithstanding the above, nothing herein shall supersede or modify
341
+ the terms of any separate license agreement you may have executed
342
+ with Licensor regarding such Contributions.
343
+
344
+ 6. Trademarks. This License does not grant permission to use the trade
345
+ names, trademarks, service marks, or product names of the Licensor,
346
+ except as required for reasonable and customary use in describing the
347
+ origin of the Work and reproducing the content of the NOTICE file.
348
+
349
+ 7. Disclaimer of Warranty. Unless required by applicable law or
350
+ agreed to in writing, Licensor provides the Work (and each
351
+ Contributor provides its Contributions) on an "AS IS" BASIS,
352
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
353
+ implied, including, without limitation, any warranties or conditions
354
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
355
+ PARTICULAR PURPOSE. You are solely responsible for determining the
356
+ appropriateness of using or redistributing the Work and assume any
357
+ risks associated with Your exercise of permissions under this License.
358
+
359
+ 8. Limitation of Liability. In no event and under no legal theory,
360
+ whether in tort (including negligence), contract, or otherwise,
361
+ unless required by applicable law (such as deliberate and grossly
362
+ negligent acts) or agreed to in writing, shall any Contributor be
363
+ liable to You for damages, including any direct, indirect, special,
364
+ incidental, or consequential damages of any character arising as a
365
+ result of this License or out of the use or inability to use the
366
+ Work (including but not limited to damages for loss of goodwill,
367
+ work stoppage, computer failure or malfunction, or any and all
368
+ other commercial damages or losses), even if such Contributor
369
+ has been advised of the possibility of such damages.
370
+
371
+ 9. Accepting Warranty or Additional Liability. While redistributing
372
+ the Work or Derivative Works thereof, You may choose to offer,
373
+ and charge a fee for, acceptance of support, warranty, indemnity,
374
+ or other liability obligations and/or rights consistent with this
375
+ License. However, in accepting such obligations, You may act only
376
+ on Your own behalf and on Your sole responsibility, not on behalf
377
+ of any other Contributor, and only if You agree to indemnify,
378
+ defend, and hold each Contributor harmless for any liability
379
+ incurred by, or claims asserted against, such Contributor by reason
380
+ of your accepting any such warranty or additional liability.
381
+
382
+ END OF TERMS AND CONDITIONS
383
+
384
+ APPENDIX: How to apply the Apache License to your work.
385
+
386
+ To apply the Apache License to your work, attach the following
387
+ boilerplate notice, with the fields enclosed by brackets "[]"
388
+ replaced with your own identifying information. (Don't include
389
+ the brackets!) The text should be enclosed in the appropriate
390
+ comment syntax for the file format. We also recommend that a
391
+ file or class name and description of purpose be included on the
392
+ same "printed page" as the copyright notice for easier
393
+ identification within third-party archives.
394
+
395
+ Copyright [yyyy] [name of copyright owner]
396
+
397
+ Licensed under the Apache License, Version 2.0 (the "License");
398
+ you may not use this file except in compliance with the License.
399
+ You may obtain a copy of the License at
400
+
401
+ http://www.apache.org/licenses/LICENSE-2.0
402
+
403
+ Unless required by applicable law or agreed to in writing, software
404
+ distributed under the License is distributed on an "AS IS" BASIS,
405
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
406
+ See the License for the specific language governing permissions and
407
+ limitations under the License.
408
+
409
+
410
+ ================================= CogVideoX =================================
411
+
412
+ The CogVideoX License
413
+
414
+ 1. Definitions
415
+
416
+ “Licensor” means the CogVideoX Model Team that distributes its Software.
417
+
418
+ “Software” means the CogVideoX model parameters made available under this license.
419
+
420
+ 2. License Grant
421
+
422
+ Under the terms and conditions of this license, the licensor hereby grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license. The intellectual property rights of the generated content belong to the user to the extent permitted by applicable local laws.
423
+ This license allows you to freely use all open-source models in this repository for academic research. Users who wish to use the models for commercial purposes must register and obtain a basic commercial license in https://open.bigmodel.cn/mla/form .
424
+ Users who have registered and obtained the basic commercial license can use the models for commercial activities for free, but must comply with all terms and conditions of this license. Additionally, the number of service users (visits) for your commercial activities must not exceed 1 million visits per month.
425
+ If the number of service users (visits) for your commercial activities exceeds 1 million visits per month, you need to contact our business team to obtain more commercial licenses.
426
+ The above copyright statement and this license statement should be included in all copies or significant portions of this software.
427
+
428
+ 3. Restriction
429
+
430
+ You will not use, copy, modify, merge, publish, distribute, reproduce, or create derivative works of the Software, in whole or in part, for any military, or illegal purposes.
431
+
432
+ You will not use the Software for any act that may undermine China's national security and national unity, harm the public interest of society, or infringe upon the rights and interests of human beings.
433
+
434
+ 4. Disclaimer
435
+
436
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
437
+
438
+ 5. Limitation of Liability
439
+
440
+ EXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER BASED IN TORT, NEGLIGENCE, CONTRACT, LIABILITY, OR OTHERWISE WILL ANY LICENSOR BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES, OR ANY OTHER COMMERCIAL LOSSES, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
441
+
442
+ 6. Dispute Resolution
443
+
444
+ This license shall be governed and construed in accordance with the laws of People’s Republic of China. Any dispute arising from or in connection with this License shall be submitted to Haidian District People's Court in Beijing.
445
+
446
+ Note that the license is subject to update to a more comprehensive version. For any questions related to the license and copyright, please contact us at license@zhipuai.cn.
447
+
448
+ 1. 定义
449
+
450
+ “许可方”是指分发其软件的 CogVideoX 模型团队。
451
+
452
+ “软件”是指根据本许可提供的 CogVideoX 模型参数。
453
+
454
+ 2. 许可授予
455
+
456
+ 根据本许可的条款和条件,许可方特此授予您非排他性、全球性、不可转让、不可再许可、可撤销、免版税的版权许可。生成内容的知识产权所属,可根据适用当地法律的规定,在法律允许的范围内由用户享有生成内容的知识产权或其他权利。
457
+ 本许可允许您免费使用本仓库中的所有开源模型进行学术研究。对于希望将模型用于商业目的的用户,需在 https://open.bigmodel.cn/mla/form 完成登记并获得基础商用授权。
458
+
459
+ 经过登记并获得基础商用授权的用户可以免费使用本模型进行商业活动,但必须遵守本许可的所有条款和条件。
460
+ 在本许可证下,您的商业活动的服务用户数量(访问量)不得超过100万人次访问 / 每月。如果超过,您需要与我们的商业团队联系以获得更多的商业许可。
461
+ 上述版权声明和本许可声明应包含在本软件的所有副本或重要部分中。
462
+
463
+ 3.限制
464
+
465
+ 您不得出于任何军事或非法目的使用、复制、修改、合并、发布、分发、复制或创建本软件的全部或部分衍生作品。
466
+
467
+ 您不得利用本软件从事任何危害国家安全和国家统一、危害社会公共利益、侵犯人身权益的行为。
468
+
469
+ 4.免责声明
470
+
471
+ 本软件“按原样”提供,不提供任何明示或暗示的保证,包括但不限于对适销性、特定用途的适用性和非侵权性的保证。
472
+ 在任何情况下,作者或版权持有人均不对任何索赔、损害或其他责任负责,无论是在合同诉讼、侵权行为还是其他方面,由软件或软件的使用或其他交易引起、由软件引起或与之相关 软件。
473
+
474
+ 5. 责任限制
475
+
476
+ 除适用法律禁止的范围外,在任何情况下且根据任何法律理论,无论是基于侵权行为、疏忽、合同、责任或其他原因,任何许可方均不对您承担任何直接、间接、特殊、偶然、示范性、 或间接损害,或任何其他商业损失,即使许可人已被告知此类损害的可能性。
477
+
478
+ 6.争议解决
479
+
480
+ 本许可受中华人民共和国法律管辖并按其解释。 因本许可引起的或与本许可有关的任何争议应提交北京市海淀区人民法院。
481
+
482
+ 请注意,许可证可能会更新到更全面的版本。 有关许可和版权的任何问题,请通过 license@zhipuai.cn 与我们联系。
483
+
484
+ ============================================ HunyuanVideo ============================================
485
+
486
+ TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT
487
+ Tencent HunyuanVideo Release Date: December 3, 2024
488
+ THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION, UNITED KINGDOM AND SOUTH KOREA AND IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
489
+ By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Tencent Hunyuan Works, including via any Hosted Service, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.
490
+ 1. DEFINITIONS.
491
+ a. “Acceptable Use Policy” shall mean the policy made available by Tencent as set forth in the Exhibit A.
492
+ b. “Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of Tencent Hunyuan Works or any portion or element thereof set forth herein.
493
+ c. “Documentation” shall mean the specifications, manuals and documentation for Tencent Hunyuan made publicly available by Tencent.
494
+ d. “Hosted Service” shall mean a hosted service offered via an application programming interface (API), web access, or any other electronic or remote means.
495
+ e. “Licensee,” “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Tencent Hunyuan Works for any purpose and in any field of use.
496
+ f. “Materials” shall mean, collectively, Tencent’s proprietary Tencent Hunyuan and Documentation (and any portion thereof) as made available by Tencent under this Agreement.
497
+ g. “Model Derivatives” shall mean all: (i) modifications to Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; (ii) works based on Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Tencent Hunyuan or any Model Derivative of Tencent Hunyuan, to that model in order to cause that model to perform similarly to Tencent Hunyuan or a Model Derivative of Tencent Hunyuan, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs by Tencent Hunyuan or a Model Derivative of Tencent Hunyuan for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
498
+ h. “Output” shall mean the information and/or content output of Tencent Hunyuan or a Model Derivative that results from operating or otherwise using Tencent Hunyuan or a Model Derivative, including via a Hosted Service.
499
+ i. “Tencent,” “We” or “Us” shall mean THL A29 Limited.
500
+ j. “Tencent Hunyuan” shall mean the large language models, text/image/video/audio/3D generation models, and multimodal large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us, including, without limitation to, Tencent HunyuanVideo released at [https://github.com/Tencent/HunyuanVideo].
501
+ k. “Tencent Hunyuan Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
502
+ l. “Territory” shall mean the worldwide territory, excluding the territory of the European Union, United Kingdom and South Korea.
503
+ m. “Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
504
+ n. “including” shall mean including but not limited to.
505
+ 2. GRANT OF RIGHTS.
506
+ We grant You, for the Territory only, a non-exclusive, non-transferable and royalty-free limited license under Tencent’s intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
507
+ 3. DISTRIBUTION.
508
+ You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Tencent Hunyuan Works, exclusively in the Territory, provided that You meet all of the following conditions:
509
+ a. You must provide all such Third Party recipients of the Tencent Hunyuan Works or products or services using them a copy of this Agreement;
510
+ b. You must cause any modified files to carry prominent notices stating that You changed the files;
511
+ c. You are encouraged to: (i) publish at least one technology introduction blogpost or one public statement expressing Your experience of using the Tencent Hunyuan Works; and (ii) mark the products or services developed by using the Tencent Hunyuan Works to indicate that the product/service is “Powered by Tencent Hunyuan”; and
512
+ d. All distributions to Third Parties (other than through a Hosted Service) must be accompanied by a “Notice” text file that contains the following notice: “Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement, Copyright © 2024 Tencent. All Rights Reserved. The trademark rights of “Tencent Hunyuan” are owned by Tencent or its affiliate.”
513
+ You may add Your own copyright statement to Your modifications and, except as set forth in this Section and in Section 5, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement (including as regards the Territory). If You receive Tencent Hunyuan Works from a Licensee as part of an integrated end user product, then this Section 3 of this Agreement will not apply to You.
514
+ 4. ADDITIONAL COMMERCIAL TERMS.
515
+ If, on the Tencent Hunyuan version release date, the monthly active users of all products or services made available by or for Licensee is greater than 100 million monthly active users in the preceding calendar month, You must request a license from Tencent, which Tencent may grant to You in its sole discretion, and You are not authorized to exercise any of the rights under this Agreement unless or until Tencent otherwise expressly grants You such rights.
516
+ 5. RULES OF USE.
517
+ a. Your use of the Tencent Hunyuan Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Tencent Hunyuan Works, which is hereby incorporated by reference into this Agreement. You must include the use restrictions referenced in these Sections 5(a) and 5(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Tencent Hunyuan Works and You must provide notice to subsequent users to whom You distribute that Tencent Hunyuan Works are subject to the use restrictions in these Sections 5(a) and 5(b).
518
+ b. You must not use the Tencent Hunyuan Works or any Output or results of the Tencent Hunyuan Works to improve any other AI model (other than Tencent Hunyuan or Model Derivatives thereof).
519
+ c. You must not use, reproduce, modify, distribute, or display the Tencent Hunyuan Works, Output or results of the Tencent Hunyuan Works outside the Territory. Any such use outside the Territory is unlicensed and unauthorized under this Agreement.
520
+ 6. INTELLECTUAL PROPERTY.
521
+ a. Subject to Tencent’s ownership of Tencent Hunyuan Works made by or for Tencent and intellectual property rights therein, conditioned upon Your compliance with the terms and conditions of this Agreement, as between You and Tencent, You will be the owner of any derivative works and modifications of the Materials and any Model Derivatives that are made by or for You.
522
+ b. No trademark licenses are granted under this Agreement, and in connection with the Tencent Hunyuan Works, Licensee may not use any name or mark owned by or associated with Tencent or any of its affiliates, except as required for reasonable and customary use in describing and distributing the Tencent Hunyuan Works. Tencent hereby grants You a license to use “Tencent Hunyuan” (the “Mark”) in the Territory solely as required to comply with the provisions of Section 3(c), provided that You comply with any applicable laws related to trademark protection. All goodwill arising out of Your use of the Mark will inure to the benefit of Tencent.
523
+ c. If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed. You will defend, indemnify and hold harmless Us from and against any claim by any Third Party arising out of or related to Your or the Third Party’s use or distribution of the Tencent Hunyuan Works.
524
+ d. Tencent claims no rights in Outputs You generate. You and Your users are solely responsible for Outputs and their subsequent uses.
525
+ 7. DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
526
+ a. We are not obligated to support, update, provide training for, or develop any further version of the Tencent Hunyuan Works or to grant any license thereto.
527
+ b. UNLESS AND ONLY TO THE EXTENT REQUIRED BY APPLICABLE LAW, THE TENCENT HUNYUAN WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND INCLUDING ANY WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, COURSE OF DEALING, USAGE OF TRADE, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR OR A THIRD PARTY’S USE OR DISTRIBUTION OF ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
528
+ c. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL TENCENT OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, FOR ANY DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS, EVEN IF TENCENT OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
529
+ 8. SURVIVAL AND TERMINATION.
530
+ a. The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
531
+ b. We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Tencent Hunyuan Works. Sections 6(a), 6(c), 7 and 9 shall survive the termination of this Agreement.
532
+ 9. GOVERNING LAW AND JURISDICTION.
533
+ a. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the Hong Kong Special Administrative Region of the People’s Republic of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
534
+ b. Exclusive jurisdiction and venue for any dispute arising out of or relating to this Agreement will be a court of competent jurisdiction in the Hong Kong Special Administrative Region of the People’s Republic of China, and Tencent and Licensee consent to the exclusive jurisdiction of such court with respect to any such dispute.
535
+
536
+ EXHIBIT A
537
+ ACCEPTABLE USE POLICY
538
+
539
+ Tencent reserves the right to update this Acceptable Use Policy from time to time.
540
+ Last modified: November 5, 2024
541
+
542
+ Tencent endeavors to promote safe and fair use of its tools and features, including Tencent Hunyuan. You agree not to use Tencent Hunyuan or Model Derivatives:
543
+ 1. Outside the Territory;
544
+ 2. In any way that violates any applicable national, federal, state, local, international or any other law or regulation;
545
+ 3. To harm Yourself or others;
546
+ 4. To repurpose or distribute output from Tencent Hunyuan or any Model Derivatives to harm Yourself or others;
547
+ 5. To override or circumvent the safety guardrails and safeguards We have put in place;
548
+ 6. For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
549
+ 7. To generate or disseminate verifiably false information and/or content with the purpose of harming others or influencing elections;
550
+ 8. To generate or facilitate false online engagement, including fake reviews and other means of fake online engagement;
551
+ 9. To intentionally defame, disparage or otherwise harass others;
552
+ 10. To generate and/or disseminate malware (including ransomware) or any other content to be used for the purpose of harming electronic systems;
553
+ 11. To generate or disseminate personal identifiable information with the purpose of harming others;
554
+ 12. To generate or disseminate information (including images, code, posts, articles), and place the information in any public context (including –through the use of bot generated tweets), without expressly and conspicuously identifying that the information and/or content is machine generated;
555
+ 13. To impersonate another individual without consent, authorization, or legal right;
556
+ 14. To make high-stakes automated decisions in domains that affect an individual’s safety, rights or wellbeing (e.g., law enforcement, migration, medicine/health, management of critical infrastructure, safety components of products, essential services, credit, employment, housing, education, social scoring, or insurance);
557
+ 15. In a manner that violates or disrespects the social ethics and moral standards of other countries or regions;
558
+ 16. To perform, facilitate, threaten, incite, plan, promote or encourage violent extremism or terrorism;
559
+ 17. For any use intended to discriminate against or harm individuals or groups based on protected characteristics or categories, online or offline social behavior or known or predicted personal or personality characteristics;
560
+ 18. To intentionally exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
561
+ 19. For military purposes;
562
+ 20. To engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or other professional practices.
enhance_a_video/__init__.py ADDED
File without changes
enhance_a_video/enhance.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from einops import rearrange
3
+ from .globals import get_enhance_weight, get_num_frames
4
+
5
@torch.compiler.disable()
def get_feta_scores(query, key):
    """Compute the FETA (Enhance-A-Video) temporal enhancement score.

    Reorganizes flattened spatio-temporal attention inputs so that
    frame-to-frame attention can be evaluated at each spatial location,
    then delegates the actual scoring to ``feta_score``.

    Args:
        query: attention query tensor of shape [B, S, N, C], where S is the
            flattened spatio-temporal token count, N the head count and C
            the per-head channel dimension.
        key: attention key tensor with the same shape as ``query``.

    Returns:
        A scalar tensor (see ``feta_score``) used to rescale attention.
    """
    img_q, img_k = query, key

    num_frames = get_num_frames()

    B, S, N, C = img_q.shape

    # Calculate spatial dimension (tokens per frame); assumes S is an exact
    # multiple of num_frames — TODO confirm against the caller.
    spatial_dim = S // num_frames

    # Add time dimension between spatial and head dims.
    # NOTE(review): this reshape assumes the flattened sequence is laid out
    # spatial-major (all frames of one spatial location contiguous) —
    # confirm against the model's token ordering.
    query_image = img_q.reshape(B, spatial_dim, num_frames, N, C)
    key_image = img_k.reshape(B, spatial_dim, num_frames, N, C)

    # Expand time dimension. The dim already has size num_frames, so these
    # expands are no-ops; kept for parity with the upstream
    # Enhance-A-Video implementation.
    query_image = query_image.expand(-1, -1, num_frames, -1, -1)  # [B, S, T, N, C]
    key_image = key_image.expand(-1, -1, num_frames, -1, -1)  # [B, S, T, N, C]

    # Reshape to match feta_score input format: [(B S) N T C]
    query_image = rearrange(query_image, "b s t n c -> (b s) n t c")
    key_image = rearrange(key_image, "b s t n c -> (b s) n t c")

    return feta_score(query_image, key_image, C, num_frames)
29
+
30
@torch.compiler.disable()
def feta_score(query_image, key_image, head_dim, num_frames):
    """Compute the scalar FETA enhancement factor from temporal attention.

    Builds a softmax attention map over the frame axis for every token,
    discards each frame's attention to itself, and turns the average
    cross-frame attention mass into a multiplicative enhancement score.

    Args:
        query_image: query tensor shaped [(B*S), N, T, C].
        key_image: key tensor shaped [(B*S), N, T, C].
        head_dim: per-head channel dimension C, used for the 1/sqrt(C) scale.
        num_frames: temporal length T.

    Returns:
        A scalar tensor clamped to a minimum of 1.
    """
    scaled_q = query_image * head_dim**-0.5

    # Frame-to-frame attention logits; softmax in float32 for stability.
    attn_map = scaled_q @ key_image.transpose(-2, -1)
    attn_map = attn_map.to(torch.float32).softmax(dim=-1)

    # Collapse to one [T, T] matrix per (token, head) pair.
    attn_map = attn_map.reshape(-1, num_frames, num_frames)

    # Drop self-attention entries (the diagonal) from every matrix.
    self_mask = torch.eye(num_frames, device=attn_map.device).bool()
    self_mask = self_mask.unsqueeze(0).expand(attn_map.shape[0], -1, -1)
    cross_frame_attn = attn_map.masked_fill(self_mask, 0)

    # Mean over the T*T - T off-diagonal entries of each matrix.
    off_diag_count = num_frames * num_frames - num_frames
    mean_scores = cross_frame_attn.sum(dim=(1, 2)) / off_diag_count

    # Scale by (T + user weight) and never shrink attention output.
    enhance = mean_scores.mean() * (num_frames + get_enhance_weight())
    return enhance.clamp(min=1)
enhance_a_video/globals.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch

# Module-level state shared between the FETA nodes and the attention patch.
# NUM_FRAMES: number of latent frames in the current sampling run.
# FETA_WEIGHT: enhancement weight consumed by feta_score.
# ENABLE_FETA: whether the enhance-a-video patch is currently active.
NUM_FRAMES = None
FETA_WEIGHT = None
ENABLE_FETA = False

@torch.compiler.disable()
def set_num_frames(num_frames: int):
    """Record the frame count for the current sampling run."""
    global NUM_FRAMES
    NUM_FRAMES = num_frames

@torch.compiler.disable()
def get_num_frames() -> int:
    """Return the frame count set by ``set_num_frames`` (None if unset)."""
    return NUM_FRAMES


def enable_enhance():
    """Turn the FETA enhancement on for subsequent attention calls."""
    global ENABLE_FETA
    ENABLE_FETA = True

def disable_enhance():
    """Turn the FETA enhancement off."""
    global ENABLE_FETA
    ENABLE_FETA = False

@torch.compiler.disable()
def is_enhance_enabled() -> bool:
    """Return True while the FETA enhancement is enabled."""
    return ENABLE_FETA

@torch.compiler.disable()
def set_enhance_weight(feta_weight: float):
    """Store the user-configured enhancement weight."""
    global FETA_WEIGHT
    FETA_WEIGHT = feta_weight

@torch.compiler.disable()
def get_enhance_weight() -> float:
    """Return the weight set by ``set_enhance_weight`` (None if unset)."""
    return FETA_WEIGHT
example_workflows/example_inputs/MTV_crafter_example_pose.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f1d09148ca2bd7994de0647d73ed5a44fb186f4d300265edd9571a912f8a0d
3
+ size 318276
example_workflows/example_inputs/env.png ADDED

Git LFS Details

  • SHA256: 61d79eed86a7fd7d831dfc16ce0091f1da725690ee7e8a516f75b42d1c31624f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB
example_workflows/example_inputs/human.png ADDED

Git LFS Details

  • SHA256: 3ee49acf2e6251ef452230019c9394addf95a34c784c421c50d4e3ccc664ed2f
  • Pointer size: 131 Bytes
  • Size of remote file: 210 kB
example_workflows/example_inputs/jeep.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67dc9e4ce73a7289901b159755953652965a17939fe43aedad43381934b32f55
3
+ size 185636
example_workflows/example_inputs/thing.png ADDED
example_workflows/example_inputs/wolf_interpolated.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56fadca209868e8930dc12bd825a2aa8bab822f0152812aaa2aeefd46176c74b
3
+ size 194949
example_workflows/example_inputs/woman.jpg ADDED

Git LFS Details

  • SHA256: 5662b7d55d57749a8ed53267be076b13234d2e2de445fdd5b58a695f894a40de
  • Pointer size: 131 Bytes
  • Size of remote file: 197 kB
example_workflows/example_inputs/woman.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08584293621824d039c264132d90b654bede740f67d9384979544e3e2abfacc
3
+ size 1765454
example_workflows/wanvideo2_2_I2V_A14B_example_WIP.json ADDED
@@ -0,0 +1,2074 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "c6e410bc-5e2c-460b-ae81-c91b6094fbb1",
3
+ "revision": 0,
4
+ "last_node_id": 97,
5
+ "last_link_id": 169,
6
+ "nodes": [
7
+ {
8
+ "id": 50,
9
+ "type": "CLIPTextEncode",
10
+ "pos": [
11
+ 354.00396728515625,
12
+ 922.6547241210938
13
+ ],
14
+ "size": [
15
+ 400,
16
+ 200
17
+ ],
18
+ "flags": {},
19
+ "order": 13,
20
+ "mode": 2,
21
+ "inputs": [
22
+ {
23
+ "name": "clip",
24
+ "type": "CLIP",
25
+ "link": 53
26
+ }
27
+ ],
28
+ "outputs": [
29
+ {
30
+ "name": "CONDITIONING",
31
+ "type": "CONDITIONING",
32
+ "slot_index": 0,
33
+ "links": [
34
+ 55
35
+ ]
36
+ }
37
+ ],
38
+ "properties": {
39
+ "cnr_id": "comfy-core",
40
+ "ver": "0.3.44",
41
+ "Node name for S&R": "CLIPTextEncode"
42
+ },
43
+ "widgets_values": [
44
+ "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
45
+ ],
46
+ "color": "#432",
47
+ "bgcolor": "#653"
48
+ },
49
+ {
50
+ "id": 48,
51
+ "type": "CLIPLoader",
52
+ "pos": [
53
+ -5.996182441711426,
54
+ 672.6546020507812
55
+ ],
56
+ "size": [
57
+ 315,
58
+ 106
59
+ ],
60
+ "flags": {},
61
+ "order": 0,
62
+ "mode": 2,
63
+ "inputs": [],
64
+ "outputs": [
65
+ {
66
+ "name": "CLIP",
67
+ "type": "CLIP",
68
+ "slot_index": 0,
69
+ "links": [
70
+ 52,
71
+ 53
72
+ ]
73
+ }
74
+ ],
75
+ "properties": {
76
+ "cnr_id": "comfy-core",
77
+ "ver": "0.3.44",
78
+ "Node name for S&R": "CLIPLoader"
79
+ },
80
+ "widgets_values": [
81
+ "umt5_xxl_fp16.safetensors",
82
+ "wan",
83
+ "default"
84
+ ],
85
+ "color": "#432",
86
+ "bgcolor": "#653"
87
+ },
88
+ {
89
+ "id": 51,
90
+ "type": "Note",
91
+ "pos": [
92
+ 24.003835678100586,
93
+ 502.65411376953125
94
+ ],
95
+ "size": [
96
+ 253.16725158691406,
97
+ 88
98
+ ],
99
+ "flags": {},
100
+ "order": 1,
101
+ "mode": 0,
102
+ "inputs": [],
103
+ "outputs": [],
104
+ "properties": {},
105
+ "widgets_values": [
106
+ "You can also use native ComfyUI text encoding with these nodes instead of the original, the models are node specific and can't otherwise be mixed."
107
+ ],
108
+ "color": "#432",
109
+ "bgcolor": "#653"
110
+ },
111
+ {
112
+ "id": 49,
113
+ "type": "CLIPTextEncode",
114
+ "pos": [
115
+ 354.00396728515625,
116
+ 672.6546020507812
117
+ ],
118
+ "size": [
119
+ 400,
120
+ 200
121
+ ],
122
+ "flags": {},
123
+ "order": 12,
124
+ "mode": 2,
125
+ "inputs": [
126
+ {
127
+ "name": "clip",
128
+ "type": "CLIP",
129
+ "link": 52
130
+ }
131
+ ],
132
+ "outputs": [
133
+ {
134
+ "name": "CONDITIONING",
135
+ "type": "CONDITIONING",
136
+ "slot_index": 0,
137
+ "links": [
138
+ 54
139
+ ]
140
+ }
141
+ ],
142
+ "properties": {
143
+ "cnr_id": "comfy-core",
144
+ "ver": "0.3.44",
145
+ "Node name for S&R": "CLIPTextEncode"
146
+ },
147
+ "widgets_values": [
148
+ "high quality nature video featuring a red panda balancing on a bamboo stem while a bird lands on it's head, on the background there is a waterfall"
149
+ ],
150
+ "color": "#432",
151
+ "bgcolor": "#653"
152
+ },
153
+ {
154
+ "id": 46,
155
+ "type": "WanVideoTextEmbedBridge",
156
+ "pos": [
157
+ 804.0042724609375,
158
+ 662.6546020507812
159
+ ],
160
+ "size": [
161
+ 315,
162
+ 46
163
+ ],
164
+ "flags": {},
165
+ "order": 19,
166
+ "mode": 2,
167
+ "inputs": [
168
+ {
169
+ "name": "positive",
170
+ "type": "CONDITIONING",
171
+ "link": 54
172
+ },
173
+ {
174
+ "name": "negative",
175
+ "shape": 7,
176
+ "type": "CONDITIONING",
177
+ "link": 55
178
+ }
179
+ ],
180
+ "outputs": [
181
+ {
182
+ "name": "text_embeds",
183
+ "type": "WANVIDEOTEXTEMBEDS",
184
+ "links": null
185
+ }
186
+ ],
187
+ "properties": {
188
+ "cnr_id": "ComfyUI-WanVideoWrapper",
189
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
190
+ "Node name for S&R": "WanVideoTextEmbedBridge"
191
+ },
192
+ "widgets_values": []
193
+ },
194
+ {
195
+ "id": 44,
196
+ "type": "Note",
197
+ "pos": [
198
+ -960,
199
+ -810
200
+ ],
201
+ "size": [
202
+ 303.0501403808594,
203
+ 88
204
+ ],
205
+ "flags": {},
206
+ "order": 2,
207
+ "mode": 0,
208
+ "inputs": [],
209
+ "outputs": [],
210
+ "properties": {},
211
+ "widgets_values": [
212
+ "If you have Triton installed, connect this for ~30% speed increase"
213
+ ],
214
+ "color": "#432",
215
+ "bgcolor": "#653"
216
+ },
217
+ {
218
+ "id": 35,
219
+ "type": "WanVideoTorchCompileSettings",
220
+ "pos": [
221
+ -550,
222
+ -870
223
+ ],
224
+ "size": [
225
+ 390.5999755859375,
226
+ 202
227
+ ],
228
+ "flags": {},
229
+ "order": 3,
230
+ "mode": 0,
231
+ "inputs": [],
232
+ "outputs": [
233
+ {
234
+ "name": "torch_compile_args",
235
+ "type": "WANCOMPILEARGS",
236
+ "slot_index": 0,
237
+ "links": [
238
+ 111,
239
+ 112
240
+ ]
241
+ }
242
+ ],
243
+ "properties": {
244
+ "cnr_id": "ComfyUI-WanVideoWrapper",
245
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
246
+ "Node name for S&R": "WanVideoTorchCompileSettings"
247
+ },
248
+ "widgets_values": [
249
+ "inductor",
250
+ false,
251
+ "default",
252
+ false,
253
+ 64,
254
+ true,
255
+ 128
256
+ ]
257
+ },
258
+ {
259
+ "id": 22,
260
+ "type": "WanVideoModelLoader",
261
+ "pos": [
262
+ -10,
263
+ -740
264
+ ],
265
+ "size": [
266
+ 477.4410095214844,
267
+ 274
268
+ ],
269
+ "flags": {},
270
+ "order": 14,
271
+ "mode": 0,
272
+ "inputs": [
273
+ {
274
+ "name": "compile_args",
275
+ "shape": 7,
276
+ "type": "WANCOMPILEARGS",
277
+ "link": 111
278
+ },
279
+ {
280
+ "name": "block_swap_args",
281
+ "shape": 7,
282
+ "type": "BLOCKSWAPARGS",
283
+ "link": null
284
+ },
285
+ {
286
+ "name": "lora",
287
+ "shape": 7,
288
+ "type": "WANVIDLORA",
289
+ "link": null
290
+ },
291
+ {
292
+ "name": "vram_management_args",
293
+ "shape": 7,
294
+ "type": "VRAM_MANAGEMENTARGS",
295
+ "link": null
296
+ },
297
+ {
298
+ "name": "vace_model",
299
+ "shape": 7,
300
+ "type": "VACEPATH",
301
+ "link": null
302
+ },
303
+ {
304
+ "name": "fantasytalking_model",
305
+ "shape": 7,
306
+ "type": "FANTASYTALKINGMODEL",
307
+ "link": null
308
+ },
309
+ {
310
+ "name": "multitalk_model",
311
+ "shape": 7,
312
+ "type": "MULTITALKMODEL",
313
+ "link": null
314
+ }
315
+ ],
316
+ "outputs": [
317
+ {
318
+ "name": "model",
319
+ "type": "WANVIDEOMODEL",
320
+ "slot_index": 0,
321
+ "links": [
322
+ 155
323
+ ]
324
+ }
325
+ ],
326
+ "properties": {
327
+ "cnr_id": "ComfyUI-WanVideoWrapper",
328
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
329
+ "Node name for S&R": "WanVideoModelLoader"
330
+ },
331
+ "widgets_values": [
332
+ "WanVideo\\2_2\\Wan2_2-I2V-A14B-HIGH_fp8_e4m3fn_scaled_KJ.safetensors",
333
+ "fp16_fast",
334
+ "fp8_e4m3fn_scaled",
335
+ "offload_device",
336
+ "sageattn"
337
+ ],
338
+ "color": "#223",
339
+ "bgcolor": "#335"
340
+ },
341
+ {
342
+ "id": 71,
343
+ "type": "WanVideoModelLoader",
344
+ "pos": [
345
+ -10,
346
+ -380
347
+ ],
348
+ "size": [
349
+ 477.4410095214844,
350
+ 274
351
+ ],
352
+ "flags": {},
353
+ "order": 15,
354
+ "mode": 0,
355
+ "inputs": [
356
+ {
357
+ "name": "compile_args",
358
+ "shape": 7,
359
+ "type": "WANCOMPILEARGS",
360
+ "link": 112
361
+ },
362
+ {
363
+ "name": "block_swap_args",
364
+ "shape": 7,
365
+ "type": "BLOCKSWAPARGS",
366
+ "link": null
367
+ },
368
+ {
369
+ "name": "lora",
370
+ "shape": 7,
371
+ "type": "WANVIDLORA",
372
+ "link": null
373
+ },
374
+ {
375
+ "name": "vram_management_args",
376
+ "shape": 7,
377
+ "type": "VRAM_MANAGEMENTARGS",
378
+ "link": null
379
+ },
380
+ {
381
+ "name": "vace_model",
382
+ "shape": 7,
383
+ "type": "VACEPATH",
384
+ "link": null
385
+ },
386
+ {
387
+ "name": "fantasytalking_model",
388
+ "shape": 7,
389
+ "type": "FANTASYTALKINGMODEL",
390
+ "link": null
391
+ },
392
+ {
393
+ "name": "multitalk_model",
394
+ "shape": 7,
395
+ "type": "MULTITALKMODEL",
396
+ "link": null
397
+ }
398
+ ],
399
+ "outputs": [
400
+ {
401
+ "name": "model",
402
+ "type": "WANVIDEOMODEL",
403
+ "slot_index": 0,
404
+ "links": [
405
+ 160
406
+ ]
407
+ }
408
+ ],
409
+ "properties": {
410
+ "cnr_id": "ComfyUI-WanVideoWrapper",
411
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
412
+ "Node name for S&R": "WanVideoModelLoader"
413
+ },
414
+ "widgets_values": [
415
+ "WanVideo\\2_2\\Wan2_2-I2V-A14B-LOW_fp8_e4m3fn_scaled_KJ.safetensors",
416
+ "fp16_fast",
417
+ "fp8_e4m3fn_scaled",
418
+ "offload_device",
419
+ "sageattn"
420
+ ],
421
+ "color": "#223",
422
+ "bgcolor": "#335"
423
+ },
424
+ {
425
+ "id": 92,
426
+ "type": "WanVideoSetBlockSwap",
427
+ "pos": [
428
+ 538.2930908203125,
429
+ -699.6842041015625
430
+ ],
431
+ "size": [
432
+ 201.76815795898438,
433
+ 46
434
+ ],
435
+ "flags": {},
436
+ "order": 20,
437
+ "mode": 0,
438
+ "inputs": [
439
+ {
440
+ "name": "model",
441
+ "type": "WANVIDEOMODEL",
442
+ "link": 155
443
+ },
444
+ {
445
+ "name": "block_swap_args",
446
+ "shape": 7,
447
+ "type": "BLOCKSWAPARGS",
448
+ "link": 156
449
+ }
450
+ ],
451
+ "outputs": [
452
+ {
453
+ "name": "model",
454
+ "type": "WANVIDEOMODEL",
455
+ "links": [
456
+ 157
457
+ ]
458
+ }
459
+ ],
460
+ "properties": {
461
+ "cnr_id": "ComfyUI-WanVideoWrapper",
462
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
463
+ "Node name for S&R": "WanVideoSetBlockSwap"
464
+ },
465
+ "color": "#223",
466
+ "bgcolor": "#335"
467
+ },
468
+ {
469
+ "id": 93,
470
+ "type": "WanVideoSetBlockSwap",
471
+ "pos": [
472
+ 548.8602294921875,
473
+ -260.6182556152344
474
+ ],
475
+ "size": [
476
+ 201.76815795898438,
477
+ 46
478
+ ],
479
+ "flags": {},
480
+ "order": 21,
481
+ "mode": 0,
482
+ "inputs": [
483
+ {
484
+ "name": "model",
485
+ "type": "WANVIDEOMODEL",
486
+ "link": 160
487
+ },
488
+ {
489
+ "name": "block_swap_args",
490
+ "shape": 7,
491
+ "type": "BLOCKSWAPARGS",
492
+ "link": 159
493
+ }
494
+ ],
495
+ "outputs": [
496
+ {
497
+ "name": "model",
498
+ "type": "WANVIDEOMODEL",
499
+ "links": [
500
+ 161
501
+ ]
502
+ }
503
+ ],
504
+ "properties": {
505
+ "cnr_id": "ComfyUI-WanVideoWrapper",
506
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
507
+ "Node name for S&R": "WanVideoSetBlockSwap"
508
+ },
509
+ "color": "#223",
510
+ "bgcolor": "#335"
511
+ },
512
+ {
513
+ "id": 79,
514
+ "type": "WanVideoSetLoRAs",
515
+ "pos": [
516
+ 969.6483764648438,
517
+ -216.53614807128906
518
+ ],
519
+ "size": [
520
+ 222.27981567382812,
521
+ 46
522
+ ],
523
+ "flags": {},
524
+ "order": 24,
525
+ "mode": 0,
526
+ "inputs": [
527
+ {
528
+ "name": "model",
529
+ "type": "WANVIDEOMODEL",
530
+ "link": 161
531
+ },
532
+ {
533
+ "name": "lora",
534
+ "shape": 7,
535
+ "type": "WANVIDLORA",
536
+ "link": 169
537
+ }
538
+ ],
539
+ "outputs": [
540
+ {
541
+ "name": "model",
542
+ "type": "WANVIDEOMODEL",
543
+ "links": [
544
+ 144
545
+ ]
546
+ }
547
+ ],
548
+ "properties": {
549
+ "cnr_id": "ComfyUI-WanVideoWrapper",
550
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
551
+ "Node name for S&R": "WanVideoSetLoRAs"
552
+ },
553
+ "widgets_values": [],
554
+ "color": "#223",
555
+ "bgcolor": "#335"
556
+ },
557
+ {
558
+ "id": 38,
559
+ "type": "WanVideoVAELoader",
560
+ "pos": [
561
+ 1373.9725341796875,
562
+ -991.5189208984375
563
+ ],
564
+ "size": [
565
+ 315,
566
+ 82
567
+ ],
568
+ "flags": {},
569
+ "order": 4,
570
+ "mode": 0,
571
+ "inputs": [],
572
+ "outputs": [
573
+ {
574
+ "name": "vae",
575
+ "type": "WANVAE",
576
+ "slot_index": 0,
577
+ "links": [
578
+ 43,
579
+ 137
580
+ ]
581
+ }
582
+ ],
583
+ "properties": {
584
+ "cnr_id": "ComfyUI-WanVideoWrapper",
585
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
586
+ "Node name for S&R": "WanVideoVAELoader"
587
+ },
588
+ "widgets_values": [
589
+ "wanvideo\\Wan2_1_VAE_bf16.safetensors",
590
+ "bf16"
591
+ ],
592
+ "color": "#322",
593
+ "bgcolor": "#533"
594
+ },
595
+ {
596
+ "id": 67,
597
+ "type": "LoadImage",
598
+ "pos": [
599
+ 330.2861633300781,
600
+ -1144.739013671875
601
+ ],
602
+ "size": [
603
+ 274.080078125,
604
+ 314
605
+ ],
606
+ "flags": {},
607
+ "order": 5,
608
+ "mode": 0,
609
+ "inputs": [],
610
+ "outputs": [
611
+ {
612
+ "name": "IMAGE",
613
+ "type": "IMAGE",
614
+ "links": [
615
+ 71
616
+ ]
617
+ },
618
+ {
619
+ "name": "MASK",
620
+ "type": "MASK",
621
+ "links": null
622
+ }
623
+ ],
624
+ "properties": {
625
+ "cnr_id": "comfy-core",
626
+ "ver": "0.3.44",
627
+ "Node name for S&R": "LoadImage"
628
+ },
629
+ "widgets_values": [
630
+ "oldman_upscaled.png",
631
+ "image"
632
+ ]
633
+ },
634
+ {
635
+ "id": 28,
636
+ "type": "WanVideoDecode",
637
+ "pos": [
638
+ 2620.946533203125,
639
+ -519.3373413085938
640
+ ],
641
+ "size": [
642
+ 315,
643
+ 198
644
+ ],
645
+ "flags": {},
646
+ "order": 27,
647
+ "mode": 0,
648
+ "inputs": [
649
+ {
650
+ "name": "vae",
651
+ "type": "WANVAE",
652
+ "link": 43
653
+ },
654
+ {
655
+ "name": "samples",
656
+ "type": "LATENT",
657
+ "link": 151
658
+ }
659
+ ],
660
+ "outputs": [
661
+ {
662
+ "name": "images",
663
+ "type": "IMAGE",
664
+ "slot_index": 0,
665
+ "links": [
666
+ 76
667
+ ]
668
+ }
669
+ ],
670
+ "properties": {
671
+ "cnr_id": "ComfyUI-WanVideoWrapper",
672
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
673
+ "Node name for S&R": "WanVideoDecode"
674
+ },
675
+ "widgets_values": [
676
+ false,
677
+ 272,
678
+ 272,
679
+ 144,
680
+ 128,
681
+ "default"
682
+ ],
683
+ "color": "#322",
684
+ "bgcolor": "#533"
685
+ },
686
+ {
687
+ "id": 91,
688
+ "type": "INTConstant",
689
+ "pos": [
690
+ 1554.34130859375,
691
+ 293.0675964355469
692
+ ],
693
+ "size": [
694
+ 200,
695
+ 58
696
+ ],
697
+ "flags": {},
698
+ "order": 6,
699
+ "mode": 0,
700
+ "inputs": [],
701
+ "outputs": [
702
+ {
703
+ "name": "value",
704
+ "type": "INT",
705
+ "links": [
706
+ 153,
707
+ 154
708
+ ]
709
+ }
710
+ ],
711
+ "title": "Split_step",
712
+ "properties": {
713
+ "cnr_id": "comfyui-kjnodes",
714
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
715
+ "Node name for S&R": "INTConstant"
716
+ },
717
+ "widgets_values": [
718
+ 3
719
+ ],
720
+ "color": "#1b4669",
721
+ "bgcolor": "#29699c"
722
+ },
723
+ {
724
+ "id": 80,
725
+ "type": "WanVideoSetLoRAs",
726
+ "pos": [
727
+ 985.8822021484375,
728
+ -458.0684814453125
729
+ ],
730
+ "size": [
731
+ 222.27981567382812,
732
+ 46
733
+ ],
734
+ "flags": {},
735
+ "order": 23,
736
+ "mode": 0,
737
+ "inputs": [
738
+ {
739
+ "name": "model",
740
+ "type": "WANVIDEOMODEL",
741
+ "link": 157
742
+ },
743
+ {
744
+ "name": "lora",
745
+ "shape": 7,
746
+ "type": "WANVIDLORA",
747
+ "link": 110
748
+ }
749
+ ],
750
+ "outputs": [
751
+ {
752
+ "name": "model",
753
+ "type": "WANVIDEOMODEL",
754
+ "links": [
755
+ 109
756
+ ]
757
+ }
758
+ ],
759
+ "properties": {
760
+ "cnr_id": "ComfyUI-WanVideoWrapper",
761
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
762
+ "Node name for S&R": "WanVideoSetLoRAs"
763
+ },
764
+ "widgets_values": [],
765
+ "color": "#223",
766
+ "bgcolor": "#335"
767
+ },
768
+ {
769
+ "id": 69,
770
+ "type": "GetImageSizeAndCount",
771
+ "pos": [
772
+ 2725.78076171875,
773
+ -249.85873413085938
774
+ ],
775
+ "size": [
776
+ 240.41265869140625,
777
+ 86
778
+ ],
779
+ "flags": {},
780
+ "order": 28,
781
+ "mode": 0,
782
+ "inputs": [
783
+ {
784
+ "name": "image",
785
+ "type": "IMAGE",
786
+ "link": 76
787
+ }
788
+ ],
789
+ "outputs": [
790
+ {
791
+ "name": "image",
792
+ "type": "IMAGE",
793
+ "links": [
794
+ 77
795
+ ]
796
+ },
797
+ {
798
+ "label": "704 width",
799
+ "name": "width",
800
+ "type": "INT",
801
+ "links": null
802
+ },
803
+ {
804
+ "label": "704 height",
805
+ "name": "height",
806
+ "type": "INT",
807
+ "links": null
808
+ },
809
+ {
810
+ "label": "81 count",
811
+ "name": "count",
812
+ "type": "INT",
813
+ "links": null
814
+ }
815
+ ],
816
+ "properties": {
817
+ "cnr_id": "comfyui-kjnodes",
818
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
819
+ "Node name for S&R": "GetImageSizeAndCount"
820
+ },
821
+ "widgets_values": []
822
+ },
823
+ {
824
+ "id": 27,
825
+ "type": "WanVideoSampler",
826
+ "pos": [
827
+ 1833.1953125,
828
+ -394.93426513671875
829
+ ],
830
+ "size": [
831
+ 315,
832
+ 975
833
+ ],
834
+ "flags": {},
835
+ "order": 25,
836
+ "mode": 0,
837
+ "inputs": [
838
+ {
839
+ "name": "model",
840
+ "type": "WANVIDEOMODEL",
841
+ "link": 109
842
+ },
843
+ {
844
+ "name": "image_embeds",
845
+ "type": "WANVIDIMAGE_EMBEDS",
846
+ "link": 138
847
+ },
848
+ {
849
+ "name": "text_embeds",
850
+ "shape": 7,
851
+ "type": "WANVIDEOTEXTEMBEDS",
852
+ "link": 30
853
+ },
854
+ {
855
+ "name": "samples",
856
+ "shape": 7,
857
+ "type": "LATENT",
858
+ "link": null
859
+ },
860
+ {
861
+ "name": "feta_args",
862
+ "shape": 7,
863
+ "type": "FETAARGS",
864
+ "link": null
865
+ },
866
+ {
867
+ "name": "context_options",
868
+ "shape": 7,
869
+ "type": "WANVIDCONTEXT",
870
+ "link": null
871
+ },
872
+ {
873
+ "name": "cache_args",
874
+ "shape": 7,
875
+ "type": "CACHEARGS",
876
+ "link": null
877
+ },
878
+ {
879
+ "name": "flowedit_args",
880
+ "shape": 7,
881
+ "type": "FLOWEDITARGS",
882
+ "link": null
883
+ },
884
+ {
885
+ "name": "slg_args",
886
+ "shape": 7,
887
+ "type": "SLGARGS",
888
+ "link": null
889
+ },
890
+ {
891
+ "name": "loop_args",
892
+ "shape": 7,
893
+ "type": "LOOPARGS",
894
+ "link": null
895
+ },
896
+ {
897
+ "name": "experimental_args",
898
+ "shape": 7,
899
+ "type": "EXPERIMENTALARGS",
900
+ "link": null
901
+ },
902
+ {
903
+ "name": "sigmas",
904
+ "shape": 7,
905
+ "type": "SIGMAS",
906
+ "link": null
907
+ },
908
+ {
909
+ "name": "unianimate_poses",
910
+ "shape": 7,
911
+ "type": "UNIANIMATE_POSE",
912
+ "link": null
913
+ },
914
+ {
915
+ "name": "fantasytalking_embeds",
916
+ "shape": 7,
917
+ "type": "FANTASYTALKING_EMBEDS",
918
+ "link": null
919
+ },
920
+ {
921
+ "name": "uni3c_embeds",
922
+ "shape": 7,
923
+ "type": "UNI3C_EMBEDS",
924
+ "link": null
925
+ },
926
+ {
927
+ "name": "multitalk_embeds",
928
+ "shape": 7,
929
+ "type": "MULTITALK_EMBEDS",
930
+ "link": null
931
+ },
932
+ {
933
+ "name": "freeinit_args",
934
+ "shape": 7,
935
+ "type": "FREEINITARGS",
936
+ "link": null
937
+ },
938
+ {
939
+ "name": "steps",
940
+ "type": "INT",
941
+ "widget": {
942
+ "name": "steps"
943
+ },
944
+ "link": 163
945
+ },
946
+ {
947
+ "name": "cfg",
948
+ "type": "FLOAT",
949
+ "widget": {
950
+ "name": "cfg"
951
+ },
952
+ "link": 167
953
+ },
954
+ {
955
+ "name": "end_step",
956
+ "shape": 7,
957
+ "type": "INT",
958
+ "widget": {
959
+ "name": "end_step"
960
+ },
961
+ "link": 153
962
+ }
963
+ ],
964
+ "outputs": [
965
+ {
966
+ "name": "samples",
967
+ "type": "LATENT",
968
+ "slot_index": 0,
969
+ "links": [
970
+ 143
971
+ ]
972
+ }
973
+ ],
974
+ "properties": {
975
+ "cnr_id": "ComfyUI-WanVideoWrapper",
976
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
977
+ "Node name for S&R": "WanVideoSampler"
978
+ },
979
+ "widgets_values": [
980
+ 6,
981
+ 1,
982
+ 8,
983
+ 43,
984
+ "fixed",
985
+ true,
986
+ "dpm++_sde",
987
+ 0,
988
+ 1,
989
+ false,
990
+ "comfy",
991
+ 0,
992
+ 10,
993
+ ""
994
+ ]
995
+ },
996
+ {
997
+ "id": 90,
998
+ "type": "WanVideoSampler",
999
+ "pos": [
1000
+ 2206.047119140625,
1001
+ -393.0194396972656
1002
+ ],
1003
+ "size": [
1004
+ 315,
1005
+ 975
1006
+ ],
1007
+ "flags": {},
1008
+ "order": 26,
1009
+ "mode": 0,
1010
+ "inputs": [
1011
+ {
1012
+ "name": "model",
1013
+ "type": "WANVIDEOMODEL",
1014
+ "link": 144
1015
+ },
1016
+ {
1017
+ "name": "image_embeds",
1018
+ "type": "WANVIDIMAGE_EMBEDS",
1019
+ "link": 149
1020
+ },
1021
+ {
1022
+ "name": "text_embeds",
1023
+ "shape": 7,
1024
+ "type": "WANVIDEOTEXTEMBEDS",
1025
+ "link": 152
1026
+ },
1027
+ {
1028
+ "name": "samples",
1029
+ "shape": 7,
1030
+ "type": "LATENT",
1031
+ "link": 143
1032
+ },
1033
+ {
1034
+ "name": "feta_args",
1035
+ "shape": 7,
1036
+ "type": "FETAARGS",
1037
+ "link": null
1038
+ },
1039
+ {
1040
+ "name": "context_options",
1041
+ "shape": 7,
1042
+ "type": "WANVIDCONTEXT",
1043
+ "link": null
1044
+ },
1045
+ {
1046
+ "name": "cache_args",
1047
+ "shape": 7,
1048
+ "type": "CACHEARGS",
1049
+ "link": null
1050
+ },
1051
+ {
1052
+ "name": "flowedit_args",
1053
+ "shape": 7,
1054
+ "type": "FLOWEDITARGS",
1055
+ "link": null
1056
+ },
1057
+ {
1058
+ "name": "slg_args",
1059
+ "shape": 7,
1060
+ "type": "SLGARGS",
1061
+ "link": null
1062
+ },
1063
+ {
1064
+ "name": "loop_args",
1065
+ "shape": 7,
1066
+ "type": "LOOPARGS",
1067
+ "link": null
1068
+ },
1069
+ {
1070
+ "name": "experimental_args",
1071
+ "shape": 7,
1072
+ "type": "EXPERIMENTALARGS",
1073
+ "link": null
1074
+ },
1075
+ {
1076
+ "name": "sigmas",
1077
+ "shape": 7,
1078
+ "type": "SIGMAS",
1079
+ "link": null
1080
+ },
1081
+ {
1082
+ "name": "unianimate_poses",
1083
+ "shape": 7,
1084
+ "type": "UNIANIMATE_POSE",
1085
+ "link": null
1086
+ },
1087
+ {
1088
+ "name": "fantasytalking_embeds",
1089
+ "shape": 7,
1090
+ "type": "FANTASYTALKING_EMBEDS",
1091
+ "link": null
1092
+ },
1093
+ {
1094
+ "name": "uni3c_embeds",
1095
+ "shape": 7,
1096
+ "type": "UNI3C_EMBEDS",
1097
+ "link": null
1098
+ },
1099
+ {
1100
+ "name": "multitalk_embeds",
1101
+ "shape": 7,
1102
+ "type": "MULTITALK_EMBEDS",
1103
+ "link": null
1104
+ },
1105
+ {
1106
+ "name": "freeinit_args",
1107
+ "shape": 7,
1108
+ "type": "FREEINITARGS",
1109
+ "link": null
1110
+ },
1111
+ {
1112
+ "name": "steps",
1113
+ "type": "INT",
1114
+ "widget": {
1115
+ "name": "steps"
1116
+ },
1117
+ "link": 164
1118
+ },
1119
+ {
1120
+ "name": "start_step",
1121
+ "shape": 7,
1122
+ "type": "INT",
1123
+ "widget": {
1124
+ "name": "start_step"
1125
+ },
1126
+ "link": 154
1127
+ }
1128
+ ],
1129
+ "outputs": [
1130
+ {
1131
+ "name": "samples",
1132
+ "type": "LATENT",
1133
+ "slot_index": 0,
1134
+ "links": [
1135
+ 151
1136
+ ]
1137
+ }
1138
+ ],
1139
+ "properties": {
1140
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1141
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1142
+ "Node name for S&R": "WanVideoSampler"
1143
+ },
1144
+ "widgets_values": [
1145
+ 6,
1146
+ 1,
1147
+ 8,
1148
+ 43,
1149
+ "fixed",
1150
+ true,
1151
+ "dpm++_sde",
1152
+ 0,
1153
+ 1,
1154
+ false,
1155
+ "comfy",
1156
+ 10,
1157
+ -1,
1158
+ ""
1159
+ ]
1160
+ },
1161
+ {
1162
+ "id": 94,
1163
+ "type": "INTConstant",
1164
+ "pos": [
1165
+ 1446.0140380859375,
1166
+ -77.41889953613281
1167
+ ],
1168
+ "size": [
1169
+ 200,
1170
+ 58
1171
+ ],
1172
+ "flags": {},
1173
+ "order": 7,
1174
+ "mode": 0,
1175
+ "inputs": [],
1176
+ "outputs": [
1177
+ {
1178
+ "name": "value",
1179
+ "type": "INT",
1180
+ "links": [
1181
+ 163,
1182
+ 164,
1183
+ 165
1184
+ ]
1185
+ }
1186
+ ],
1187
+ "title": "Steps",
1188
+ "properties": {
1189
+ "cnr_id": "comfyui-kjnodes",
1190
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
1191
+ "Node name for S&R": "INTConstant"
1192
+ },
1193
+ "widgets_values": [
1194
+ 6
1195
+ ],
1196
+ "color": "#1b4669",
1197
+ "bgcolor": "#29699c"
1198
+ },
1199
+ {
1200
+ "id": 95,
1201
+ "type": "CreateCFGScheduleFloatList",
1202
+ "pos": [
1203
+ 1455.8336181640625,
1204
+ 44.19880294799805
1205
+ ],
1206
+ "size": [
1207
+ 298.3199157714844,
1208
+ 178
1209
+ ],
1210
+ "flags": {},
1211
+ "order": 17,
1212
+ "mode": 0,
1213
+ "inputs": [
1214
+ {
1215
+ "name": "steps",
1216
+ "type": "INT",
1217
+ "widget": {
1218
+ "name": "steps"
1219
+ },
1220
+ "link": 165
1221
+ }
1222
+ ],
1223
+ "outputs": [
1224
+ {
1225
+ "name": "float_list",
1226
+ "type": "FLOAT",
1227
+ "links": [
1228
+ 167
1229
+ ]
1230
+ }
1231
+ ],
1232
+ "properties": {
1233
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1234
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
1235
+ "Node name for S&R": "CreateCFGScheduleFloatList"
1236
+ },
1237
+ "widgets_values": [
1238
+ 30,
1239
+ 2,
1240
+ 2,
1241
+ "linear",
1242
+ 0,
1243
+ 0.01
1244
+ ]
1245
+ },
1246
+ {
1247
+ "id": 97,
1248
+ "type": "WanVideoLoraSelect",
1249
+ "pos": [
1250
+ -717.8909301757812,
1251
+ -252.48538208007812
1252
+ ],
1253
+ "size": [
1254
+ 624.4888305664062,
1255
+ 150
1256
+ ],
1257
+ "flags": {},
1258
+ "order": 8,
1259
+ "mode": 0,
1260
+ "inputs": [
1261
+ {
1262
+ "name": "prev_lora",
1263
+ "shape": 7,
1264
+ "type": "WANVIDLORA",
1265
+ "link": null
1266
+ },
1267
+ {
1268
+ "name": "blocks",
1269
+ "shape": 7,
1270
+ "type": "SELECTEDBLOCKS",
1271
+ "link": null
1272
+ }
1273
+ ],
1274
+ "outputs": [
1275
+ {
1276
+ "name": "lora",
1277
+ "type": "WANVIDLORA",
1278
+ "links": [
1279
+ 169
1280
+ ]
1281
+ }
1282
+ ],
1283
+ "properties": {
1284
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1285
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1286
+ "Node name for S&R": "WanVideoLoraSelect"
1287
+ },
1288
+ "widgets_values": [
1289
+ "WanVideo\\Lightx2v\\lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors",
1290
+ 1,
1291
+ false,
1292
+ false
1293
+ ],
1294
+ "color": "#223",
1295
+ "bgcolor": "#335"
1296
+ },
1297
+ {
1298
+ "id": 56,
1299
+ "type": "WanVideoLoraSelect",
1300
+ "pos": [
1301
+ -765.4373168945312,
1302
+ -481.49810791015625
1303
+ ],
1304
+ "size": [
1305
+ 659.4812622070312,
1306
+ 150
1307
+ ],
1308
+ "flags": {},
1309
+ "order": 9,
1310
+ "mode": 0,
1311
+ "inputs": [
1312
+ {
1313
+ "name": "prev_lora",
1314
+ "shape": 7,
1315
+ "type": "WANVIDLORA",
1316
+ "link": null
1317
+ },
1318
+ {
1319
+ "name": "blocks",
1320
+ "shape": 7,
1321
+ "type": "SELECTEDBLOCKS",
1322
+ "link": null
1323
+ }
1324
+ ],
1325
+ "outputs": [
1326
+ {
1327
+ "name": "lora",
1328
+ "type": "WANVIDLORA",
1329
+ "links": [
1330
+ 110
1331
+ ]
1332
+ }
1333
+ ],
1334
+ "properties": {
1335
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1336
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1337
+ "Node name for S&R": "WanVideoLoraSelect"
1338
+ },
1339
+ "widgets_values": [
1340
+ "WanVideo\\Lightx2v\\lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors",
1341
+ 3,
1342
+ false,
1343
+ false
1344
+ ],
1345
+ "color": "#223",
1346
+ "bgcolor": "#335"
1347
+ },
1348
+ {
1349
+ "id": 11,
1350
+ "type": "LoadWanVideoT5TextEncoder",
1351
+ "pos": [
1352
+ 205.93421936035156,
1353
+ -21.262622833251953
1354
+ ],
1355
+ "size": [
1356
+ 377.1661376953125,
1357
+ 130
1358
+ ],
1359
+ "flags": {},
1360
+ "order": 10,
1361
+ "mode": 0,
1362
+ "inputs": [],
1363
+ "outputs": [
1364
+ {
1365
+ "name": "wan_t5_model",
1366
+ "type": "WANTEXTENCODER",
1367
+ "slot_index": 0,
1368
+ "links": [
1369
+ 15
1370
+ ]
1371
+ }
1372
+ ],
1373
+ "properties": {
1374
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1375
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1376
+ "Node name for S&R": "LoadWanVideoT5TextEncoder"
1377
+ },
1378
+ "widgets_values": [
1379
+ "umt5-xxl-enc-bf16.safetensors",
1380
+ "bf16",
1381
+ "offload_device",
1382
+ "disabled"
1383
+ ],
1384
+ "color": "#332922",
1385
+ "bgcolor": "#593930"
1386
+ },
1387
+ {
1388
+ "id": 60,
1389
+ "type": "VHS_VideoCombine",
1390
+ "pos": [
1391
+ 3150,
1392
+ -390
1393
+ ],
1394
+ "size": [
1395
+ 698.6392211914062,
1396
+ 1026.63916015625
1397
+ ],
1398
+ "flags": {},
1399
+ "order": 29,
1400
+ "mode": 0,
1401
+ "inputs": [
1402
+ {
1403
+ "name": "images",
1404
+ "type": "IMAGE",
1405
+ "link": 77
1406
+ },
1407
+ {
1408
+ "name": "audio",
1409
+ "shape": 7,
1410
+ "type": "AUDIO",
1411
+ "link": null
1412
+ },
1413
+ {
1414
+ "name": "meta_batch",
1415
+ "shape": 7,
1416
+ "type": "VHS_BatchManager",
1417
+ "link": null
1418
+ },
1419
+ {
1420
+ "name": "vae",
1421
+ "shape": 7,
1422
+ "type": "VAE",
1423
+ "link": null
1424
+ }
1425
+ ],
1426
+ "outputs": [
1427
+ {
1428
+ "name": "Filenames",
1429
+ "type": "VHS_FILENAMES",
1430
+ "links": null
1431
+ }
1432
+ ],
1433
+ "properties": {
1434
+ "cnr_id": "comfyui-videohelpersuite",
1435
+ "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8",
1436
+ "Node name for S&R": "VHS_VideoCombine"
1437
+ },
1438
+ "widgets_values": {
1439
+ "frame_rate": 16,
1440
+ "loop_count": 0,
1441
+ "filename_prefix": "WanVideo2_2_I2V",
1442
+ "format": "video/h264-mp4",
1443
+ "pix_fmt": "yuv420p",
1444
+ "crf": 19,
1445
+ "save_metadata": true,
1446
+ "trim_to_audio": false,
1447
+ "pingpong": false,
1448
+ "save_output": false,
1449
+ "videopreview": {
1450
+ "hidden": false,
1451
+ "paused": false,
1452
+ "params": {
1453
+ "filename": "WanVideo2_2_I2V_00006.mp4",
1454
+ "subfolder": "",
1455
+ "type": "temp",
1456
+ "format": "video/h264-mp4",
1457
+ "frame_rate": 16,
1458
+ "workflow": "WanVideo2_2_I2V_00006.png",
1459
+ "fullpath": "N:\\AI\\ComfyUI\\temp\\WanVideo2_2_I2V_00006.mp4"
1460
+ }
1461
+ }
1462
+ }
1463
+ },
1464
+ {
1465
+ "id": 39,
1466
+ "type": "WanVideoBlockSwap",
1467
+ "pos": [
1468
+ 516.8650512695312,
1469
+ -526.5733642578125
1470
+ ],
1471
+ "size": [
1472
+ 315,
1473
+ 154
1474
+ ],
1475
+ "flags": {},
1476
+ "order": 11,
1477
+ "mode": 0,
1478
+ "inputs": [],
1479
+ "outputs": [
1480
+ {
1481
+ "name": "block_swap_args",
1482
+ "type": "BLOCKSWAPARGS",
1483
+ "slot_index": 0,
1484
+ "links": [
1485
+ 156,
1486
+ 159
1487
+ ]
1488
+ }
1489
+ ],
1490
+ "properties": {
1491
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1492
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1493
+ "Node name for S&R": "WanVideoBlockSwap"
1494
+ },
1495
+ "widgets_values": [
1496
+ 20,
1497
+ false,
1498
+ false,
1499
+ false,
1500
+ 1
1501
+ ],
1502
+ "color": "#223",
1503
+ "bgcolor": "#335"
1504
+ },
1505
+ {
1506
+ "id": 89,
1507
+ "type": "WanVideoImageToVideoEncode",
1508
+ "pos": [
1509
+ 1401.11962890625,
1510
+ -698.7300415039062
1511
+ ],
1512
+ "size": [
1513
+ 308.2320251464844,
1514
+ 390
1515
+ ],
1516
+ "flags": {},
1517
+ "order": 22,
1518
+ "mode": 0,
1519
+ "inputs": [
1520
+ {
1521
+ "name": "vae",
1522
+ "type": "WANVAE",
1523
+ "link": 137
1524
+ },
1525
+ {
1526
+ "name": "clip_embeds",
1527
+ "shape": 7,
1528
+ "type": "WANVIDIMAGE_CLIPEMBEDS",
1529
+ "link": null
1530
+ },
1531
+ {
1532
+ "name": "start_image",
1533
+ "shape": 7,
1534
+ "type": "IMAGE",
1535
+ "link": 139
1536
+ },
1537
+ {
1538
+ "name": "end_image",
1539
+ "shape": 7,
1540
+ "type": "IMAGE",
1541
+ "link": null
1542
+ },
1543
+ {
1544
+ "name": "control_embeds",
1545
+ "shape": 7,
1546
+ "type": "WANVIDIMAGE_EMBEDS",
1547
+ "link": null
1548
+ },
1549
+ {
1550
+ "name": "temporal_mask",
1551
+ "shape": 7,
1552
+ "type": "MASK",
1553
+ "link": null
1554
+ },
1555
+ {
1556
+ "name": "extra_latents",
1557
+ "shape": 7,
1558
+ "type": "LATENT",
1559
+ "link": null
1560
+ },
1561
+ {
1562
+ "name": "add_cond_latents",
1563
+ "shape": 7,
1564
+ "type": "ADD_COND_LATENTS",
1565
+ "link": null
1566
+ },
1567
+ {
1568
+ "name": "width",
1569
+ "type": "INT",
1570
+ "widget": {
1571
+ "name": "width"
1572
+ },
1573
+ "link": 141
1574
+ },
1575
+ {
1576
+ "name": "height",
1577
+ "type": "INT",
1578
+ "widget": {
1579
+ "name": "height"
1580
+ },
1581
+ "link": 142
1582
+ }
1583
+ ],
1584
+ "outputs": [
1585
+ {
1586
+ "name": "image_embeds",
1587
+ "type": "WANVIDIMAGE_EMBEDS",
1588
+ "links": [
1589
+ 138,
1590
+ 149
1591
+ ]
1592
+ }
1593
+ ],
1594
+ "properties": {
1595
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1596
+ "ver": "7e290c67bff1f906cdab84523018573f6c9d4d7f",
1597
+ "Node name for S&R": "WanVideoImageToVideoEncode"
1598
+ },
1599
+ "widgets_values": [
1600
+ 832,
1601
+ 480,
1602
+ 81,
1603
+ 0,
1604
+ 1,
1605
+ 1,
1606
+ true,
1607
+ false,
1608
+ false
1609
+ ],
1610
+ "color": "#322",
1611
+ "bgcolor": "#533"
1612
+ },
1613
+ {
1614
+ "id": 16,
1615
+ "type": "WanVideoTextEncode",
1616
+ "pos": [
1617
+ 675.8850708007812,
1618
+ -36.032100677490234
1619
+ ],
1620
+ "size": [
1621
+ 474.3573303222656,
1622
+ 316.48370361328125
1623
+ ],
1624
+ "flags": {},
1625
+ "order": 18,
1626
+ "mode": 0,
1627
+ "inputs": [
1628
+ {
1629
+ "name": "t5",
1630
+ "shape": 7,
1631
+ "type": "WANTEXTENCODER",
1632
+ "link": 15
1633
+ },
1634
+ {
1635
+ "name": "model_to_offload",
1636
+ "shape": 7,
1637
+ "type": "WANVIDEOMODEL",
1638
+ "link": null
1639
+ }
1640
+ ],
1641
+ "outputs": [
1642
+ {
1643
+ "name": "text_embeds",
1644
+ "type": "WANVIDEOTEXTEMBEDS",
1645
+ "slot_index": 0,
1646
+ "links": [
1647
+ 30,
1648
+ 152
1649
+ ]
1650
+ }
1651
+ ],
1652
+ "properties": {
1653
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1654
+ "ver": "998a69cc0acbec503001b8b0ce0a5d5404420e1e",
1655
+ "Node name for S&R": "WanVideoTextEncode"
1656
+ },
1657
+ "widgets_values": [
1658
+ "old man gets up and jumps into the lake",
1659
+ "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
1660
+ true,
1661
+ false,
1662
+ "gpu"
1663
+ ],
1664
+ "color": "#332922",
1665
+ "bgcolor": "#593930"
1666
+ },
1667
+ {
1668
+ "id": 68,
1669
+ "type": "ImageResizeKJv2",
1670
+ "pos": [
1671
+ 696.0801391601562,
1672
+ -1143.5843505859375
1673
+ ],
1674
+ "size": [
1675
+ 270,
1676
+ 336
1677
+ ],
1678
+ "flags": {},
1679
+ "order": 16,
1680
+ "mode": 0,
1681
+ "inputs": [
1682
+ {
1683
+ "name": "image",
1684
+ "type": "IMAGE",
1685
+ "link": 71
1686
+ },
1687
+ {
1688
+ "name": "mask",
1689
+ "shape": 7,
1690
+ "type": "MASK",
1691
+ "link": null
1692
+ }
1693
+ ],
1694
+ "outputs": [
1695
+ {
1696
+ "name": "IMAGE",
1697
+ "type": "IMAGE",
1698
+ "links": [
1699
+ 139
1700
+ ]
1701
+ },
1702
+ {
1703
+ "name": "width",
1704
+ "type": "INT",
1705
+ "links": [
1706
+ 141
1707
+ ]
1708
+ },
1709
+ {
1710
+ "name": "height",
1711
+ "type": "INT",
1712
+ "links": [
1713
+ 142
1714
+ ]
1715
+ },
1716
+ {
1717
+ "name": "mask",
1718
+ "type": "MASK",
1719
+ "links": null
1720
+ }
1721
+ ],
1722
+ "properties": {
1723
+ "cnr_id": "comfyui-kjnodes",
1724
+ "ver": "a6b867b63a29ca48ddb15c589e17a9f2d8530d57",
1725
+ "Node name for S&R": "ImageResizeKJv2"
1726
+ },
1727
+ "widgets_values": [
1728
+ 720,
1729
+ 720,
1730
+ "lanczos",
1731
+ "crop",
1732
+ "0, 0, 0",
1733
+ "center",
1734
+ 32,
1735
+ "cpu",
1736
+ "<tr><td>Output: </td><td><b>1</b> x <b>704</b> x <b>704 | 5.67MB</b></td></tr>"
1737
+ ]
1738
+ }
1739
+ ],
1740
+ "links": [
1741
+ [
1742
+ 15,
1743
+ 11,
1744
+ 0,
1745
+ 16,
1746
+ 0,
1747
+ "WANTEXTENCODER"
1748
+ ],
1749
+ [
1750
+ 30,
1751
+ 16,
1752
+ 0,
1753
+ 27,
1754
+ 2,
1755
+ "WANVIDEOTEXTEMBEDS"
1756
+ ],
1757
+ [
1758
+ 43,
1759
+ 38,
1760
+ 0,
1761
+ 28,
1762
+ 0,
1763
+ "VAE"
1764
+ ],
1765
+ [
1766
+ 52,
1767
+ 48,
1768
+ 0,
1769
+ 49,
1770
+ 0,
1771
+ "CLIP"
1772
+ ],
1773
+ [
1774
+ 53,
1775
+ 48,
1776
+ 0,
1777
+ 50,
1778
+ 0,
1779
+ "CLIP"
1780
+ ],
1781
+ [
1782
+ 54,
1783
+ 49,
1784
+ 0,
1785
+ 46,
1786
+ 0,
1787
+ "CONDITIONING"
1788
+ ],
1789
+ [
1790
+ 55,
1791
+ 50,
1792
+ 0,
1793
+ 46,
1794
+ 1,
1795
+ "CONDITIONING"
1796
+ ],
1797
+ [
1798
+ 71,
1799
+ 67,
1800
+ 0,
1801
+ 68,
1802
+ 0,
1803
+ "IMAGE"
1804
+ ],
1805
+ [
1806
+ 76,
1807
+ 28,
1808
+ 0,
1809
+ 69,
1810
+ 0,
1811
+ "IMAGE"
1812
+ ],
1813
+ [
1814
+ 77,
1815
+ 69,
1816
+ 0,
1817
+ 60,
1818
+ 0,
1819
+ "IMAGE"
1820
+ ],
1821
+ [
1822
+ 109,
1823
+ 80,
1824
+ 0,
1825
+ 27,
1826
+ 0,
1827
+ "WANVIDEOMODEL"
1828
+ ],
1829
+ [
1830
+ 110,
1831
+ 56,
1832
+ 0,
1833
+ 80,
1834
+ 1,
1835
+ "WANVIDLORA"
1836
+ ],
1837
+ [
1838
+ 111,
1839
+ 35,
1840
+ 0,
1841
+ 22,
1842
+ 0,
1843
+ "WANCOMPILEARGS"
1844
+ ],
1845
+ [
1846
+ 112,
1847
+ 35,
1848
+ 0,
1849
+ 71,
1850
+ 0,
1851
+ "WANCOMPILEARGS"
1852
+ ],
1853
+ [
1854
+ 137,
1855
+ 38,
1856
+ 0,
1857
+ 89,
1858
+ 0,
1859
+ "WANVAE"
1860
+ ],
1861
+ [
1862
+ 138,
1863
+ 89,
1864
+ 0,
1865
+ 27,
1866
+ 1,
1867
+ "WANVIDIMAGE_EMBEDS"
1868
+ ],
1869
+ [
1870
+ 139,
1871
+ 68,
1872
+ 0,
1873
+ 89,
1874
+ 2,
1875
+ "IMAGE"
1876
+ ],
1877
+ [
1878
+ 141,
1879
+ 68,
1880
+ 1,
1881
+ 89,
1882
+ 8,
1883
+ "INT"
1884
+ ],
1885
+ [
1886
+ 142,
1887
+ 68,
1888
+ 2,
1889
+ 89,
1890
+ 9,
1891
+ "INT"
1892
+ ],
1893
+ [
1894
+ 143,
1895
+ 27,
1896
+ 0,
1897
+ 90,
1898
+ 3,
1899
+ "LATENT"
1900
+ ],
1901
+ [
1902
+ 144,
1903
+ 79,
1904
+ 0,
1905
+ 90,
1906
+ 0,
1907
+ "WANVIDEOMODEL"
1908
+ ],
1909
+ [
1910
+ 149,
1911
+ 89,
1912
+ 0,
1913
+ 90,
1914
+ 1,
1915
+ "WANVIDIMAGE_EMBEDS"
1916
+ ],
1917
+ [
1918
+ 151,
1919
+ 90,
1920
+ 0,
1921
+ 28,
1922
+ 1,
1923
+ "LATENT"
1924
+ ],
1925
+ [
1926
+ 152,
1927
+ 16,
1928
+ 0,
1929
+ 90,
1930
+ 2,
1931
+ "WANVIDEOTEXTEMBEDS"
1932
+ ],
1933
+ [
1934
+ 153,
1935
+ 91,
1936
+ 0,
1937
+ 27,
1938
+ 19,
1939
+ "INT"
1940
+ ],
1941
+ [
1942
+ 154,
1943
+ 91,
1944
+ 0,
1945
+ 90,
1946
+ 18,
1947
+ "INT"
1948
+ ],
1949
+ [
1950
+ 155,
1951
+ 22,
1952
+ 0,
1953
+ 92,
1954
+ 0,
1955
+ "WANVIDEOMODEL"
1956
+ ],
1957
+ [
1958
+ 156,
1959
+ 39,
1960
+ 0,
1961
+ 92,
1962
+ 1,
1963
+ "BLOCKSWAPARGS"
1964
+ ],
1965
+ [
1966
+ 157,
1967
+ 92,
1968
+ 0,
1969
+ 80,
1970
+ 0,
1971
+ "WANVIDEOMODEL"
1972
+ ],
1973
+ [
1974
+ 159,
1975
+ 39,
1976
+ 0,
1977
+ 93,
1978
+ 1,
1979
+ "BLOCKSWAPARGS"
1980
+ ],
1981
+ [
1982
+ 160,
1983
+ 71,
1984
+ 0,
1985
+ 93,
1986
+ 0,
1987
+ "WANVIDEOMODEL"
1988
+ ],
1989
+ [
1990
+ 161,
1991
+ 93,
1992
+ 0,
1993
+ 79,
1994
+ 0,
1995
+ "WANVIDEOMODEL"
1996
+ ],
1997
+ [
1998
+ 163,
1999
+ 94,
2000
+ 0,
2001
+ 27,
2002
+ 17,
2003
+ "INT"
2004
+ ],
2005
+ [
2006
+ 164,
2007
+ 94,
2008
+ 0,
2009
+ 90,
2010
+ 17,
2011
+ "INT"
2012
+ ],
2013
+ [
2014
+ 165,
2015
+ 94,
2016
+ 0,
2017
+ 95,
2018
+ 0,
2019
+ "INT"
2020
+ ],
2021
+ [
2022
+ 167,
2023
+ 95,
2024
+ 0,
2025
+ 27,
2026
+ 18,
2027
+ "FLOAT"
2028
+ ],
2029
+ [
2030
+ 169,
2031
+ 97,
2032
+ 0,
2033
+ 79,
2034
+ 1,
2035
+ "WANVIDLORA"
2036
+ ]
2037
+ ],
2038
+ "groups": [
2039
+ {
2040
+ "id": 1,
2041
+ "title": "ComfyUI text encoding alternative",
2042
+ "bounding": [
2043
+ -68.81207275390625,
2044
+ 358.7208251953125,
2045
+ 1210.621337890625,
2046
+ 805.9080810546875
2047
+ ],
2048
+ "color": "#3f789e",
2049
+ "font_size": 24,
2050
+ "flags": {}
2051
+ }
2052
+ ],
2053
+ "config": {},
2054
+ "extra": {
2055
+ "ds": {
2056
+ "scale": 1.3109994191500771,
2057
+ "offset": [
2058
+ -2108.9731183881954,
2059
+ 172.10398659016835
2060
+ ]
2061
+ },
2062
+ "node_versions": {
2063
+ "ComfyUI-WanVideoWrapper": "5a2383621a05825d0d0437781afcb8552d9590fd",
2064
+ "comfy-core": "0.3.26",
2065
+ "ComfyUI-VideoHelperSuite": "0a75c7958fe320efcb052f1d9f8451fd20c730a8"
2066
+ },
2067
+ "VHS_latentpreview": true,
2068
+ "VHS_latentpreviewrate": 0,
2069
+ "VHS_MetadataImage": true,
2070
+ "VHS_KeepIntermediate": true,
2071
+ "frontendVersion": "1.24.1"
2072
+ },
2073
+ "version": 0.4
2074
+ }
example_workflows/wanvideo_14B_pusa_I2V_example_01.json ADDED
@@ -0,0 +1,1326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "206247b6-9fec-4ed2-8927-e4f388c674d4",
3
+ "revision": 0,
4
+ "last_node_id": 85,
5
+ "last_link_id": 119,
6
+ "nodes": [
7
+ {
8
+ "id": 46,
9
+ "type": "WanVideoTextEmbedBridge",
10
+ "pos": [
11
+ 854.3115844726562,
12
+ 664.818603515625
13
+ ],
14
+ "size": [
15
+ 315,
16
+ 46
17
+ ],
18
+ "flags": {},
19
+ "order": 13,
20
+ "mode": 2,
21
+ "inputs": [
22
+ {
23
+ "name": "positive",
24
+ "type": "CONDITIONING",
25
+ "link": 54
26
+ },
27
+ {
28
+ "name": "negative",
29
+ "shape": 7,
30
+ "type": "CONDITIONING",
31
+ "link": 55
32
+ }
33
+ ],
34
+ "outputs": [
35
+ {
36
+ "name": "text_embeds",
37
+ "type": "WANVIDEOTEXTEMBEDS",
38
+ "links": null
39
+ }
40
+ ],
41
+ "properties": {
42
+ "cnr_id": "ComfyUI-WanVideoWrapper",
43
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
44
+ "Node name for S&R": "WanVideoTextEmbedBridge"
45
+ },
46
+ "widgets_values": []
47
+ },
48
+ {
49
+ "id": 50,
50
+ "type": "CLIPTextEncode",
51
+ "pos": [
52
+ 404.3124084472656,
53
+ 924.8187255859375
54
+ ],
55
+ "size": [
56
+ 400,
57
+ 200
58
+ ],
59
+ "flags": {},
60
+ "order": 10,
61
+ "mode": 2,
62
+ "inputs": [
63
+ {
64
+ "name": "clip",
65
+ "type": "CLIP",
66
+ "link": 53
67
+ }
68
+ ],
69
+ "outputs": [
70
+ {
71
+ "name": "CONDITIONING",
72
+ "type": "CONDITIONING",
73
+ "slot_index": 0,
74
+ "links": [
75
+ 55
76
+ ]
77
+ }
78
+ ],
79
+ "properties": {
80
+ "cnr_id": "comfy-core",
81
+ "ver": "0.3.26",
82
+ "Node name for S&R": "CLIPTextEncode"
83
+ },
84
+ "widgets_values": [
85
+ "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
86
+ ]
87
+ },
88
+ {
89
+ "id": 48,
90
+ "type": "CLIPLoader",
91
+ "pos": [
92
+ 44.31245803833008,
93
+ 674.818603515625
94
+ ],
95
+ "size": [
96
+ 315,
97
+ 106
98
+ ],
99
+ "flags": {},
100
+ "order": 0,
101
+ "mode": 2,
102
+ "inputs": [],
103
+ "outputs": [
104
+ {
105
+ "name": "CLIP",
106
+ "type": "CLIP",
107
+ "slot_index": 0,
108
+ "links": [
109
+ 52,
110
+ 53
111
+ ]
112
+ }
113
+ ],
114
+ "properties": {
115
+ "cnr_id": "comfy-core",
116
+ "ver": "0.3.26",
117
+ "Node name for S&R": "CLIPLoader"
118
+ },
119
+ "widgets_values": [
120
+ "umt5_xxl_fp16.safetensors",
121
+ "wan",
122
+ "default"
123
+ ]
124
+ },
125
+ {
126
+ "id": 49,
127
+ "type": "CLIPTextEncode",
128
+ "pos": [
129
+ 404.3124084472656,
130
+ 674.818603515625
131
+ ],
132
+ "size": [
133
+ 400,
134
+ 200
135
+ ],
136
+ "flags": {},
137
+ "order": 9,
138
+ "mode": 2,
139
+ "inputs": [
140
+ {
141
+ "name": "clip",
142
+ "type": "CLIP",
143
+ "link": 52
144
+ }
145
+ ],
146
+ "outputs": [
147
+ {
148
+ "name": "CONDITIONING",
149
+ "type": "CONDITIONING",
150
+ "slot_index": 0,
151
+ "links": [
152
+ 54
153
+ ]
154
+ }
155
+ ],
156
+ "properties": {
157
+ "cnr_id": "comfy-core",
158
+ "ver": "0.3.26",
159
+ "Node name for S&R": "CLIPTextEncode"
160
+ },
161
+ "widgets_values": [
162
+ "high quality nature video featuring a red panda balancing on a bamboo stem while a bird lands on it's head, on the background there is a waterfall"
163
+ ]
164
+ },
165
+ {
166
+ "id": 51,
167
+ "type": "Note",
168
+ "pos": [
169
+ 74.31259155273438,
170
+ 504.8180847167969
171
+ ],
172
+ "size": [
173
+ 253.16725158691406,
174
+ 88
175
+ ],
176
+ "flags": {},
177
+ "order": 1,
178
+ "mode": 0,
179
+ "inputs": [],
180
+ "outputs": [],
181
+ "properties": {},
182
+ "widgets_values": [
183
+ "You can also use native ComfyUI text encoding with these nodes instead of the original, the models are node specific and can't otherwise be mixed."
184
+ ],
185
+ "color": "#432",
186
+ "bgcolor": "#653"
187
+ },
188
+ {
189
+ "id": 11,
190
+ "type": "LoadWanVideoT5TextEncoder",
191
+ "pos": [
192
+ 161.7229461669922,
193
+ -501.2225036621094
194
+ ],
195
+ "size": [
196
+ 377.1661376953125,
197
+ 130
198
+ ],
199
+ "flags": {},
200
+ "order": 2,
201
+ "mode": 0,
202
+ "inputs": [],
203
+ "outputs": [
204
+ {
205
+ "name": "wan_t5_model",
206
+ "type": "WANTEXTENCODER",
207
+ "slot_index": 0,
208
+ "links": [
209
+ 15
210
+ ]
211
+ }
212
+ ],
213
+ "properties": {
214
+ "cnr_id": "ComfyUI-WanVideoWrapper",
215
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
216
+ "Node name for S&R": "LoadWanVideoT5TextEncoder"
217
+ },
218
+ "widgets_values": [
219
+ "umt5-xxl-enc-bf16.safetensors",
220
+ "bf16",
221
+ "offload_device",
222
+ "disabled"
223
+ ],
224
+ "color": "#332922",
225
+ "bgcolor": "#593930"
226
+ },
227
+ {
228
+ "id": 38,
229
+ "type": "WanVideoVAELoader",
230
+ "pos": [
231
+ 169.25408935546875,
232
+ -322.9471740722656
233
+ ],
234
+ "size": [
235
+ 372.7727966308594,
236
+ 82
237
+ ],
238
+ "flags": {},
239
+ "order": 3,
240
+ "mode": 0,
241
+ "inputs": [],
242
+ "outputs": [
243
+ {
244
+ "name": "vae",
245
+ "type": "WANVAE",
246
+ "slot_index": 0,
247
+ "links": [
248
+ 43,
249
+ 88
250
+ ]
251
+ }
252
+ ],
253
+ "properties": {
254
+ "cnr_id": "ComfyUI-WanVideoWrapper",
255
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
256
+ "Node name for S&R": "WanVideoVAELoader"
257
+ },
258
+ "widgets_values": [
259
+ "wanvideo\\Wan2_1_VAE_bf16.safetensors",
260
+ "bf16"
261
+ ],
262
+ "color": "#322",
263
+ "bgcolor": "#533"
264
+ },
265
+ {
266
+ "id": 30,
267
+ "type": "VHS_VideoCombine",
268
+ "pos": [
269
+ 1684.1597900390625,
270
+ -394.2595520019531
271
+ ],
272
+ "size": [
273
+ 697.7421264648438,
274
+ 1025.7421875
275
+ ],
276
+ "flags": {},
277
+ "order": 20,
278
+ "mode": 0,
279
+ "inputs": [
280
+ {
281
+ "name": "images",
282
+ "type": "IMAGE",
283
+ "link": 36
284
+ },
285
+ {
286
+ "name": "audio",
287
+ "shape": 7,
288
+ "type": "AUDIO",
289
+ "link": null
290
+ },
291
+ {
292
+ "name": "meta_batch",
293
+ "shape": 7,
294
+ "type": "VHS_BatchManager",
295
+ "link": null
296
+ },
297
+ {
298
+ "name": "vae",
299
+ "shape": 7,
300
+ "type": "VAE",
301
+ "link": null
302
+ }
303
+ ],
304
+ "outputs": [
305
+ {
306
+ "name": "Filenames",
307
+ "type": "VHS_FILENAMES",
308
+ "links": null
309
+ }
310
+ ],
311
+ "properties": {
312
+ "cnr_id": "comfyui-videohelpersuite",
313
+ "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8",
314
+ "Node name for S&R": "VHS_VideoCombine"
315
+ },
316
+ "widgets_values": {
317
+ "frame_rate": 16,
318
+ "loop_count": 0,
319
+ "filename_prefix": "WanVideoWrapper_I2V",
320
+ "format": "video/h264-mp4",
321
+ "pix_fmt": "yuv420p",
322
+ "crf": 19,
323
+ "save_metadata": true,
324
+ "trim_to_audio": false,
325
+ "pingpong": false,
326
+ "save_output": true,
327
+ "videopreview": {
328
+ "hidden": false,
329
+ "paused": false,
330
+ "params": {
331
+ "filename": "WanVideoWrapper_I2V_00240.mp4",
332
+ "subfolder": "",
333
+ "type": "output",
334
+ "format": "video/h264-mp4",
335
+ "frame_rate": 16,
336
+ "workflow": "WanVideoWrapper_I2V_00240.png",
337
+ "fullpath": "N:\\AI\\ComfyUI\\output\\WanVideoWrapper_I2V_00240.mp4"
338
+ }
339
+ }
340
+ }
341
+ },
342
+ {
343
+ "id": 28,
344
+ "type": "WanVideoDecode",
345
+ "pos": [
346
+ 1688.0194091796875,
347
+ -647.6461791992188
348
+ ],
349
+ "size": [
350
+ 315,
351
+ 198
352
+ ],
353
+ "flags": {},
354
+ "order": 19,
355
+ "mode": 0,
356
+ "inputs": [
357
+ {
358
+ "name": "vae",
359
+ "type": "WANVAE",
360
+ "link": 43
361
+ },
362
+ {
363
+ "name": "samples",
364
+ "type": "LATENT",
365
+ "link": 117
366
+ }
367
+ ],
368
+ "outputs": [
369
+ {
370
+ "name": "images",
371
+ "type": "IMAGE",
372
+ "slot_index": 0,
373
+ "links": [
374
+ 36
375
+ ]
376
+ }
377
+ ],
378
+ "properties": {
379
+ "cnr_id": "ComfyUI-WanVideoWrapper",
380
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
381
+ "Node name for S&R": "WanVideoDecode"
382
+ },
383
+ "widgets_values": [
384
+ false,
385
+ 272,
386
+ 272,
387
+ 144,
388
+ 128,
389
+ "default"
390
+ ],
391
+ "color": "#322",
392
+ "bgcolor": "#533"
393
+ },
394
+ {
395
+ "id": 68,
396
+ "type": "WanVideoLoraSelect",
397
+ "pos": [
398
+ -326.3398742675781,
399
+ -452.3577880859375
400
+ ],
401
+ "size": [
402
+ 406.5719909667969,
403
+ 188
404
+ ],
405
+ "flags": {},
406
+ "order": 11,
407
+ "mode": 0,
408
+ "inputs": [
409
+ {
410
+ "name": "prev_lora",
411
+ "shape": 7,
412
+ "type": "WANVIDLORA",
413
+ "link": 111
414
+ },
415
+ {
416
+ "name": "blocks",
417
+ "shape": 7,
418
+ "type": "SELECTEDBLOCKS",
419
+ "link": null
420
+ }
421
+ ],
422
+ "outputs": [
423
+ {
424
+ "name": "lora",
425
+ "type": "WANVIDLORA",
426
+ "links": [
427
+ 85
428
+ ]
429
+ }
430
+ ],
431
+ "properties": {
432
+ "cnr_id": "ComfyUI-WanVideoWrapper",
433
+ "ver": "17d48e3e450c7e75f60566e787404cb3d917f48e",
434
+ "Node name for S&R": "WanVideoLoraSelect"
435
+ },
436
+ "widgets_values": [
437
+ "WanVideo\\Pusa\\Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors",
438
+ 1.4,
439
+ false,
440
+ "<details><summary><b>Metadata</b></summary><table border='0' cellpadding='3'><tr><td colspan='2'><b>Metadata</b></td></tr><tr><td><b>format</b></td><td>pt</td></tr><tr><td><b>model_type</b></td><td>PusaV1-14B</td></tr></table></details>"
441
+ ]
442
+ },
443
+ {
444
+ "id": 75,
445
+ "type": "WanVideoLoraSelect",
446
+ "pos": [
447
+ -848.7334594726562,
448
+ -433.909423828125
449
+ ],
450
+ "size": [
451
+ 502.5318298339844,
452
+ 176
453
+ ],
454
+ "flags": {},
455
+ "order": 4,
456
+ "mode": 0,
457
+ "inputs": [
458
+ {
459
+ "name": "prev_lora",
460
+ "shape": 7,
461
+ "type": "WANVIDLORA",
462
+ "link": null
463
+ },
464
+ {
465
+ "name": "blocks",
466
+ "shape": 7,
467
+ "type": "SELECTEDBLOCKS",
468
+ "link": null
469
+ }
470
+ ],
471
+ "outputs": [
472
+ {
473
+ "name": "lora",
474
+ "type": "WANVIDLORA",
475
+ "links": [
476
+ 111
477
+ ]
478
+ }
479
+ ],
480
+ "properties": {
481
+ "cnr_id": "ComfyUI-WanVideoWrapper",
482
+ "ver": "17d48e3e450c7e75f60566e787404cb3d917f48e",
483
+ "Node name for S&R": "WanVideoLoraSelect"
484
+ },
485
+ "widgets_values": [
486
+ "WanVideo\\Lightx2v\\lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank32_bf16_.safetensors",
487
+ 1,
488
+ false,
489
+ "<details><summary><b>Metadata</b></summary><table border='0' cellpadding='3'><tr><td colspan='2'><b>Metadata</b></td></tr><tr><td>No metadata found</td></tr></table></details>"
490
+ ]
491
+ },
492
+ {
493
+ "id": 84,
494
+ "type": "MarkdownNote",
495
+ "pos": [
496
+ -846.2874145507812,
497
+ -711.7315063476562
498
+ ],
499
+ "size": [
500
+ 555.7915649414062,
501
+ 202.42054748535156
502
+ ],
503
+ "flags": {},
504
+ "order": 5,
505
+ "mode": 0,
506
+ "inputs": [],
507
+ "outputs": [],
508
+ "properties": {},
509
+ "widgets_values": [
510
+ "LoRA links:\n\nPusa:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Pusa/Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Pusa/Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors)\n\nDistill:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank32_bf16.safetensors](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank32_bf16.safetensors)"
511
+ ],
512
+ "color": "#432",
513
+ "bgcolor": "#653"
514
+ },
515
+ {
516
+ "id": 39,
517
+ "type": "WanVideoBlockSwap",
518
+ "pos": [
519
+ -214.50437927246094,
520
+ -775.3147583007812
521
+ ],
522
+ "size": [
523
+ 315,
524
+ 154
525
+ ],
526
+ "flags": {},
527
+ "order": 6,
528
+ "mode": 0,
529
+ "inputs": [],
530
+ "outputs": [
531
+ {
532
+ "name": "block_swap_args",
533
+ "type": "BLOCKSWAPARGS",
534
+ "slot_index": 0,
535
+ "links": [
536
+ 50
537
+ ]
538
+ }
539
+ ],
540
+ "properties": {
541
+ "cnr_id": "ComfyUI-WanVideoWrapper",
542
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
543
+ "Node name for S&R": "WanVideoBlockSwap"
544
+ },
545
+ "widgets_values": [
546
+ 10,
547
+ false,
548
+ false,
549
+ true,
550
+ 0
551
+ ],
552
+ "color": "#223",
553
+ "bgcolor": "#335"
554
+ },
555
+ {
556
+ "id": 58,
557
+ "type": "LoadImage",
558
+ "pos": [
559
+ -1042.868408203125,
560
+ -97.90877532958984
561
+ ],
562
+ "size": [
563
+ 413.10479736328125,
564
+ 498.3180847167969
565
+ ],
566
+ "flags": {},
567
+ "order": 7,
568
+ "mode": 0,
569
+ "inputs": [],
570
+ "outputs": [
571
+ {
572
+ "name": "IMAGE",
573
+ "type": "IMAGE",
574
+ "links": [
575
+ 96
576
+ ]
577
+ },
578
+ {
579
+ "name": "MASK",
580
+ "type": "MASK",
581
+ "links": null
582
+ }
583
+ ],
584
+ "properties": {
585
+ "cnr_id": "comfy-core",
586
+ "ver": "0.3.26",
587
+ "Node name for S&R": "LoadImage"
588
+ },
589
+ "widgets_values": [
590
+ "oldman_upscaled.png",
591
+ "image"
592
+ ],
593
+ "color": "#2a363b",
594
+ "bgcolor": "#3f5159"
595
+ },
596
+ {
597
+ "id": 71,
598
+ "type": "ImageResizeKJv2",
599
+ "pos": [
600
+ -542.5422973632812,
601
+ -104.77092742919922
602
+ ],
603
+ "size": [
604
+ 270,
605
+ 336
606
+ ],
607
+ "flags": {},
608
+ "order": 12,
609
+ "mode": 0,
610
+ "inputs": [
611
+ {
612
+ "name": "image",
613
+ "type": "IMAGE",
614
+ "link": 96
615
+ },
616
+ {
617
+ "name": "mask",
618
+ "shape": 7,
619
+ "type": "MASK",
620
+ "link": null
621
+ }
622
+ ],
623
+ "outputs": [
624
+ {
625
+ "name": "IMAGE",
626
+ "type": "IMAGE",
627
+ "links": [
628
+ 115
629
+ ]
630
+ },
631
+ {
632
+ "name": "width",
633
+ "type": "INT",
634
+ "links": [
635
+ 100
636
+ ]
637
+ },
638
+ {
639
+ "name": "height",
640
+ "type": "INT",
641
+ "links": [
642
+ 101
643
+ ]
644
+ },
645
+ {
646
+ "name": "mask",
647
+ "type": "MASK",
648
+ "links": null
649
+ }
650
+ ],
651
+ "properties": {
652
+ "cnr_id": "comfyui-kjnodes",
653
+ "ver": "ad37ce656c13e9abea002b46e3a89be3dba32355",
654
+ "Node name for S&R": "ImageResizeKJv2"
655
+ },
656
+ "widgets_values": [
657
+ 720,
658
+ 720,
659
+ "lanczos",
660
+ "crop",
661
+ "0, 0, 0",
662
+ "center",
663
+ 16,
664
+ "cpu",
665
+ "<tr><td>Output: </td><td><b>1</b> x <b>720</b> x <b>720 | 5.93MB</b></td></tr>"
666
+ ]
667
+ },
668
+ {
669
+ "id": 70,
670
+ "type": "WanVideoEncode",
671
+ "pos": [
672
+ -81.78827667236328,
673
+ 24.176483154296875
674
+ ],
675
+ "size": [
676
+ 270,
677
+ 242
678
+ ],
679
+ "flags": {},
680
+ "order": 15,
681
+ "mode": 0,
682
+ "inputs": [
683
+ {
684
+ "name": "vae",
685
+ "type": "WANVAE",
686
+ "link": 88
687
+ },
688
+ {
689
+ "name": "image",
690
+ "type": "IMAGE",
691
+ "link": 115
692
+ },
693
+ {
694
+ "name": "mask",
695
+ "shape": 7,
696
+ "type": "MASK",
697
+ "link": null
698
+ }
699
+ ],
700
+ "outputs": [
701
+ {
702
+ "name": "samples",
703
+ "type": "LATENT",
704
+ "links": [
705
+ 103
706
+ ]
707
+ }
708
+ ],
709
+ "properties": {
710
+ "cnr_id": "ComfyUI-WanVideoWrapper",
711
+ "ver": "17d48e3e450c7e75f60566e787404cb3d917f48e",
712
+ "Node name for S&R": "WanVideoEncode"
713
+ },
714
+ "widgets_values": [
715
+ false,
716
+ 272,
717
+ 272,
718
+ 144,
719
+ 128,
720
+ 0,
721
+ 1
722
+ ]
723
+ },
724
+ {
725
+ "id": 16,
726
+ "type": "WanVideoTextEncode",
727
+ "pos": [
728
+ 787.8640747070312,
729
+ -91.52558898925781
730
+ ],
731
+ "size": [
732
+ 453.0067138671875,
733
+ 328.0239562988281
734
+ ],
735
+ "flags": {},
736
+ "order": 16,
737
+ "mode": 0,
738
+ "inputs": [
739
+ {
740
+ "name": "t5",
741
+ "type": "WANTEXTENCODER",
742
+ "link": 15
743
+ },
744
+ {
745
+ "name": "model_to_offload",
746
+ "shape": 7,
747
+ "type": "WANVIDEOMODEL",
748
+ "link": 79
749
+ }
750
+ ],
751
+ "outputs": [
752
+ {
753
+ "name": "text_embeds",
754
+ "type": "WANVIDEOTEXTEMBEDS",
755
+ "slot_index": 0,
756
+ "links": [
757
+ 30
758
+ ]
759
+ }
760
+ ],
761
+ "properties": {
762
+ "cnr_id": "ComfyUI-WanVideoWrapper",
763
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
764
+ "Node name for S&R": "WanVideoTextEncode"
765
+ },
766
+ "widgets_values": [
767
+ "an old man takes of his hat",
768
+ "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards\"",
769
+ true
770
+ ],
771
+ "color": "#332922",
772
+ "bgcolor": "#593930"
773
+ },
774
+ {
775
+ "id": 27,
776
+ "type": "WanVideoSampler",
777
+ "pos": [
778
+ 1315.2401123046875,
779
+ -401.48028564453125
780
+ ],
781
+ "size": [
782
+ 315,
783
+ 927
784
+ ],
785
+ "flags": {},
786
+ "order": 18,
787
+ "mode": 0,
788
+ "inputs": [
789
+ {
790
+ "name": "model",
791
+ "type": "WANVIDEOMODEL",
792
+ "link": 29
793
+ },
794
+ {
795
+ "name": "image_embeds",
796
+ "type": "WANVIDIMAGE_EMBEDS",
797
+ "link": 102
798
+ },
799
+ {
800
+ "name": "text_embeds",
801
+ "shape": 7,
802
+ "type": "WANVIDEOTEXTEMBEDS",
803
+ "link": 30
804
+ },
805
+ {
806
+ "name": "samples",
807
+ "shape": 7,
808
+ "type": "LATENT",
809
+ "link": null
810
+ },
811
+ {
812
+ "name": "feta_args",
813
+ "shape": 7,
814
+ "type": "FETAARGS",
815
+ "link": null
816
+ },
817
+ {
818
+ "name": "context_options",
819
+ "shape": 7,
820
+ "type": "WANVIDCONTEXT",
821
+ "link": null
822
+ },
823
+ {
824
+ "name": "cache_args",
825
+ "shape": 7,
826
+ "type": "CACHEARGS",
827
+ "link": null
828
+ },
829
+ {
830
+ "name": "flowedit_args",
831
+ "shape": 7,
832
+ "type": "FLOWEDITARGS",
833
+ "link": null
834
+ },
835
+ {
836
+ "name": "slg_args",
837
+ "shape": 7,
838
+ "type": "SLGARGS",
839
+ "link": null
840
+ },
841
+ {
842
+ "name": "loop_args",
843
+ "shape": 7,
844
+ "type": "LOOPARGS",
845
+ "link": null
846
+ },
847
+ {
848
+ "name": "experimental_args",
849
+ "shape": 7,
850
+ "type": "EXPERIMENTALARGS",
851
+ "link": null
852
+ },
853
+ {
854
+ "name": "sigmas",
855
+ "shape": 7,
856
+ "type": "SIGMAS",
857
+ "link": null
858
+ },
859
+ {
860
+ "name": "unianimate_poses",
861
+ "shape": 7,
862
+ "type": "UNIANIMATE_POSE",
863
+ "link": null
864
+ },
865
+ {
866
+ "name": "fantasytalking_embeds",
867
+ "shape": 7,
868
+ "type": "FANTASYTALKING_EMBEDS",
869
+ "link": null
870
+ },
871
+ {
872
+ "name": "uni3c_embeds",
873
+ "shape": 7,
874
+ "type": "UNI3C_EMBEDS",
875
+ "link": null
876
+ },
877
+ {
878
+ "name": "multitalk_embeds",
879
+ "shape": 7,
880
+ "type": "MULTITALK_EMBEDS",
881
+ "link": null
882
+ },
883
+ {
884
+ "name": "freeinit_args",
885
+ "shape": 7,
886
+ "type": "FREEINITARGS",
887
+ "link": null
888
+ }
889
+ ],
890
+ "outputs": [
891
+ {
892
+ "name": "samples",
893
+ "type": "LATENT",
894
+ "slot_index": 0,
895
+ "links": [
896
+ 117
897
+ ]
898
+ }
899
+ ],
900
+ "properties": {
901
+ "cnr_id": "ComfyUI-WanVideoWrapper",
902
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
903
+ "Node name for S&R": "WanVideoSampler"
904
+ },
905
+ "widgets_values": [
906
+ 6,
907
+ 1,
908
+ 5,
909
+ 46,
910
+ "fixed",
911
+ true,
912
+ "flowmatch_pusa",
913
+ 0,
914
+ 1,
915
+ "",
916
+ "comfy",
917
+ ""
918
+ ]
919
+ },
920
+ {
921
+ "id": 22,
922
+ "type": "WanVideoModelLoader",
923
+ "pos": [
924
+ 157.20700073242188,
925
+ -839.4575805664062
926
+ ],
927
+ "size": [
928
+ 477.4410095214844,
929
+ 274
930
+ ],
931
+ "flags": {},
932
+ "order": 14,
933
+ "mode": 0,
934
+ "inputs": [
935
+ {
936
+ "name": "compile_args",
937
+ "shape": 7,
938
+ "type": "WANCOMPILEARGS",
939
+ "link": 119
940
+ },
941
+ {
942
+ "name": "block_swap_args",
943
+ "shape": 7,
944
+ "type": "BLOCKSWAPARGS",
945
+ "link": 50
946
+ },
947
+ {
948
+ "name": "lora",
949
+ "shape": 7,
950
+ "type": "WANVIDLORA",
951
+ "link": 85
952
+ },
953
+ {
954
+ "name": "vram_management_args",
955
+ "shape": 7,
956
+ "type": "VRAM_MANAGEMENTARGS",
957
+ "link": null
958
+ },
959
+ {
960
+ "name": "vace_model",
961
+ "shape": 7,
962
+ "type": "VACEPATH",
963
+ "link": null
964
+ },
965
+ {
966
+ "name": "fantasytalking_model",
967
+ "shape": 7,
968
+ "type": "FANTASYTALKINGMODEL",
969
+ "link": null
970
+ },
971
+ {
972
+ "name": "multitalk_model",
973
+ "shape": 7,
974
+ "type": "MULTITALKMODEL",
975
+ "link": null
976
+ }
977
+ ],
978
+ "outputs": [
979
+ {
980
+ "name": "model",
981
+ "type": "WANVIDEOMODEL",
982
+ "slot_index": 0,
983
+ "links": [
984
+ 29,
985
+ 79
986
+ ]
987
+ }
988
+ ],
989
+ "properties": {
990
+ "cnr_id": "ComfyUI-WanVideoWrapper",
991
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
992
+ "Node name for S&R": "WanVideoModelLoader"
993
+ },
994
+ "widgets_values": [
995
+ "WanVideo\\Wan2_1-T2V-14B_fp8_e4m3fn.safetensors",
996
+ "fp16_fast",
997
+ "fp8_e4m3fn",
998
+ "offload_device",
999
+ "sageattn"
1000
+ ],
1001
+ "color": "#223",
1002
+ "bgcolor": "#335"
1003
+ },
1004
+ {
1005
+ "id": 78,
1006
+ "type": "WanVideoEmptyEmbeds",
1007
+ "pos": [
1008
+ 330.17205810546875,
1009
+ -116.94092559814453
1010
+ ],
1011
+ "size": [
1012
+ 272.431640625,
1013
+ 126
1014
+ ],
1015
+ "flags": {},
1016
+ "order": 17,
1017
+ "mode": 0,
1018
+ "inputs": [
1019
+ {
1020
+ "name": "control_embeds",
1021
+ "shape": 7,
1022
+ "type": "WANVIDIMAGE_EMBEDS",
1023
+ "link": null
1024
+ },
1025
+ {
1026
+ "name": "extra_latents",
1027
+ "shape": 7,
1028
+ "type": "LATENT",
1029
+ "link": 103
1030
+ },
1031
+ {
1032
+ "name": "width",
1033
+ "type": "INT",
1034
+ "widget": {
1035
+ "name": "width"
1036
+ },
1037
+ "link": 100
1038
+ },
1039
+ {
1040
+ "name": "height",
1041
+ "type": "INT",
1042
+ "widget": {
1043
+ "name": "height"
1044
+ },
1045
+ "link": 101
1046
+ }
1047
+ ],
1048
+ "outputs": [
1049
+ {
1050
+ "name": "image_embeds",
1051
+ "type": "WANVIDIMAGE_EMBEDS",
1052
+ "links": [
1053
+ 102
1054
+ ]
1055
+ }
1056
+ ],
1057
+ "properties": {
1058
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1059
+ "ver": "6bc53b771d5d2af316801cb69e2ee10dbf7d18b1",
1060
+ "Node name for S&R": "WanVideoEmptyEmbeds"
1061
+ },
1062
+ "widgets_values": [
1063
+ 832,
1064
+ 480,
1065
+ 81
1066
+ ]
1067
+ },
1068
+ {
1069
+ "id": 35,
1070
+ "type": "WanVideoTorchCompileSettings",
1071
+ "pos": [
1072
+ -276.8500671386719,
1073
+ -1050.6326904296875
1074
+ ],
1075
+ "size": [
1076
+ 390.5999755859375,
1077
+ 202
1078
+ ],
1079
+ "flags": {},
1080
+ "order": 8,
1081
+ "mode": 0,
1082
+ "inputs": [],
1083
+ "outputs": [
1084
+ {
1085
+ "name": "torch_compile_args",
1086
+ "type": "WANCOMPILEARGS",
1087
+ "slot_index": 0,
1088
+ "links": [
1089
+ 119
1090
+ ]
1091
+ }
1092
+ ],
1093
+ "properties": {
1094
+ "cnr_id": "ComfyUI-WanVideoWrapper",
1095
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
1096
+ "Node name for S&R": "WanVideoTorchCompileSettings"
1097
+ },
1098
+ "widgets_values": [
1099
+ "inductor",
1100
+ false,
1101
+ "default",
1102
+ false,
1103
+ 64,
1104
+ true,
1105
+ 128
1106
+ ],
1107
+ "color": "#223",
1108
+ "bgcolor": "#335"
1109
+ }
1110
+ ],
1111
+ "links": [
1112
+ [
1113
+ 15,
1114
+ 11,
1115
+ 0,
1116
+ 16,
1117
+ 0,
1118
+ "WANTEXTENCODER"
1119
+ ],
1120
+ [
1121
+ 29,
1122
+ 22,
1123
+ 0,
1124
+ 27,
1125
+ 0,
1126
+ "WANVIDEOMODEL"
1127
+ ],
1128
+ [
1129
+ 30,
1130
+ 16,
1131
+ 0,
1132
+ 27,
1133
+ 2,
1134
+ "WANVIDEOTEXTEMBEDS"
1135
+ ],
1136
+ [
1137
+ 36,
1138
+ 28,
1139
+ 0,
1140
+ 30,
1141
+ 0,
1142
+ "IMAGE"
1143
+ ],
1144
+ [
1145
+ 43,
1146
+ 38,
1147
+ 0,
1148
+ 28,
1149
+ 0,
1150
+ "VAE"
1151
+ ],
1152
+ [
1153
+ 50,
1154
+ 39,
1155
+ 0,
1156
+ 22,
1157
+ 1,
1158
+ "BLOCKSWAPARGS"
1159
+ ],
1160
+ [
1161
+ 52,
1162
+ 48,
1163
+ 0,
1164
+ 49,
1165
+ 0,
1166
+ "CLIP"
1167
+ ],
1168
+ [
1169
+ 53,
1170
+ 48,
1171
+ 0,
1172
+ 50,
1173
+ 0,
1174
+ "CLIP"
1175
+ ],
1176
+ [
1177
+ 54,
1178
+ 49,
1179
+ 0,
1180
+ 46,
1181
+ 0,
1182
+ "CONDITIONING"
1183
+ ],
1184
+ [
1185
+ 55,
1186
+ 50,
1187
+ 0,
1188
+ 46,
1189
+ 1,
1190
+ "CONDITIONING"
1191
+ ],
1192
+ [
1193
+ 79,
1194
+ 22,
1195
+ 0,
1196
+ 16,
1197
+ 1,
1198
+ "WANVIDEOMODEL"
1199
+ ],
1200
+ [
1201
+ 85,
1202
+ 68,
1203
+ 0,
1204
+ 22,
1205
+ 2,
1206
+ "WANVIDLORA"
1207
+ ],
1208
+ [
1209
+ 88,
1210
+ 38,
1211
+ 0,
1212
+ 70,
1213
+ 0,
1214
+ "WANVAE"
1215
+ ],
1216
+ [
1217
+ 96,
1218
+ 58,
1219
+ 0,
1220
+ 71,
1221
+ 0,
1222
+ "IMAGE"
1223
+ ],
1224
+ [
1225
+ 100,
1226
+ 71,
1227
+ 1,
1228
+ 78,
1229
+ 2,
1230
+ "INT"
1231
+ ],
1232
+ [
1233
+ 101,
1234
+ 71,
1235
+ 2,
1236
+ 78,
1237
+ 3,
1238
+ "INT"
1239
+ ],
1240
+ [
1241
+ 102,
1242
+ 78,
1243
+ 0,
1244
+ 27,
1245
+ 1,
1246
+ "WANVIDIMAGE_EMBEDS"
1247
+ ],
1248
+ [
1249
+ 103,
1250
+ 70,
1251
+ 0,
1252
+ 78,
1253
+ 1,
1254
+ "LATENT"
1255
+ ],
1256
+ [
1257
+ 111,
1258
+ 75,
1259
+ 0,
1260
+ 68,
1261
+ 0,
1262
+ "WANVIDLORA"
1263
+ ],
1264
+ [
1265
+ 115,
1266
+ 71,
1267
+ 0,
1268
+ 70,
1269
+ 1,
1270
+ "IMAGE"
1271
+ ],
1272
+ [
1273
+ 117,
1274
+ 27,
1275
+ 0,
1276
+ 28,
1277
+ 1,
1278
+ "LATENT"
1279
+ ],
1280
+ [
1281
+ 119,
1282
+ 35,
1283
+ 0,
1284
+ 22,
1285
+ 0,
1286
+ "WANCOMPILEARGS"
1287
+ ]
1288
+ ],
1289
+ "groups": [
1290
+ {
1291
+ "id": 1,
1292
+ "title": "ComfyUI text encoding alternative",
1293
+ "bounding": [
1294
+ -18.503620147705078,
1295
+ 360.8843688964844,
1296
+ 1210.621337890625,
1297
+ 805.9080810546875
1298
+ ],
1299
+ "color": "#3f789e",
1300
+ "font_size": 24,
1301
+ "flags": {}
1302
+ }
1303
+ ],
1304
+ "config": {},
1305
+ "extra": {
1306
+ "ds": {
1307
+ "scale": 0.6115909044841845,
1308
+ "offset": [
1309
+ 1277.9482772926067,
1310
+ 850.2732515940098
1311
+ ]
1312
+ },
1313
+ "node_versions": {
1314
+ "ComfyUI-WanVideoWrapper": "5a2383621a05825d0d0437781afcb8552d9590fd",
1315
+ "comfy-core": "0.3.26",
1316
+ "ComfyUI-KJNodes": "a5bd3c86c8ed6b83c55c2d0e7a59515b15a0137f",
1317
+ "ComfyUI-VideoHelperSuite": "0a75c7958fe320efcb052f1d9f8451fd20c730a8"
1318
+ },
1319
+ "VHS_latentpreview": true,
1320
+ "VHS_latentpreviewrate": 0,
1321
+ "VHS_MetadataImage": true,
1322
+ "VHS_KeepIntermediate": true,
1323
+ "frontendVersion": "1.24.1"
1324
+ },
1325
+ "version": 0.4
1326
+ }