moroder committed on
Commit
b0bce59
·
1 Parent(s): a2c44d8

added multimodal mode

Browse files
Files changed (8) hide show
  1. app.py +8 -1
  2. backend.py +1 -0
  3. modes.py +16 -0
  4. tests/conftest.py +9 -0
  5. tests/test_modes.py +4 -3
  6. ui.py +1 -0
  7. workflow.py +1 -1
  8. workflows/multimodal.json +993 -0
app.py CHANGED
@@ -594,7 +594,7 @@ def build_app() -> gr.Blocks:
594
  # Each mode button injects a tiny on-click that rewrites #aio-mode-tag
595
  # and (on mobile) auto-collapses the drawer.
596
  _MODE_TAG_BY_NAME = {
597
- "t2v": "T2V", "a2v": "A2V", "i2v": "I2V",
598
  "lipsync": "LIPSYNC", "keyframe": "KEY", "style": "STYLE",
599
  }
600
  for name, btn in mode_buttons.items():
@@ -715,6 +715,9 @@ def _render_one_mode(name: str) -> dict:
715
  handles["image"] = gr.Image(label="Source image", type="filepath")
716
  elif name == "a2v":
717
  handles["audio"] = gr.Audio(label="Source audio", type="filepath")
 
 
 
718
  elif name == "lipsync":
719
  handles["image"] = gr.Image(label="Portrait", type="filepath")
720
  handles["audio"] = gr.Audio(label="Speech audio", type="filepath")
@@ -1064,6 +1067,8 @@ def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
1064
  base.append("image")
1065
  elif mode_name == "a2v":
1066
  base.append("audio")
 
 
1067
  elif mode_name == "lipsync":
1068
  base.extend(["image", "audio"])
1069
  elif mode_name == "keyframe":
@@ -1088,6 +1093,8 @@ def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
1088
  base.append(h["image"])
1089
  elif mode_name == "a2v":
1090
  base.append(h["audio"])
 
 
1091
  elif mode_name == "lipsync":
1092
  base.extend([h["image"], h["audio"]])
1093
  elif mode_name == "keyframe":
 
594
  # Each mode button injects a tiny on-click that rewrites #aio-mode-tag
595
  # and (on mobile) auto-collapses the drawer.
596
  _MODE_TAG_BY_NAME = {
597
+ "t2v": "T2V", "a2v": "A2V", "i2v": "I2V", "multimodal": "MIAV",
598
  "lipsync": "LIPSYNC", "keyframe": "KEY", "style": "STYLE",
599
  }
600
  for name, btn in mode_buttons.items():
 
715
  handles["image"] = gr.Image(label="Source image", type="filepath")
716
  elif name == "a2v":
717
  handles["audio"] = gr.Audio(label="Source audio", type="filepath")
718
+ elif name == "multimodal":
719
+ handles["image"] = gr.Image(label="Initial image", type="filepath")
720
+ handles["audio"] = gr.Audio(label="Driving audio", type="filepath")
721
  elif name == "lipsync":
722
  handles["image"] = gr.Image(label="Portrait", type="filepath")
723
  handles["audio"] = gr.Audio(label="Speech audio", type="filepath")
 
1067
  base.append("image")
1068
  elif mode_name == "a2v":
1069
  base.append("audio")
1070
+ elif mode_name == "multimodal":
1071
+ base.extend(["image", "audio"])
1072
  elif mode_name == "lipsync":
1073
  base.extend(["image", "audio"])
1074
  elif mode_name == "keyframe":
 
1093
  base.append(h["image"])
1094
  elif mode_name == "a2v":
1095
  base.append(h["audio"])
1096
+ elif mode_name == "multimodal":
1097
+ base.extend([h["image"], h["audio"]])
1098
  elif mode_name == "lipsync":
1099
  base.extend([h["image"], h["audio"]])
1100
  elif mode_name == "keyframe":
backend.py CHANGED
@@ -75,6 +75,7 @@ _BASE_DURATION_S: dict[str, int] = {
75
  "t2v": 90,
76
  "i2v": 90,
77
  "a2v": 120,
 
78
  "lipsync": 240, # extra: audio encoder + audio VAE + extra LoRAs
79
  "keyframe": 180,
80
  "style": 360, # extra: preprocessor (canny/dwpose/depth) + IC-LoRAs
 
75
  "t2v": 90,
76
  "i2v": 90,
77
  "a2v": 120,
78
+ "multimodal": 240,
79
  "lipsync": 240, # extra: audio encoder + audio VAE + extra LoRAs
80
  "keyframe": 180,
81
  "style": 360, # extra: preprocessor (canny/dwpose/depth) + IC-LoRAs
modes.py CHANGED
@@ -66,6 +66,7 @@ SEED_NODE_BY_MODE: dict[str, str] = {
66
  "a2v": "463:5540",
67
  "i2v": "209:5541",
68
  "lipsync": "521:5542",
 
69
  "keyframe": "670:5543",
70
  "style": "5364:5545",
71
  }
@@ -115,6 +116,13 @@ def _lipsync_parameterize(inp: dict[str, Any]) -> list[Patch]:
115
  ]
116
 
117
 
 
 
 
 
 
 
 
118
  def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
119
  return _shared_patches(inp, "keyframe") + [
120
  (NODE_IMAGE_1, "image", inp["first_frame"]),
@@ -157,6 +165,7 @@ _A2V_STAGES = [
157
  ]
158
 
159
  _LIPSYNC_STAGES = list(_A2V_STAGES)
 
160
  _KEYFRAME_STAGES = [
161
  Stage("Encode prompt", 5),
162
  Stage("Encode keyframes", 5),
@@ -201,6 +210,13 @@ MODE_REGISTRY["lipsync"] = Mode(
201
  parameterize_fn=_lipsync_parameterize,
202
  stage_map=_LIPSYNC_STAGES,
203
  )
 
 
 
 
 
 
 
204
  MODE_REGISTRY["keyframe"] = Mode(
205
  name="keyframe",
206
  label="Keyframe → Video",
 
66
  "a2v": "463:5540",
67
  "i2v": "209:5541",
68
  "lipsync": "521:5542",
69
+ "multimodal": "521:5542",
70
  "keyframe": "670:5543",
71
  "style": "5364:5545",
72
  }
 
116
  ]
117
 
118
 
119
+ def _multimodal_parameterize(inp: dict[str, Any]) -> list[Patch]:
120
+ return _shared_patches(inp, "multimodal") + [
121
+ (NODE_IMAGE_1, "image", inp["image"]),
122
+ (NODE_AUDIO, "audio", inp["audio"]),
123
+ ]
124
+
125
+
126
  def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
127
  return _shared_patches(inp, "keyframe") + [
128
  (NODE_IMAGE_1, "image", inp["first_frame"]),
 
165
  ]
166
 
167
  _LIPSYNC_STAGES = list(_A2V_STAGES)
168
+ _MULTIMODAL_STAGES = list(_A2V_STAGES)
169
  _KEYFRAME_STAGES = [
170
  Stage("Encode prompt", 5),
171
  Stage("Encode keyframes", 5),
 
210
  parameterize_fn=_lipsync_parameterize,
211
  stage_map=_LIPSYNC_STAGES,
212
  )
213
+ MODE_REGISTRY["multimodal"] = Mode(
214
+ name="multimodal",
215
+ label="Text + Image + Audio",
216
+ icon="🎭",
217
+ parameterize_fn=_multimodal_parameterize,
218
+ stage_map=_MULTIMODAL_STAGES,
219
+ )
220
  MODE_REGISTRY["keyframe"] = Mode(
221
  name="keyframe",
222
  label="Keyframe → Video",
tests/conftest.py CHANGED
@@ -103,6 +103,15 @@ def canonical_inputs() -> dict[str, dict[str, Any]]:
103
  "fps": 24,
104
  "seed": 42,
105
  },
 
 
 
 
 
 
 
 
 
106
  "keyframe": {
107
  "prompt": "smooth transition between the two frames",
108
  "first_frame": "/tmp/start.png",
 
103
  "fps": 24,
104
  "seed": 42,
105
  },
106
+ "multimodal": {
107
+ "prompt": "a cinematic multimodal generation",
108
+ "image": "/tmp/portrait.png",
109
+ "audio": "/tmp/speech.wav",
110
+ "preset": "balanced",
111
+ "frames": 81,
112
+ "fps": 24,
113
+ "seed": 42,
114
+ },
115
  "keyframe": {
116
  "prompt": "smooth transition between the two frames",
117
  "first_frame": "/tmp/start.png",
tests/test_modes.py CHANGED
@@ -49,7 +49,7 @@ def test_t2v_and_i2v_in_registry():
49
  assert "i2v" in modes.MODE_REGISTRY
50
 
51
 
52
- @pytest.mark.parametrize("mode_name", ["a2v", "lipsync", "keyframe", "style"])
53
  def test_remaining_modes_parameterize_validates(mode_name, canonical_inputs):
54
  inputs = canonical_inputs[mode_name]
55
  mode = modes.MODE_REGISTRY[mode_name]
@@ -86,13 +86,14 @@ def test_style_parameterize_passes_input_video(canonical_inputs):
86
  assert canonical_inputs["style"]["input_video"] in [p[2] for p in patches]
87
 
88
 
89
- def test_mode_registry_has_all_six_keys():
90
- """All six modes are in the registry now."""
91
  assert set(modes.MODE_REGISTRY.keys()) == {
92
  "t2v",
93
  "a2v",
94
  "i2v",
95
  "lipsync",
 
96
  "keyframe",
97
  "style",
98
  }
 
49
  assert "i2v" in modes.MODE_REGISTRY
50
 
51
 
52
+ @pytest.mark.parametrize("mode_name", ["a2v", "lipsync", "multimodal", "keyframe", "style"])
53
  def test_remaining_modes_parameterize_validates(mode_name, canonical_inputs):
54
  inputs = canonical_inputs[mode_name]
55
  mode = modes.MODE_REGISTRY[mode_name]
 
86
  assert canonical_inputs["style"]["input_video"] in [p[2] for p in patches]
87
 
88
 
89
+ def test_mode_registry_has_all_keys():
90
+ """All seven modes are in the registry now."""
91
  assert set(modes.MODE_REGISTRY.keys()) == {
92
  "t2v",
93
  "a2v",
94
  "i2v",
95
  "lipsync",
96
+ "multimodal",
97
  "keyframe",
98
  "style",
99
  }
ui.py CHANGED
@@ -80,6 +80,7 @@ CAMERA_LORAS: list[str] = [
80
  IC_LORAS_BY_MODE: dict[str, list[str]] = {
81
  "t2v": [],
82
  "a2v": [],
 
83
  "i2v": ["union", "pose-control"],
84
  "lipsync": ["pose-control"],
85
  "keyframe": ["union"],
 
80
  IC_LORAS_BY_MODE: dict[str, list[str]] = {
81
  "t2v": [],
82
  "a2v": [],
83
+ "multimodal": ["union", "pose-control"],
84
  "i2v": ["union", "pose-control"],
85
  "lipsync": ["pose-control"],
86
  "keyframe": ["union"],
workflow.py CHANGED
@@ -14,7 +14,7 @@ from typing import Any
14
 
15
  WORKFLOWS_DIR = pathlib.Path(__file__).parent / "workflows"
16
 
17
- VALID_MODES: tuple[str, ...] = ("t2v", "a2v", "i2v", "lipsync", "keyframe", "style")
18
 
19
 
20
  def load_template(mode: str) -> dict[str, Any]:
 
14
 
15
  WORKFLOWS_DIR = pathlib.Path(__file__).parent / "workflows"
16
 
17
+ VALID_MODES: tuple[str, ...] = ("t2v", "a2v", "i2v", "lipsync", "keyframe", "style", "multimodal")
18
 
19
 
20
  def load_template(mode: str) -> dict[str, Any]:
workflows/multimodal.json ADDED
@@ -0,0 +1,993 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "101": {
3
+ "inputs": {
4
+ "model_name": "ltx-2.3-spatial-upscaler-x2-1.0.safetensors"
5
+ },
6
+ "class_type": "LatentUpscaleModelLoader",
7
+ "_meta": {
8
+ "title": "Load Latent Upscale Model"
9
+ }
10
+ },
11
+ "146": {
12
+ "inputs": {
13
+ "clip_name1": "gemma_3_12B_it_fp4_mixed.safetensors",
14
+ "clip_name2": "ltx-2.3_text_projection_bf16.safetensors",
15
+ "type": "ltxv",
16
+ "device": "default"
17
+ },
18
+ "class_type": "DualCLIPLoader",
19
+ "_meta": {
20
+ "title": "DualCLIPLoader"
21
+ }
22
+ },
23
+ "149": {
24
+ "inputs": {
25
+ "image": "influencer_mic_hd.png"
26
+ },
27
+ "class_type": "LoadImage",
28
+ "_meta": {
29
+ "title": "Load Image1"
30
+ }
31
+ },
32
+ "174": {
33
+ "inputs": {
34
+ "vae_name": "LTX23_video_vae_bf16.safetensors",
35
+ "device": "main_device",
36
+ "weight_dtype": "bf16"
37
+ },
38
+ "class_type": "VAELoaderKJ",
39
+ "_meta": {
40
+ "title": "VAELoader Video"
41
+ }
42
+ },
43
+ "175": {
44
+ "inputs": {
45
+ "vae_name": "LTX23_audio_vae_bf16.safetensors",
46
+ "device": "main_device",
47
+ "weight_dtype": "bf16"
48
+ },
49
+ "class_type": "VAELoaderKJ",
50
+ "_meta": {
51
+ "title": "VAELoader Audio"
52
+ }
53
+ },
54
+ "188": {
55
+ "inputs": {
56
+ "frame_rate": [
57
+ "5446",
58
+ 0
59
+ ],
60
+ "loop_count": 0,
61
+ "filename_prefix": "LTX2.3/Video",
62
+ "format": "video/h264-mp4",
63
+ "pix_fmt": "yuv420p",
64
+ "crf": 8,
65
+ "save_metadata": false,
66
+ "trim_to_audio": false,
67
+ "pingpong": false,
68
+ "save_output": true,
69
+ "images": [
70
+ "217",
71
+ 0
72
+ ],
73
+ "audio": [
74
+ "218",
75
+ 0
76
+ ]
77
+ },
78
+ "class_type": "VHS_VideoCombine",
79
+ "_meta": {
80
+ "title": "Video Combine 🎥🅥🅗🅢"
81
+ }
82
+ },
83
+ "196": {
84
+ "inputs": {
85
+ "Xi": 7,
86
+ "Xf": 7,
87
+ "isfloatX": 0
88
+ },
89
+ "class_type": "mxSlider",
90
+ "_meta": {
91
+ "title": "Clip Length ( in seconds )"
92
+ }
93
+ },
94
+ "211": {
95
+ "inputs": {
96
+ "PowerLoraLoaderHeaderWidget": {
97
+ "type": "PowerLoraLoaderHeaderWidget"
98
+ },
99
+ "lora_1": {
100
+ "on": true,
101
+ "lora": "ltx-2.3-22b-distilled-lora-dynamic_fro09_avg_rank_105_bf16.safetensors",
102
+ "strength": 0.6
103
+ },
104
+ "lora_2": {
105
+ "on": false,
106
+ "lora": "ltx-2.3-22b-distilled-lora-384.safetensors",
107
+ "strength": 1
108
+ },
109
+ "lora_3": {
110
+ "on": false,
111
+ "lora": "ltx-2-19b-lora-camera-control-dolly-left.safetensors",
112
+ "strength": 1
113
+ },
114
+ "lora_4": {
115
+ "on": false,
116
+ "lora": "ltx-2-19b-lora-camera-control-dolly-right.safetensors",
117
+ "strength": 1
118
+ },
119
+ "lora_5": {
120
+ "on": true,
121
+ "lora": "ltx-2-19b-lora-camera-control-dolly-in.safetensors",
122
+ "strength": 1
123
+ },
124
+ "lora_6": {
125
+ "on": false,
126
+ "lora": "ltx-2-19b-lora-camera-control-dolly-out.safetensors",
127
+ "strength": 1
128
+ },
129
+ "lora_7": {
130
+ "on": false,
131
+ "lora": "ltx-2-19b-lora-camera-control-jib-up.safetensors",
132
+ "strength": 1
133
+ },
134
+ "lora_8": {
135
+ "on": false,
136
+ "lora": "ltx-2-19b-lora-camera-control-jib-down.safetensors",
137
+ "strength": 0.6
138
+ },
139
+ "➕ Add Lora": "",
140
+ "model": [
141
+ "366",
142
+ 0
143
+ ],
144
+ "clip": [
145
+ "146",
146
+ 0
147
+ ]
148
+ },
149
+ "class_type": "Power Lora Loader (rgthree)",
150
+ "_meta": {
151
+ "title": "Power Lora Loader (rgthree)"
152
+ }
153
+ },
154
+ "217": {
155
+ "inputs": {
156
+ "any_04": [
157
+ "521:522",
158
+ 0
159
+ ]
160
+ },
161
+ "class_type": "Any Switch (rgthree)",
162
+ "_meta": {
163
+ "title": "Any Switch image"
164
+ }
165
+ },
166
+ "218": {
167
+ "inputs": {
168
+ "any_04": [
169
+ "5400",
170
+ 0
171
+ ]
172
+ },
173
+ "class_type": "Any Switch (rgthree)",
174
+ "_meta": {
175
+ "title": "Any Switch audio"
176
+ }
177
+ },
178
+ "366": {
179
+ "inputs": {
180
+ "unet_name": "ltx-2-3-22b-dev-Q4_K_M.gguf"
181
+ },
182
+ "class_type": "UnetLoaderGGUF",
183
+ "_meta": {
184
+ "title": "Unet Loader (GGUF) for Low V-Ram 8G-12G"
185
+ }
186
+ },
187
+ "591": {
188
+ "inputs": {
189
+ "vae_name": "taeltx2_3.safetensors"
190
+ },
191
+ "class_type": "VAELoader",
192
+ "_meta": {
193
+ "title": "Load VAE"
194
+ }
195
+ },
196
+ "700": {
197
+ "inputs": {
198
+ "chunks": 4,
199
+ "dim_threshold": 4096,
200
+ "model": [
201
+ "211",
202
+ 0
203
+ ]
204
+ },
205
+ "class_type": "LTXVChunkFeedForward",
206
+ "_meta": {
207
+ "title": "LTXV Chunk FeedForward"
208
+ }
209
+ },
210
+ "5376": {
211
+ "inputs": {
212
+ "lora_name": "ltx-2.3-22b-ic-lora-union-control-ref0.5.safetensors",
213
+ "strength_model": 1,
214
+ "model": [
215
+ "211",
216
+ 0
217
+ ]
218
+ },
219
+ "class_type": "LTXICLoRALoaderModelOnly",
220
+ "_meta": {
221
+ "title": "🅛🅣🅧 IC-LoRA Loader Model Only"
222
+ }
223
+ },
224
+ "5382": {
225
+ "inputs": {
226
+ "value": 1280
227
+ },
228
+ "class_type": "INTConstant",
229
+ "_meta": {
230
+ "title": "Height"
231
+ }
232
+ },
233
+ "5383": {
234
+ "inputs": {
235
+ "value": 720
236
+ },
237
+ "class_type": "INTConstant",
238
+ "_meta": {
239
+ "title": "Width"
240
+ }
241
+ },
242
+ "5387": {
243
+ "inputs": {
244
+ "expression": "a*b+1",
245
+ "a": [
246
+ "196",
247
+ 0
248
+ ],
249
+ "b": [
250
+ "5445",
251
+ 0
252
+ ]
253
+ },
254
+ "class_type": "MathExpression|pysssss",
255
+ "_meta": {
256
+ "title": "Math Expression 1🐍"
257
+ }
258
+ },
259
+ "5392": {
260
+ "inputs": {
261
+ "chunks": 4,
262
+ "dim_threshold": 4096,
263
+ "model": [
264
+ "5376",
265
+ 0
266
+ ]
267
+ },
268
+ "class_type": "LTXVChunkFeedForward",
269
+ "_meta": {
270
+ "title": "LTXV Chunk FeedForward"
271
+ }
272
+ },
273
+ "5400": {
274
+ "inputs": {
275
+ "audio": "beauty_pagent_dialogue.mp3",
276
+ "start_time": 0,
277
+ "duration": [
278
+ "5442",
279
+ 0
280
+ ]
281
+ },
282
+ "class_type": "VHS_LoadAudioUpload",
283
+ "_meta": {
284
+ "title": "Load Audio (Upload)🎥🅥🅗🅢"
285
+ }
286
+ },
287
+ "5401": {
288
+ "inputs": {
289
+ "audioUI": "",
290
+ "audio": [
291
+ "5400",
292
+ 0
293
+ ]
294
+ },
295
+ "class_type": "PreviewAudio",
296
+ "_meta": {
297
+ "title": "Preview Audio"
298
+ }
299
+ },
300
+ "5429": {
301
+ "inputs": {
302
+ "resize_type": "scale dimensions",
303
+ "resize_type.width": [
304
+ "5383",
305
+ 0
306
+ ],
307
+ "resize_type.height": [
308
+ "5382",
309
+ 0
310
+ ],
311
+ "resize_type.crop": "center",
312
+ "scale_method": "lanczos",
313
+ "input": [
314
+ "149",
315
+ 0
316
+ ]
317
+ },
318
+ "class_type": "ResizeImageMaskNode",
319
+ "_meta": {
320
+ "title": "Resize Image/Mask"
321
+ }
322
+ },
323
+ "5434": {
324
+ "inputs": {
325
+ "resize_type": "scale dimensions",
326
+ "resize_type.width": [
327
+ "5383",
328
+ 0
329
+ ],
330
+ "resize_type.height": [
331
+ "5382",
332
+ 0
333
+ ],
334
+ "resize_type.crop": "center",
335
+ "scale_method": "lanczos",
336
+ "input": [
337
+ "5437",
338
+ 0
339
+ ]
340
+ },
341
+ "class_type": "ResizeImageMaskNode",
342
+ "_meta": {
343
+ "title": "Resize Image/Mask"
344
+ }
345
+ },
346
+ "5437": {
347
+ "inputs": {
348
+ "image": "5.FLF2.png"
349
+ },
350
+ "class_type": "LoadImage",
351
+ "_meta": {
352
+ "title": "Load Image2"
353
+ }
354
+ },
355
+ "5442": {
356
+ "inputs": {
357
+ "a": [
358
+ "196",
359
+ 0
360
+ ]
361
+ },
362
+ "class_type": "CM_IntToFloat",
363
+ "_meta": {
364
+ "title": "IntToFloat"
365
+ }
366
+ },
367
+ "5444": {
368
+ "inputs": {
369
+ "video": "Sway Dance Lesson 31s.mp4",
370
+ "force_rate": [
371
+ "5446",
372
+ 0
373
+ ],
374
+ "custom_width": 0,
375
+ "custom_height": 0,
376
+ "frame_load_cap": [
377
+ "5387",
378
+ 0
379
+ ],
380
+ "skip_first_frames": 266,
381
+ "select_every_nth": 1,
382
+ "format": "AnimateDiff"
383
+ },
384
+ "class_type": "VHS_LoadVideo",
385
+ "_meta": {
386
+ "title": "Load Video (Upload) 🎥🅥🅗🅢"
387
+ }
388
+ },
389
+ "5445": {
390
+ "inputs": {
391
+ "value": 25
392
+ },
393
+ "class_type": "INTConstant",
394
+ "_meta": {
395
+ "title": "FPS"
396
+ }
397
+ },
398
+ "5446": {
399
+ "inputs": {
400
+ "a": [
401
+ "5445",
402
+ 0
403
+ ]
404
+ },
405
+ "class_type": "CM_IntToFloat",
406
+ "_meta": {
407
+ "title": "IntToFloat"
408
+ }
409
+ },
410
+ "5458": {
411
+ "inputs": {
412
+ "resize_type": "scale dimensions",
413
+ "resize_type.width": [
414
+ "5383",
415
+ 0
416
+ ],
417
+ "resize_type.height": [
418
+ "5382",
419
+ 0
420
+ ],
421
+ "resize_type.crop": "center",
422
+ "scale_method": "lanczos",
423
+ "input": [
424
+ "5444",
425
+ 0
426
+ ]
427
+ },
428
+ "class_type": "ResizeImageMaskNode",
429
+ "_meta": {
430
+ "title": "Resize Image/Mask"
431
+ }
432
+ },
433
+ "5536": {
434
+ "inputs": {
435
+ "text": "A woman talking, her face and gestures demonstrate the conversation content. The camera slowly dolly in.",
436
+ "clip": [
437
+ "146",
438
+ 0
439
+ ]
440
+ },
441
+ "class_type": "CLIPTextEncode",
442
+ "_meta": {
443
+ "title": "CLIP Text Encode (Prompt) positive"
444
+ }
445
+ },
446
+ "5537": {
447
+ "inputs": {
448
+ "text": "blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles",
449
+ "clip": [
450
+ "146",
451
+ 0
452
+ ]
453
+ },
454
+ "class_type": "CLIPTextEncode",
455
+ "_meta": {
456
+ "title": "CLIP Text Encode (Prompt) negative"
457
+ }
458
+ },
459
+ "5560": {
460
+ "inputs": {
461
+ "unet_name": "ltx-2.3-22b-dev_transformer_only_fp8_scaled.safetensors",
462
+ "weight_dtype": "default"
463
+ },
464
+ "class_type": "UNETLoader",
465
+ "_meta": {
466
+ "title": "Diffusion Model Loader for V-ram 16G up"
467
+ }
468
+ },
469
+ "521:465": {
470
+ "inputs": {
471
+ "sigmas": "1., 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0"
472
+ },
473
+ "class_type": "ManualSigmas",
474
+ "_meta": {
475
+ "title": "ManualSigmas"
476
+ }
477
+ },
478
+ "521:469": {
479
+ "inputs": {
480
+ "value": 0,
481
+ "width": [
482
+ "521:473",
483
+ 0
484
+ ],
485
+ "height": [
486
+ "521:473",
487
+ 1
488
+ ]
489
+ },
490
+ "class_type": "SolidMask",
491
+ "_meta": {
492
+ "title": "SolidMask"
493
+ }
494
+ },
495
+ "521:471": {
496
+ "inputs": {
497
+ "width": [
498
+ "521:473",
499
+ 0
500
+ ],
501
+ "height": [
502
+ "521:473",
503
+ 1
504
+ ],
505
+ "length": [
506
+ "521:5511",
507
+ 0
508
+ ],
509
+ "batch_size": 1
510
+ },
511
+ "class_type": "EmptyLTXVLatentVideo",
512
+ "_meta": {
513
+ "title": "EmptyLTXVLatentVideo"
514
+ }
515
+ },
516
+ "521:473": {
517
+ "inputs": {
518
+ "image": [
519
+ "521:472",
520
+ 0
521
+ ]
522
+ },
523
+ "class_type": "GetImageSize",
524
+ "_meta": {
525
+ "title": "Get Image Size"
526
+ }
527
+ },
528
+ "521:474": {
529
+ "inputs": {
530
+ "video_latent": [
531
+ "521:470",
532
+ 0
533
+ ],
534
+ "audio_latent": [
535
+ "521:503",
536
+ 0
537
+ ]
538
+ },
539
+ "class_type": "LTXVConcatAVLatent",
540
+ "_meta": {
541
+ "title": "LTXVConcatAVLatent"
542
+ }
543
+ },
544
+ "521:475": {
545
+ "inputs": {
546
+ "sigmas": "0.8025, 0.6332, 0.3425, 0.0"
547
+ },
548
+ "class_type": "ManualSigmas",
549
+ "_meta": {
550
+ "title": "ManualSigmas"
551
+ }
552
+ },
553
+ "521:476": {
554
+ "inputs": {
555
+ "video_latent": [
556
+ "521:495",
557
+ 0
558
+ ],
559
+ "audio_latent": [
560
+ "521:519",
561
+ 1
562
+ ]
563
+ },
564
+ "class_type": "LTXVConcatAVLatent",
565
+ "_meta": {
566
+ "title": "LTXVConcatAVLatent"
567
+ }
568
+ },
569
+ "521:478": {
570
+ "inputs": {
571
+ "noise": [
572
+ "521:5542",
573
+ 0
574
+ ],
575
+ "guider": [
576
+ "521:498",
577
+ 0
578
+ ],
579
+ "sampler": [
580
+ "521:464",
581
+ 0
582
+ ],
583
+ "sigmas": [
584
+ "521:465",
585
+ 0
586
+ ],
587
+ "latent_image": [
588
+ "521:474",
589
+ 0
590
+ ]
591
+ },
592
+ "class_type": "SamplerCustomAdvanced",
593
+ "_meta": {
594
+ "title": "SamplerCustomAdvanced"
595
+ }
596
+ },
597
+ "521:486": {
598
+ "inputs": {
599
+ "longer_edge": 1024,
600
+ "images": [
601
+ "5429",
602
+ 0
603
+ ]
604
+ },
605
+ "class_type": "ResizeImagesByLongerEdge",
606
+ "_meta": {
607
+ "title": "Resize Images by Longer Edge"
608
+ }
609
+ },
610
+ "521:495": {
611
+ "inputs": {
612
+ "strength": 0.8,
613
+ "bypass": false,
614
+ "vae": [
615
+ "174",
616
+ 0
617
+ ],
618
+ "image": [
619
+ "521:492",
620
+ 0
621
+ ],
622
+ "latent": [
623
+ "521:477",
624
+ 0
625
+ ]
626
+ },
627
+ "class_type": "LTXVImgToVideoInplace",
628
+ "_meta": {
629
+ "title": "LTXVImgToVideoInplace"
630
+ }
631
+ },
632
+ "521:503": {
633
+ "inputs": {
634
+ "samples": [
635
+ "521:510",
636
+ 0
637
+ ],
638
+ "mask": [
639
+ "521:469",
640
+ 0
641
+ ]
642
+ },
643
+ "class_type": "SetLatentNoiseMask",
644
+ "_meta": {
645
+ "title": "Set Latent Noise Mask"
646
+ }
647
+ },
648
+ "521:517": {
649
+ "inputs": {
650
+ "frame_rate": [
651
+ "521:5513",
652
+ 0
653
+ ],
654
+ "positive": [
655
+ "5536",
656
+ 0
657
+ ],
658
+ "negative": [
659
+ "5537",
660
+ 0
661
+ ]
662
+ },
663
+ "class_type": "LTXVConditioning",
664
+ "_meta": {
665
+ "title": "LTXVConditioning"
666
+ }
667
+ },
668
+ "521:518": {
669
+ "inputs": {
670
+ "positive": [
671
+ "521:517",
672
+ 0
673
+ ],
674
+ "negative": [
675
+ "521:517",
676
+ 1
677
+ ],
678
+ "latent": [
679
+ "521:519",
680
+ 0
681
+ ]
682
+ },
683
+ "class_type": "LTXVCropGuides",
684
+ "_meta": {
685
+ "title": "LTXVCropGuides"
686
+ }
687
+ },
688
+ "521:519": {
689
+ "inputs": {
690
+ "av_latent": [
691
+ "521:478",
692
+ 0
693
+ ]
694
+ },
695
+ "class_type": "LTXVSeparateAVLatent",
696
+ "_meta": {
697
+ "title": "LTXVSeparateAVLatent"
698
+ }
699
+ },
700
+ "521:464": {
701
+ "inputs": {
702
+ "sampler_name": "euler_ancestral"
703
+ },
704
+ "class_type": "KSamplerSelect",
705
+ "_meta": {
706
+ "title": "KSamplerSelect"
707
+ }
708
+ },
709
+ "521:466": {
710
+ "inputs": {
711
+ "cfg": 1,
712
+ "model": [
713
+ "521:606",
714
+ 0
715
+ ],
716
+ "positive": [
717
+ "521:518",
718
+ 0
719
+ ],
720
+ "negative": [
721
+ "521:518",
722
+ 1
723
+ ]
724
+ },
725
+ "class_type": "CFGGuider",
726
+ "_meta": {
727
+ "title": "CFGGuider"
728
+ }
729
+ },
730
+ "521:498": {
731
+ "inputs": {
732
+ "cfg": 1,
733
+ "model": [
734
+ "521:606",
735
+ 0
736
+ ],
737
+ "positive": [
738
+ "521:517",
739
+ 0
740
+ ],
741
+ "negative": [
742
+ "521:517",
743
+ 1
744
+ ]
745
+ },
746
+ "class_type": "CFGGuider",
747
+ "_meta": {
748
+ "title": "CFGGuider"
749
+ }
750
+ },
751
+ "521:470": {
752
+ "inputs": {
753
+ "strength": 0.8,
754
+ "bypass": false,
755
+ "vae": [
756
+ "174",
757
+ 0
758
+ ],
759
+ "image": [
760
+ "521:492",
761
+ 0
762
+ ],
763
+ "latent": [
764
+ "521:471",
765
+ 0
766
+ ]
767
+ },
768
+ "class_type": "LTXVImgToVideoInplace",
769
+ "_meta": {
770
+ "title": "LTXVImgToVideoInplace"
771
+ }
772
+ },
773
+ "521:468": {
774
+ "inputs": {
775
+ "noise": [
776
+ "521:5542",
777
+ 0
778
+ ],
779
+ "guider": [
780
+ "521:466",
781
+ 0
782
+ ],
783
+ "sampler": [
784
+ "521:464",
785
+ 0
786
+ ],
787
+ "sigmas": [
788
+ "521:475",
789
+ 0
790
+ ],
791
+ "latent_image": [
792
+ "521:476",
793
+ 0
794
+ ]
795
+ },
796
+ "class_type": "SamplerCustomAdvanced",
797
+ "_meta": {
798
+ "title": "SamplerCustomAdvanced"
799
+ }
800
+ },
801
+ "521:477": {
802
+ "inputs": {
803
+ "samples": [
804
+ "521:519",
805
+ 0
806
+ ],
807
+ "upscale_model": [
808
+ "101",
809
+ 0
810
+ ],
811
+ "vae": [
812
+ "174",
813
+ 0
814
+ ]
815
+ },
816
+ "class_type": "LTXVLatentUpsampler",
817
+ "_meta": {
818
+ "title": "LTXVLatentUpsampler"
819
+ }
820
+ },
821
+ "521:522": {
822
+ "inputs": {
823
+ "tile_size": 512,
824
+ "overlap": 64,
825
+ "temporal_size": 2048,
826
+ "temporal_overlap": 8,
827
+ "samples": [
828
+ "521:479",
829
+ 0
830
+ ],
831
+ "vae": [
832
+ "174",
833
+ 0
834
+ ]
835
+ },
836
+ "class_type": "VAEDecodeTiled",
837
+ "_meta": {
838
+ "title": "VAE Decode (Tiled)"
839
+ }
840
+ },
841
+ "521:606": {
842
+ "inputs": {
843
+ "preview_rate": 8,
844
+ "model": [
845
+ "700",
846
+ 0
847
+ ],
848
+ "vae": [
849
+ "591",
850
+ 0
851
+ ]
852
+ },
853
+ "class_type": "LTX2SamplingPreviewOverride",
854
+ "_meta": {
855
+ "title": "LTX2 Sampling Preview Override"
856
+ }
857
+ },
858
+ "521:492": {
859
+ "inputs": {
860
+ "img_compression": 33,
861
+ "image": [
862
+ "521:486",
863
+ 0
864
+ ]
865
+ },
866
+ "class_type": "LTXVPreprocess",
867
+ "_meta": {
868
+ "title": "LTXVPreprocess"
869
+ }
870
+ },
871
+ "521:513": {
872
+ "inputs": {
873
+ "images": [
874
+ "521:522",
875
+ 0
876
+ ]
877
+ },
878
+ "class_type": "FinalFrameSelector",
879
+ "_meta": {
880
+ "title": "Final Frame Selector"
881
+ }
882
+ },
883
+ "521:485": {
884
+ "inputs": {
885
+ "width": [
886
+ "5383",
887
+ 0
888
+ ],
889
+ "height": [
890
+ "5382",
891
+ 0
892
+ ],
893
+ "batch_size": 1,
894
+ "color": 0
895
+ },
896
+ "class_type": "EmptyImage",
897
+ "_meta": {
898
+ "title": "EmptyImage"
899
+ }
900
+ },
901
+ "521:5513": {
902
+ "inputs": {
903
+ "value": [
904
+ "5446",
905
+ 0
906
+ ]
907
+ },
908
+ "class_type": "PrimitiveFloat",
909
+ "_meta": {
910
+ "title": "fps"
911
+ }
912
+ },
913
+ "521:479": {
914
+ "inputs": {
915
+ "av_latent": [
916
+ "521:468",
917
+ 0
918
+ ]
919
+ },
920
+ "class_type": "LTXVSeparateAVLatent",
921
+ "_meta": {
922
+ "title": "LTXVSeparateAVLatent"
923
+ }
924
+ },
925
+ "521:472": {
926
+ "inputs": {
927
+ "upscale_method": "lanczos",
928
+ "scale_by": 0.5,
929
+ "image": [
930
+ "521:485",
931
+ 0
932
+ ]
933
+ },
934
+ "class_type": "ImageScaleBy",
935
+ "_meta": {
936
+ "title": "Upscale Image By"
937
+ }
938
+ },
939
+ "521:510": {
940
+ "inputs": {
941
+ "audio": [
942
+ "5400",
943
+ 0
944
+ ],
945
+ "audio_vae": [
946
+ "175",
947
+ 0
948
+ ]
949
+ },
950
+ "class_type": "LTXVAudioVAEEncode",
951
+ "_meta": {
952
+ "title": "LTXV Audio VAE Encode"
953
+ }
954
+ },
955
+ "521:5511": {
956
+ "inputs": {
957
+ "expression": "a*b+1",
958
+ "a": [
959
+ "196",
960
+ 0
961
+ ],
962
+ "b": [
963
+ "521:5513",
964
+ 0
965
+ ]
966
+ },
967
+ "class_type": "MathExpression|pysssss",
968
+ "_meta": {
969
+ "title": "Math Expression 🐍"
970
+ }
971
+ },
972
+ "521:5512": {
973
+ "inputs": {
974
+ "a": [
975
+ "521:5513",
976
+ 0
977
+ ]
978
+ },
979
+ "class_type": "CM_FloatToInt",
980
+ "_meta": {
981
+ "title": "FloatToInt"
982
+ }
983
+ },
984
+ "521:5542": {
985
+ "inputs": {
986
+ "noise_seed": 87299332486566
987
+ },
988
+ "class_type": "RandomNoise",
989
+ "_meta": {
990
+ "title": "RandomNoise"
991
+ }
992
+ }
993
+ }