techfreakworm committed on
Commit
03937ef
·
unverified ·
1 Parent(s): 0256245

feat(modes): A2V + Lipsync + Keyframe + Style parameterize_fn

Browse files
Files changed (2) hide show
  1. modes.py +149 -0
  2. tests/test_modes.py +53 -0
modes.py CHANGED
@@ -79,6 +79,46 @@ I2V_NODE_FPS = 5445
79
  I2V_NODE_CLIP_LENGTH = 196
80
  I2V_NODE_IMAGE = 149 # LoadImage "Load Image1" — wv[0] = filename
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def _frames_to_seconds(frames: int, fps: int) -> int:
84
  """Convert (frames, fps) to integer seconds for the mxSlider clip-length widget.
@@ -115,6 +155,50 @@ def _i2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
115
  ]
116
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  _T2V_STAGES = [
119
  Stage("Encode prompt", 5),
120
  Stage("Diffusion (Stage 1)", 60),
@@ -132,6 +216,43 @@ _I2V_STAGES = [
132
  Stage("Decode video", 10),
133
  ]
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  MODE_REGISTRY["t2v"] = Mode(
136
  name="t2v",
137
  label="Text → Video",
@@ -146,3 +267,31 @@ MODE_REGISTRY["i2v"] = Mode(
146
  parameterize_fn=_i2v_parameterize,
147
  stage_map=_I2V_STAGES,
148
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  I2V_NODE_CLIP_LENGTH = 196
80
  I2V_NODE_IMAGE = 149 # LoadImage "Load Image1" — wv[0] = filename
81
 
82
+ # Mode-specific media nodes — captured from workflows/{a2v,lipsync,keyframe,style}.json
83
+ # on 2026-04-30. All four templates contain the same node ids for these inputs (the
84
+ # Loaders group is shared across modes); only a subset is wired into each mode's
85
+ # pipeline.
86
+ #
87
+ # VHS_LoadAudioUpload and VHS_LoadVideo carry dict-style widgets_values keyed by
88
+ # "audio"/"video". The current set_input helper is list-indexed; passing
89
+ # widget_index=0 against a dict adds a numeric "0" key without replacing the
90
+ # canonical "audio"/"video" entry. The runtime file-path swap is therefore not
91
+ # yet wired — Task 12 only validates the patch tuple set. Real path injection
92
+ # lands when backend.py grows file-staging in Task 17.
93
+
94
+ A2V_NODE_PROMPT = 5536
95
+ A2V_NODE_NEG_PROMPT = 5537
96
+ A2V_NODE_WIDTH = 5383
97
+ A2V_NODE_HEIGHT = 5382
98
+ A2V_NODE_FPS = 5445
99
+ A2V_NODE_CLIP_LENGTH = 196
100
+ A2V_NODE_AUDIO = 5400 # VHS_LoadAudioUpload — dict wv keyed by "audio"
101
+
102
+ LIPSYNC_NODE_PROMPT = 5536
103
+ LIPSYNC_NODE_NEG_PROMPT = 5537
104
+ LIPSYNC_NODE_FPS = 5445
105
+ LIPSYNC_NODE_CLIP_LENGTH = 196
106
+ LIPSYNC_NODE_IMAGE = 149 # LoadImage "Load Image1" — wv[0] = filename
107
+ LIPSYNC_NODE_AUDIO = 5400 # VHS_LoadAudioUpload — dict wv keyed by "audio"
108
+
109
+ KEYFRAME_NODE_PROMPT = 5536
110
+ KEYFRAME_NODE_NEG_PROMPT = 5537
111
+ KEYFRAME_NODE_FPS = 5445
112
+ KEYFRAME_NODE_CLIP_LENGTH = 196
113
+ KEYFRAME_NODE_FIRST_FRAME = 149 # LoadImage "Load Image1" — wv[0] = filename
114
+ KEYFRAME_NODE_LAST_FRAME = 5437 # LoadImage "Load Image2" — wv[0] = filename
115
+
116
+ STYLE_NODE_PROMPT = 5536
117
+ STYLE_NODE_NEG_PROMPT = 5537
118
+ STYLE_NODE_FPS = 5445
119
+ STYLE_NODE_CLIP_LENGTH = 196
120
+ STYLE_NODE_INPUT_VIDEO = 5444 # VHS_LoadVideo — dict wv keyed by "video"
121
+
122
 
123
  def _frames_to_seconds(frames: int, fps: int) -> int:
124
  """Convert (frames, fps) to integer seconds for the mxSlider clip-length widget.
 
155
  ]
156
 
157
 
158
def _a2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Build the workflow patches for the Audio → Video mode.

    Required keys in *inp*: ``prompt``, ``audio``, ``width``, ``height``,
    ``fps`` and ``frames``. ``negative_prompt`` is optional and defaults to
    the empty string. Every patch targets widget index 0 of its node.
    """
    # Values are read in the same order the patches are emitted, so a missing
    # key surfaces for the earliest affected patch.
    prompt = inp["prompt"]
    negative_prompt = inp.get("negative_prompt", "")
    audio = inp["audio"]
    width = int(inp["width"])
    height = int(inp["height"])
    fps = int(inp["fps"])
    # The clip-length widget takes whole seconds, derived from frames and fps.
    clip_seconds = _frames_to_seconds(int(inp["frames"]), fps)

    return [
        (A2V_NODE_PROMPT, 0, prompt),
        (A2V_NODE_NEG_PROMPT, 0, negative_prompt),
        (A2V_NODE_AUDIO, 0, audio),
        (A2V_NODE_WIDTH, 0, width),
        (A2V_NODE_HEIGHT, 0, height),
        (A2V_NODE_FPS, 0, fps),
        (A2V_NODE_CLIP_LENGTH, 0, clip_seconds),
    ]
168
+
169
+
170
def _lipsync_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Build the workflow patches for the Lipsync mode.

    Required keys in *inp*: ``prompt``, ``image``, ``audio``, ``fps`` and
    ``frames``. ``negative_prompt`` is optional and defaults to the empty
    string. Every patch targets widget index 0 of its node.
    """
    # Values are read in the same order the patches are emitted.
    prompt = inp["prompt"]
    negative_prompt = inp.get("negative_prompt", "")
    image = inp["image"]
    audio = inp["audio"]
    fps = int(inp["fps"])
    # The clip-length widget takes whole seconds, derived from frames and fps.
    clip_seconds = _frames_to_seconds(int(inp["frames"]), fps)

    return [
        (LIPSYNC_NODE_PROMPT, 0, prompt),
        (LIPSYNC_NODE_NEG_PROMPT, 0, negative_prompt),
        (LIPSYNC_NODE_IMAGE, 0, image),
        (LIPSYNC_NODE_AUDIO, 0, audio),
        (LIPSYNC_NODE_FPS, 0, fps),
        (LIPSYNC_NODE_CLIP_LENGTH, 0, clip_seconds),
    ]
179
+
180
+
181
def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Build the workflow patches for the Keyframe → Video mode.

    Required keys in *inp*: ``prompt``, ``first_frame``, ``last_frame``,
    ``fps`` and ``frames``. ``negative_prompt`` is optional and defaults to
    the empty string. Every patch targets widget index 0 of its node.
    """
    # Values are read in the same order the patches are emitted.
    prompt = inp["prompt"]
    negative_prompt = inp.get("negative_prompt", "")
    first_frame = inp["first_frame"]
    last_frame = inp["last_frame"]
    fps = int(inp["fps"])
    # The clip-length widget takes whole seconds, derived from frames and fps.
    clip_seconds = _frames_to_seconds(int(inp["frames"]), fps)

    return [
        (KEYFRAME_NODE_PROMPT, 0, prompt),
        (KEYFRAME_NODE_NEG_PROMPT, 0, negative_prompt),
        (KEYFRAME_NODE_FIRST_FRAME, 0, first_frame),
        (KEYFRAME_NODE_LAST_FRAME, 0, last_frame),
        (KEYFRAME_NODE_FPS, 0, fps),
        (KEYFRAME_NODE_CLIP_LENGTH, 0, clip_seconds),
    ]
190
+
191
+
192
def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Build the workflow patches for the Style Transfer mode.

    Required keys in *inp*: ``prompt``, ``input_video``, ``fps`` and
    ``frames``. ``negative_prompt`` is optional and defaults to the empty
    string. Every patch targets widget index 0 of its node.
    """
    # Values are read in the same order the patches are emitted.
    prompt = inp["prompt"]
    negative_prompt = inp.get("negative_prompt", "")
    input_video = inp["input_video"]
    fps = int(inp["fps"])
    # The clip-length widget takes whole seconds, derived from frames and fps.
    clip_seconds = _frames_to_seconds(int(inp["frames"]), fps)

    return [
        (STYLE_NODE_PROMPT, 0, prompt),
        (STYLE_NODE_NEG_PROMPT, 0, negative_prompt),
        (STYLE_NODE_INPUT_VIDEO, 0, input_video),
        (STYLE_NODE_FPS, 0, fps),
        (STYLE_NODE_CLIP_LENGTH, 0, clip_seconds),
    ]
200
+
201
+
202
  _T2V_STAGES = [
203
  Stage("Encode prompt", 5),
204
  Stage("Diffusion (Stage 1)", 60),
 
216
  Stage("Decode video", 10),
217
  ]
218
 
219
# Ordered progress stages for the Audio → Video mode. The numeric second
# argument of each Stage sums to 100 across the list.
_A2V_STAGES = [
    Stage(label, weight)
    for label, weight in (
        ("Encode prompt", 5),
        ("Encode audio", 5),
        ("Diffusion (Stage 1)", 55),
        ("Spatial upscale", 7),
        ("Diffusion (Stage 2)", 18),
        ("Decode video", 10),
    )
]
227
+
228
# Ordered progress stages for the Lipsync mode. The numeric second argument
# of each Stage sums to 100 across the list.
_LIPSYNC_STAGES = [
    Stage(label, weight)
    for label, weight in (
        ("Encode prompt", 5),
        ("Encode image", 3),
        ("Encode audio", 5),
        ("Diffusion (Stage 1)", 52),
        ("Spatial upscale", 7),
        ("Diffusion (Stage 2)", 18),
        ("Decode video", 10),
    )
]
237
+
238
# Ordered progress stages for the Keyframe → Video mode. The numeric second
# argument of each Stage sums to 100 across the list.
_KEYFRAME_STAGES = [
    Stage(label, weight)
    for label, weight in (
        ("Encode prompt", 5),
        ("Encode keyframes", 5),
        ("Diffusion (Stage 1)", 55),
        ("Spatial upscale", 7),
        ("Diffusion (Stage 2)", 18),
        ("Decode video", 10),
    )
]
246
+
247
# Ordered progress stages for the Style Transfer mode. The numeric second
# argument of each Stage sums to 100 across the list.
_STYLE_STAGES = [
    Stage(label, weight)
    for label, weight in (
        ("Encode prompt", 5),
        ("Decode source video", 5),
        ("Diffusion (Stage 1)", 55),
        ("Spatial upscale", 7),
        ("Diffusion (Stage 2)", 18),
        ("Decode video", 10),
    )
]
255
+
256
  MODE_REGISTRY["t2v"] = Mode(
257
  name="t2v",
258
  label="Text → Video",
 
267
  parameterize_fn=_i2v_parameterize,
268
  stage_map=_I2V_STAGES,
269
  )
270
# Register the audio-, image-pair- and video-driven modes. Keys are spelled out
# explicitly and inserted in the same order as before: a2v, lipsync, keyframe,
# style. Assumes MODE_REGISTRY is a plain dict (supports ``update``).
MODE_REGISTRY.update(
    {
        "a2v": Mode(
            name="a2v",
            label="Audio → Video",
            icon="🎵",
            parameterize_fn=_a2v_parameterize,
            stage_map=_A2V_STAGES,
        ),
        "lipsync": Mode(
            name="lipsync",
            label="Lipsync",
            icon="👄",
            parameterize_fn=_lipsync_parameterize,
            stage_map=_LIPSYNC_STAGES,
        ),
        "keyframe": Mode(
            name="keyframe",
            label="Keyframe → Video",
            icon="🎞",
            parameterize_fn=_keyframe_parameterize,
            stage_map=_KEYFRAME_STAGES,
        ),
        "style": Mode(
            name="style",
            label="Style Transfer",
            icon="🎨",
            parameterize_fn=_style_parameterize,
            stage_map=_STYLE_STAGES,
        ),
    }
)
tests/test_modes.py CHANGED
@@ -46,3 +46,56 @@ def test_t2v_and_i2v_in_registry():
46
  """T2V and I2V exist in MODE_REGISTRY (full completeness in Task 12)."""
47
  assert "t2v" in modes.MODE_REGISTRY
48
  assert "i2v" in modes.MODE_REGISTRY
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  """T2V and I2V exist in MODE_REGISTRY (full completeness in Task 12)."""
47
  assert "t2v" in modes.MODE_REGISTRY
48
  assert "i2v" in modes.MODE_REGISTRY
49
+
50
+
51
@pytest.mark.parametrize("mode_name", ["a2v", "lipsync", "keyframe", "style"])
def test_remaining_modes_parameterize_validates(mode_name, canonical_inputs):
    """Patches from each newer mode apply to its template and still validate."""
    mode_inputs = canonical_inputs[mode_name]
    build_patches = modes.MODE_REGISTRY[mode_name].parameterize_fn
    patches = build_patches(mode_inputs)
    assert len(patches) > 0

    template = workflow.load_template(mode_name)
    # Each patch is a (node id, widget index, value) triple.
    for node_id, widget_index, value in patches:
        workflow.set_input(template, node_id, widget_index, value)
    workflow.validate(template)
62
+
63
+
64
def test_a2v_parameterize_passes_audio_path(canonical_inputs):
    """The A2V patch set carries the audio input as one of its values."""
    a2v_inputs = canonical_inputs["a2v"]
    patches = modes.MODE_REGISTRY["a2v"].parameterize_fn(a2v_inputs)
    patched_values = [patch[2] for patch in patches]
    assert a2v_inputs["audio"] in patched_values
67
+
68
+
69
def test_lipsync_parameterize_passes_image_and_audio(canonical_inputs):
    """The Lipsync patch set carries both the image and the audio inputs."""
    lipsync_inputs = canonical_inputs["lipsync"]
    patches = modes.MODE_REGISTRY["lipsync"].parameterize_fn(lipsync_inputs)
    patched_values = [patch[2] for patch in patches]
    assert lipsync_inputs["image"] in patched_values
    assert lipsync_inputs["audio"] in patched_values
74
+
75
+
76
def test_keyframe_parameterize_passes_two_frames(canonical_inputs):
    """The Keyframe patch set carries both the first and the last frame."""
    keyframe_inputs = canonical_inputs["keyframe"]
    patches = modes.MODE_REGISTRY["keyframe"].parameterize_fn(keyframe_inputs)
    patched_values = [patch[2] for patch in patches]
    assert keyframe_inputs["first_frame"] in patched_values
    assert keyframe_inputs["last_frame"] in patched_values
81
+
82
+
83
def test_style_parameterize_passes_input_video(canonical_inputs):
    """The Style patch set carries the input video as one of its values."""
    style_inputs = canonical_inputs["style"]
    patches = modes.MODE_REGISTRY["style"].parameterize_fn(style_inputs)
    patched_values = [patch[2] for patch in patches]
    assert style_inputs["input_video"] in patched_values
86
+
87
+
88
def test_mode_registry_has_all_six_keys():
    """The registry holds exactly the six expected mode keys."""
    expected_keys = {"t2v", "a2v", "i2v", "lipsync", "keyframe", "style"}
    registered_keys = set(modes.MODE_REGISTRY)
    assert registered_keys == expected_keys
93
+
94
+
95
def test_each_mode_has_required_attributes():
    """Every registered mode is keyed by its own name and fully populated."""
    for key, entry in modes.MODE_REGISTRY.items():
        assert entry.name == key
        assert entry.label  # non-empty
        assert entry.icon  # non-empty
        assert callable(entry.parameterize_fn)
        stages = entry.stage_map
        assert isinstance(stages, list)
        assert len(stages) > 0