Emmanuel Durand committed on
Commit
21aaf52
·
1 Parent(s): 8749e18

Adding Latentsync

Browse files
.gitmodules ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [submodule "custom_nodes/ComfyUI-VideoHelperSuite"]
2
+ path = custom_nodes/ComfyUI-VideoHelperSuite
3
+ url = https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
4
+ [submodule "custom_nodes/ComfyUI-LatentSyncWrapper"]
5
+ path = custom_nodes/ComfyUI-LatentSyncWrapper
6
+ url = https://github.com/ShmuelRonen/ComfyUI-LatentSyncWrapper
app.py CHANGED
@@ -4,11 +4,8 @@ import random
4
  import sys
5
  from typing import Sequence, Mapping, Any, Union
6
  import torch
7
- from huggingface_hub import hf_hub_download
8
  import spaces
9
 
10
- hf_hub_download(repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt", local_dir="models/checkpoints")
11
-
12
  def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
13
  """Returns the value at the given index of a sequence or mapping.
14
 
@@ -119,60 +116,61 @@ def import_custom_nodes() -> None:
119
  from nodes import NODE_CLASS_MAPPINGS
120
 
121
 
122
- @spaces.GPU(duration=15)
123
- def generate_image(prompt):
124
  import_custom_nodes()
125
  with torch.inference_mode():
126
- checkpointloadersimple = NODE_CLASS_MAPPINGS["CheckpointLoaderSimple"]()
127
- checkpointloadersimple_4 = checkpointloadersimple.load_checkpoint(
128
- ckpt_name="SD1.5/v1-5-pruned-emaonly.ckpt"
129
- )
130
-
131
- emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
132
- emptylatentimage_5 = emptylatentimage.generate(
133
- width=512, height=512, batch_size=1
134
- )
135
-
136
- cliptextencode = NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
137
- cliptextencode_6 = cliptextencode.encode(
138
- text=prompt,
139
- clip=get_value_at_index(checkpointloadersimple_4, 1),
140
  )
141
 
142
- cliptextencode_7 = cliptextencode.encode(
143
- text="text, watermark", clip=get_value_at_index(checkpointloadersimple_4, 1)
144
- )
145
-
146
- ksampler = NODE_CLASS_MAPPINGS["KSampler"]()
147
- vaedecode = NODE_CLASS_MAPPINGS["VAEDecode"]()
148
- saveimage = NODE_CLASS_MAPPINGS["SaveImage"]()
149
 
150
  for q in range(1):
151
- ksampler_3 = ksampler.sample(
152
- seed=random.randint(1, 2**64),
153
- steps=20,
154
- cfg=8,
155
- sampler_name="euler",
156
- scheduler="normal",
157
- denoise=1,
158
- model=get_value_at_index(checkpointloadersimple_4, 0),
159
- positive=get_value_at_index(cliptextencode_6, 0),
160
- negative=get_value_at_index(cliptextencode_7, 0),
161
- latent_image=get_value_at_index(emptylatentimage_5, 0),
162
  )
163
 
164
- vaedecode_8 = vaedecode.decode(
165
- samples=get_value_at_index(ksampler_3, 0),
166
- vae=get_value_at_index(checkpointloadersimple_4, 2),
 
 
 
167
  )
168
 
169
- saveimage_9 = saveimage.save_images(
170
- filename_prefix="ComfyUI", images=get_value_at_index(vaedecode_8, 0)
 
 
 
 
 
 
 
 
 
 
 
171
  )
172
 
173
- saved_path = f"output/{saveimage_9['ui']['images'][0]['filename']}"
174
- return saved_path
175
-
176
 
177
  if __name__ == "__main__":
178
  # Comment out the main() call in the exported Python code
@@ -185,20 +183,24 @@ if __name__ == "__main__":
185
  with gr.Row():
186
  with gr.Column():
187
  # Add an input
188
- prompt_input = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
 
 
 
 
189
 
190
  # The generate button
191
  generate_btn = gr.Button("Generate")
192
 
193
  with gr.Column():
194
  # The output image
195
- output_image = gr.Image(label="Generated Image")
196
 
197
  # When clicking the button, it will trigger the `generate_image` function, with the respective inputs
198
  # and the output an image
199
  generate_btn.click(
200
  fn=generate_image,
201
- inputs=[prompt_input],
202
- outputs=[output_image]
203
  )
204
  app.launch(share=True)
 
4
  import sys
5
  from typing import Sequence, Mapping, Any, Union
6
  import torch
 
7
  import spaces
8
 
 
 
9
  def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
10
  """Returns the value at the given index of a sequence or mapping.
11
 
 
116
  from nodes import NODE_CLASS_MAPPINGS
117
 
118
 
119
+ #@spaces.GPU(duration=15)
120
+ def generate_image(video, audio):
121
  import_custom_nodes()
122
  with torch.inference_mode():
123
+ loadaudio = NODE_CLASS_MAPPINGS["LoadAudio"]()
124
+ loadaudio_37 = loadaudio.load(audio=audio)
125
+
126
+ vhs_loadvideo = NODE_CLASS_MAPPINGS["VHS_LoadVideo"]()
127
+ vhs_loadvideo_40 = vhs_loadvideo.load_video(
128
+ video=video,
129
+ force_rate=25,
130
+ custom_width=0,
131
+ custom_height=768,
132
+ frame_load_cap=0,
133
+ skip_first_frames=0,
134
+ select_every_nth=1,
135
+ format="AnimateDiff",
 
136
  )
137
 
138
+ videolengthadjuster = NODE_CLASS_MAPPINGS["VideoLengthAdjuster"]()
139
+ latentsyncnode = NODE_CLASS_MAPPINGS["LatentSyncNode"]()
140
+ vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
 
 
 
 
141
 
142
  for q in range(1):
143
+ videolengthadjuster_55 = videolengthadjuster.adjust(
144
+ mode="pingpong",
145
+ fps=25,
146
+ silent_padding_sec=0.5,
147
+ images=get_value_at_index(vhs_loadvideo_40, 0),
148
+ audio=get_value_at_index(loadaudio_37, 0),
 
 
 
 
 
149
  )
150
 
151
+ latentsyncnode_54 = latentsyncnode.inference(
152
+ seed=random.randint(1, 2**64),
153
+ lips_expression=1.5,
154
+ inference_steps=20,
155
+ images=get_value_at_index(videolengthadjuster_55, 0),
156
+ audio=get_value_at_index(videolengthadjuster_55, 1),
157
  )
158
 
159
+ vhs_videocombine_41 = vhs_videocombine.combine_video(
160
+ frame_rate=25,
161
+ loop_count=0,
162
+ filename_prefix="latentsync",
163
+ format="video/h264-mp4",
164
+ pix_fmt="yuv420p",
165
+ crf=19,
166
+ save_metadata=True,
167
+ trim_to_audio=False,
168
+ pingpong=False,
169
+ save_output=True,
170
+ images=get_value_at_index(latentsyncnode_54, 0),
171
+ audio=get_value_at_index(latentsyncnode_54, 1),
172
  )
173
 
 
 
 
174
 
175
  if __name__ == "__main__":
176
  # Comment out the main() call in the exported Python code
 
183
  with gr.Row():
184
  with gr.Column():
185
  # Add an input
186
+ with gr.Row():
187
+ with gr.Group():
188
+ source_video = gr.Video(label="Source video")
189
+ with gr.Group():
190
+ source_audio = gr.Audio(label="Microphone", type="filepath")
191
 
192
  # The generate button
193
  generate_btn = gr.Button("Generate")
194
 
195
  with gr.Column():
196
  # The output image
197
+ output_video = gr.Video(label="Generated Image")
198
 
199
  # When clicking the button, it will trigger the `generate_image` function, with the respective inputs
200
  # and the output an image
201
  generate_btn.click(
202
  fn=generate_image,
203
+ inputs=[source_video, source_audio],
204
+ outputs=[output_video]
205
  )
206
  app.launch(share=True)
custom_nodes/ComfyUI-LatentSyncWrapper ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 556b673f82a0a6c38c82344332d33c98d8f30feb
custom_nodes/ComfyUI-VideoHelperSuite ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit a7ce59e381934733bfae03b1be029756d6ce936d
requirements.txt CHANGED
@@ -18,6 +18,8 @@ Pillow
18
  scipy
19
  tqdm
20
  psutil
 
 
21
 
22
  #non essential dependencies:
23
  kornia>=0.7.1
@@ -25,3 +27,23 @@ spandrel
25
  soundfile
26
  av>=14.2.0
27
  pydantic~=2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  scipy
19
  tqdm
20
  psutil
21
+ accelerate
22
+ gradio
23
 
24
  #non essential dependencies:
25
  kornia>=0.7.1
 
27
  soundfile
28
  av>=14.2.0
29
  pydantic~=2.0
30
+
31
+ #Latentsync
32
+ insightface
33
+ onnxruntime
34
+ diffusers
35
+ mediapipe>=0.10.8
36
+ transformers
37
+ huggingface-hub>=0.10.0
38
+ omegaconf
39
+ einops
40
+ opencv-python
41
+ face-alignment
42
+ decord
43
+ ffmpeg-python>=0.2.0
44
+ safetensors
45
+ soundfile
46
+
47
+ #videohelpersuite
48
+ opencv-python
49
+ imageio-ffmpeg