Emmanuel Durand committed on
Commit
21aaf52
·
1 Parent(s): 8749e18

Adding Latentsync

Browse files
.gitmodules ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [submodule "custom_nodes/ComfyUI-VideoHelperSuite"]
2
+ path = custom_nodes/ComfyUI-VideoHelperSuite
3
+ url = https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
4
+ [submodule "custom_nodes/ComfyUI-LatentSyncWrapper"]
5
+ path = custom_nodes/ComfyUI-LatentSyncWrapper
6
+ url = https://github.com/ShmuelRonen/ComfyUI-LatentSyncWrapper
app.py CHANGED
@@ -4,11 +4,8 @@ import random
4
  import sys
5
  from typing import Sequence, Mapping, Any, Union
6
  import torch
7
- from huggingface_hub import hf_hub_download
8
  import spaces
9
 
10
- hf_hub_download(repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt", local_dir="models/checkpoints")
11
-
12
  def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
13
  """Returns the value at the given index of a sequence or mapping.
14
 
@@ -119,60 +116,61 @@ def import_custom_nodes() -> None:
119
  from nodes import NODE_CLASS_MAPPINGS
120
 
121
 
122
- @spaces.GPU(duration=15)
123
- def generate_image(prompt):
124
  import_custom_nodes()
125
  with torch.inference_mode():
126
- checkpointloadersimple = NODE_CLASS_MAPPINGS["CheckpointLoaderSimple"]()
127
- checkpointloadersimple_4 = checkpointloadersimple.load_checkpoint(
128
- ckpt_name="SD1.5/v1-5-pruned-emaonly.ckpt"
129
- )
130
-
131
- emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
132
- emptylatentimage_5 = emptylatentimage.generate(
133
- width=512, height=512, batch_size=1
134
- )
135
-
136
- cliptextencode = NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
137
- cliptextencode_6 = cliptextencode.encode(
138
- text=prompt,
139
- clip=get_value_at_index(checkpointloadersimple_4, 1),
140
  )
141
 
142
- cliptextencode_7 = cliptextencode.encode(
143
- text="text, watermark", clip=get_value_at_index(checkpointloadersimple_4, 1)
144
- )
145
-
146
- ksampler = NODE_CLASS_MAPPINGS["KSampler"]()
147
- vaedecode = NODE_CLASS_MAPPINGS["VAEDecode"]()
148
- saveimage = NODE_CLASS_MAPPINGS["SaveImage"]()
149
 
150
  for q in range(1):
151
- ksampler_3 = ksampler.sample(
152
- seed=random.randint(1, 2**64),
153
- steps=20,
154
- cfg=8,
155
- sampler_name="euler",
156
- scheduler="normal",
157
- denoise=1,
158
- model=get_value_at_index(checkpointloadersimple_4, 0),
159
- positive=get_value_at_index(cliptextencode_6, 0),
160
- negative=get_value_at_index(cliptextencode_7, 0),
161
- latent_image=get_value_at_index(emptylatentimage_5, 0),
162
  )
163
 
164
- vaedecode_8 = vaedecode.decode(
165
- samples=get_value_at_index(ksampler_3, 0),
166
- vae=get_value_at_index(checkpointloadersimple_4, 2),
 
 
 
167
  )
168
 
169
- saveimage_9 = saveimage.save_images(
170
- filename_prefix="ComfyUI", images=get_value_at_index(vaedecode_8, 0)
 
 
 
 
 
 
 
 
 
 
 
171
  )
172
 
173
- saved_path = f"output/{saveimage_9['ui']['images'][0]['filename']}"
174
- return saved_path
175
-
176
 
177
  if __name__ == "__main__":
178
  # Comment out the main() call in the exported Python code
@@ -185,20 +183,24 @@ if __name__ == "__main__":
185
  with gr.Row():
186
  with gr.Column():
187
  # Add an input
188
- prompt_input = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
 
 
 
 
189
 
190
  # The generate button
191
  generate_btn = gr.Button("Generate")
192
 
193
  with gr.Column():
194
  # The output image
195
- output_image = gr.Image(label="Generated Image")
196
 
197
  # When clicking the button, it will trigger the `generate_image` function, with the respective inputs
198
  # and the output an image
199
  generate_btn.click(
200
  fn=generate_image,
201
- inputs=[prompt_input],
202
- outputs=[output_image]
203
  )
204
  app.launch(share=True)
 
4
  import sys
5
  from typing import Sequence, Mapping, Any, Union
6
  import torch
 
7
  import spaces
8
 
 
 
9
  def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
10
  """Returns the value at the given index of a sequence or mapping.
11
 
 
116
  from nodes import NODE_CLASS_MAPPINGS
117
 
118
 
119
+ #@spaces.GPU(duration=15)
120
+ def generate_image(video, audio):
121
  import_custom_nodes()
122
  with torch.inference_mode():
123
+ loadaudio = NODE_CLASS_MAPPINGS["LoadAudio"]()
124
+ loadaudio_37 = loadaudio.load(audio=audio)
125
+
126
+ vhs_loadvideo = NODE_CLASS_MAPPINGS["VHS_LoadVideo"]()
127
+ vhs_loadvideo_40 = vhs_loadvideo.load_video(
128
+ video=video,
129
+ force_rate=25,
130
+ custom_width=0,
131
+ custom_height=768,
132
+ frame_load_cap=0,
133
+ skip_first_frames=0,
134
+ select_every_nth=1,
135
+ format="AnimateDiff",
 
136
  )
137
 
138
+ videolengthadjuster = NODE_CLASS_MAPPINGS["VideoLengthAdjuster"]()
139
+ latentsyncnode = NODE_CLASS_MAPPINGS["LatentSyncNode"]()
140
+ vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
 
 
 
 
141
 
142
  for q in range(1):
143
+ videolengthadjuster_55 = videolengthadjuster.adjust(
144
+ mode="pingpong",
145
+ fps=25,
146
+ silent_padding_sec=0.5,
147
+ images=get_value_at_index(vhs_loadvideo_40, 0),
148
+ audio=get_value_at_index(loadaudio_37, 0),
 
 
 
 
 
149
  )
150
 
151
+ latentsyncnode_54 = latentsyncnode.inference(
152
+ seed=random.randint(1, 2**64),
153
+ lips_expression=1.5,
154
+ inference_steps=20,
155
+ images=get_value_at_index(videolengthadjuster_55, 0),
156
+ audio=get_value_at_index(videolengthadjuster_55, 1),
157
  )
158
 
159
+ vhs_videocombine_41 = vhs_videocombine.combine_video(
160
+ frame_rate=25,
161
+ loop_count=0,
162
+ filename_prefix="latentsync",
163
+ format="video/h264-mp4",
164
+ pix_fmt="yuv420p",
165
+ crf=19,
166
+ save_metadata=True,
167
+ trim_to_audio=False,
168
+ pingpong=False,
169
+ save_output=True,
170
+ images=get_value_at_index(latentsyncnode_54, 0),
171
+ audio=get_value_at_index(latentsyncnode_54, 1),
172
  )
173
 
 
 
 
174
 
175
  if __name__ == "__main__":
176
  # Comment out the main() call in the exported Python code
 
183
  with gr.Row():
184
  with gr.Column():
185
  # Add an input
186
+ with gr.Row():
187
+ with gr.Group():
188
+ source_video = gr.Video(label="Source video")
189
+ with gr.Group():
190
+ source_audio = gr.Audio(label="Microphone", type="filepath")
191
 
192
  # The generate button
193
  generate_btn = gr.Button("Generate")
194
 
195
  with gr.Column():
196
  # The output image
197
+ output_video = gr.Video(label="Generated Image")
198
 
199
  # When clicking the button, it will trigger the `generate_image` function, with the respective inputs
200
  # and the output an image
201
  generate_btn.click(
202
  fn=generate_image,
203
+ inputs=[source_video, source_audio],
204
+ outputs=[output_video]
205
  )
206
  app.launch(share=True)
custom_nodes/ComfyUI-LatentSyncWrapper ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 556b673f82a0a6c38c82344332d33c98d8f30feb
custom_nodes/ComfyUI-VideoHelperSuite ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit a7ce59e381934733bfae03b1be029756d6ce936d
requirements.txt CHANGED
@@ -18,6 +18,8 @@ Pillow
18
  scipy
19
  tqdm
20
  psutil
 
 
21
 
22
  #non essential dependencies:
23
  kornia>=0.7.1
@@ -25,3 +27,23 @@ spandrel
25
  soundfile
26
  av>=14.2.0
27
  pydantic~=2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  scipy
19
  tqdm
20
  psutil
21
+ accelerate
22
+ gradio
23
 
24
  #non essential dependencies:
25
  kornia>=0.7.1
 
27
  soundfile
28
  av>=14.2.0
29
  pydantic~=2.0
30
+
31
+ #Latentsync
32
+ insightface
33
+ onnxruntime
34
+ diffusers
35
+ mediapipe>=0.10.8
36
+ transformers
37
+ huggingface-hub>=0.10.0
38
+ omegaconf
39
+ einops
40
+ opencv-python
41
+ face-alignment
42
+ decord
43
+ ffmpeg-python>=0.2.0
44
+ safetensors
45
+ soundfile
46
+
47
+ #videohelpersuite
48
+ opencv-python
49
+ imageio-ffmpeg