banao-tech commited on
Commit
1a8b8ad
·
verified ·
1 Parent(s): a4e0f95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -30
app.py CHANGED
@@ -8,11 +8,10 @@ from huggingface_hub import snapshot_download
8
 
9
  ROOT = Path(__file__).parent.resolve()
10
  REPO_DIR = ROOT / "LatentSync"
11
- TEMP_DIR = REPO_DIR / "temp"
12
- INPUT_DIR = REPO_DIR / "inputs"
13
- OUTPUT_DIR = REPO_DIR / "outputs"
14
  CKPT_DIR = REPO_DIR / "checkpoints"
 
15
 
 
16
  HF_CKPT_REPO = "ByteDance/LatentSync-1.5"
17
 
18
  def run(cmd, cwd=None):
@@ -20,25 +19,29 @@ def run(cmd, cwd=None):
20
  subprocess.check_call(cmd, cwd=cwd)
21
 
22
  def setup():
 
23
  if not REPO_DIR.exists():
24
  run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])
25
 
26
  CKPT_DIR.mkdir(parents=True, exist_ok=True)
 
 
 
27
  snapshot_download(
28
  repo_id=HF_CKPT_REPO,
29
  local_dir=str(CKPT_DIR),
30
  local_dir_use_symlinks=False,
31
  )
32
 
33
- INPUT_DIR.mkdir(parents=True, exist_ok=True)
34
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
35
- TEMP_DIR.mkdir(parents=True, exist_ok=True)
36
-
37
- def make_still_video(img_path: str, audio_path: str, fps: int = 25) -> str:
38
  out_path = TEMP_DIR / f"still_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
39
  cmd = [
40
  "ffmpeg", "-y",
41
- "-loop", "1", "-i", img_path,
42
  "-i", audio_path,
43
  "-shortest",
44
  "-r", str(fps),
@@ -51,42 +54,51 @@ def make_still_video(img_path: str, audio_path: str, fps: int = 25) -> str:
51
  run(cmd)
52
  return str(out_path)
53
 
54
- def generate(avatar_img, audio_wav):
55
  setup()
56
 
57
  img_path = str(Path(avatar_img).resolve())
58
  wav_path = str(Path(audio_wav).resolve())
59
 
60
- # create video from image+audio
61
- still_video = make_still_video(img_path, wav_path, fps=25)
62
 
63
- # run LatentSync inference (use repo script directly)
64
- out_path = OUTPUT_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
65
 
66
- # NOTE:
67
- # LatentSync repo sometimes provides "gradio_app.py" or "predict.py" with different args.
68
- # We call the official inference entry if available.
69
- # If your build fails here, paste the Space logs and I’ll adjust to exact script/args.
70
  cmd = [
71
- "python", "predict.py",
72
- "--image_path", img_path,
 
 
73
  "--audio_path", wav_path,
74
- "--output_path", str(out_path),
 
 
 
 
75
  ]
 
 
76
 
77
- # Some LatentSync versions require video instead of image; if this fails we’ll swap
78
- # to their video-based inference script.
79
  run(cmd, cwd=str(REPO_DIR))
80
-
81
  return str(out_path)
82
 
83
- with gr.Blocks() as demo:
84
- gr.Markdown("# LatentSync (avatar.jpg + audio.wav → mp4)")
 
 
 
 
 
85
  with gr.Row():
86
- avatar = gr.Image(type="filepath", label="avatar.jpg/png")
87
- audio = gr.Audio(type="filepath", label="audio.wav", format="wav")
 
 
 
88
  btn = gr.Button("Generate")
89
- out = gr.Video(label="Output")
90
- btn.click(generate, inputs=[avatar, audio], outputs=out)
 
91
 
92
  demo.launch()
 
# Resolve every path relative to this file so the app works from any CWD.
ROOT = Path(__file__).parent.resolve()
# The upstream LatentSync repo is cloned here at runtime (see setup()).
REPO_DIR = ROOT / "LatentSync"
# Model weights downloaded from the HF Hub land here.
CKPT_DIR = REPO_DIR / "checkpoints"
# Scratch space for the still video and the inference output.
TEMP_DIR = REPO_DIR / "temp"

# Use 1.5 on T4 16GB
HF_CKPT_REPO = "ByteDance/LatentSync-1.5"
16
 
17
  def run(cmd, cwd=None):
 
19
  subprocess.check_call(cmd, cwd=cwd)
20
 
21
def setup():
    """Prepare the runtime environment: clone LatentSync and fetch weights.

    Safe to call on every request — the clone is guarded and the
    download is resumable/cached by huggingface_hub.
    """
    # Clone LatentSync repo at runtime (won't appear in HF Files tab)
    if not REPO_DIR.exists():
        run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])

    # Checkpoint and scratch directories must exist before inference runs.
    for directory in (CKPT_DIR, TEMP_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # Download all checkpoint files (includes latentsync_unet + whisper tiny/small etc)
    snapshot_download(
        repo_id=HF_CKPT_REPO,
        local_dir=str(CKPT_DIR),
        local_dir_use_symlinks=False,
    )
35
 
36
+ def make_still_video(image_path: str, audio_path: str, fps: int = 25) -> str:
37
+ """
38
+ Create a video by looping the avatar image for the length of the audio.
39
+ LatentSync expects a VIDEO input.
40
+ """
41
  out_path = TEMP_DIR / f"still_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
42
  cmd = [
43
  "ffmpeg", "-y",
44
+ "-loop", "1", "-i", image_path,
45
  "-i", audio_path,
46
  "-shortest",
47
  "-r", str(fps),
 
54
  run(cmd)
55
  return str(out_path)
56
 
57
def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
    """Run LatentSync lip-sync on a single avatar image + audio track.

    Args:
        avatar_img: filepath to the avatar image (from gr.Image, type="filepath").
        audio_wav: filepath to the driving audio (from gr.Audio, type="filepath").
        steps: diffusion inference steps (coerced to int).
        guidance: classifier-free guidance scale (coerced to float).
        seed: RNG seed for reproducibility (coerced to int).
        use_deepcache: if truthy, pass --enable_deepcache to the inference script.

    Returns:
        Path (as str) of the generated mp4 inside TEMP_DIR.

    Raises:
        ValueError: if either input file was not provided.
    """
    # Gradio passes None when the user clicks Generate without uploading;
    # fail early with a clear message instead of Path(None) -> TypeError.
    if not avatar_img or not audio_wav:
        raise ValueError("Both an avatar image and an audio file are required.")

    setup()

    img_path = str(Path(avatar_img).resolve())
    wav_path = str(Path(audio_wav).resolve())

    # Make a temp mp4 from the single image + audio
    video_path = make_still_video(img_path, wav_path, fps=25)

    # Timestamped name avoids clobbering results from earlier requests.
    out_path = TEMP_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"

    cmd = [
        "python", "-m", "scripts.inference",
        "--unet_config_path", "configs/unet.yaml",
        "--inference_ckpt_path", "checkpoints/latentsync_unet.pt",
        "--video_path", video_path,
        "--audio_path", wav_path,
        "--video_out_path", str(out_path),
        "--inference_steps", str(int(steps)),
        "--guidance_scale", str(float(guidance)),
        "--seed", str(int(seed)),
        "--temp_dir", "temp",
    ]
    if use_deepcache:
        cmd.append("--enable_deepcache")

    # The config/checkpoint paths above are relative, so the subprocess
    # must run from the repo root.
    run(cmd, cwd=str(REPO_DIR))
    return str(out_path)
85
 
86
# --- Gradio UI: two upload widgets, tuning controls, one output video. ---
with gr.Blocks(title="LatentSync (avatar.jpg + audio.wav → lip-sync mp4)") as demo:
    gr.Markdown("## LatentSync on Hugging Face (T4) — Upload avatar + audio → mp4")

    with gr.Row():
        avatar_input = gr.Image(type="filepath", label="Avatar image (jpg/png)")
        audio_input = gr.Audio(type="filepath", label="Audio (wav)", format="wav")

    with gr.Row():
        steps_slider = gr.Slider(10, 40, value=20, step=1, label="Inference Steps")
        guidance_slider = gr.Slider(0.8, 2.0, value=1.0, step=0.1, label="Guidance Scale")
        seed_box = gr.Number(value=1247, precision=0, label="Seed")
        deepcache_box = gr.Checkbox(value=True, label="Enable DeepCache (faster)")

    generate_btn = gr.Button("Generate")
    result_video = gr.Video(label="Output video")

    # Wire the button to the inference entry point.
    generate_btn.click(
        generate,
        inputs=[avatar_input, audio_input, steps_slider, guidance_slider, seed_box, deepcache_box],
        outputs=result_video,
    )

demo.launch()