banao-tech committed on
Commit
d47e052
·
verified ·
1 Parent(s): 36d1647

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -1,11 +1,8 @@
1
  import os
2
  os.environ["OMP_NUM_THREADS"] = "1"
3
-
4
- import os
5
  import subprocess
6
  from pathlib import Path
7
  from datetime import datetime
8
-
9
  import gradio as gr
10
  from huggingface_hub import snapshot_download
11
 
@@ -25,10 +22,10 @@ def setup():
25
  # Clone LatentSync repo at runtime (won't appear in HF Files tab)
26
  if not REPO_DIR.exists():
27
  run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])
28
-
29
  CKPT_DIR.mkdir(parents=True, exist_ok=True)
30
  TEMP_DIR.mkdir(parents=True, exist_ok=True)
31
-
32
  # Download all checkpoint files (includes latentsync_unet + whisper tiny/small etc)
33
  snapshot_download(
34
  repo_id=HF_CKPT_REPO,
@@ -59,18 +56,19 @@ def make_still_video(image_path: str, audio_path: str, fps: int = 25) -> str:
59
 
60
  def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
61
  setup()
62
-
63
  img_path = str(Path(avatar_img).resolve())
64
  wav_path = str(Path(audio_wav).resolve())
65
-
66
  # Make a temp mp4 from the single image + audio
67
  video_path = make_still_video(img_path, wav_path, fps=25)
68
-
69
  out_path = TEMP_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
70
-
 
71
  cmd = [
72
  "python", "-m", "scripts.inference",
73
- "--unet_config_path", "configs/unet.yaml",
74
  "--inference_ckpt_path", "checkpoints/latentsync_unet.pt",
75
  "--video_path", video_path,
76
  "--audio_path", wav_path,
@@ -80,28 +78,30 @@ def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
80
  "--seed", str(int(seed)),
81
  "--temp_dir", "temp",
82
  ]
 
83
  if use_deepcache:
84
  cmd.append("--enable_deepcache")
85
-
86
  run(cmd, cwd=str(REPO_DIR))
 
87
  return str(out_path)
88
 
89
  with gr.Blocks(title="LatentSync (avatar.jpg + audio.wav → lip-sync mp4)") as demo:
90
- gr.Markdown("## LatentSync on Hugging Face (T4) — Upload avatar + audio → mp4")
91
-
92
  with gr.Row():
93
  avatar = gr.Image(type="filepath", label="Avatar image (jpg/png)")
94
  audio = gr.Audio(type="filepath", label="Audio (wav)", format="wav")
95
-
96
  with gr.Row():
97
  steps = gr.Slider(10, 40, value=20, step=1, label="Inference Steps")
98
  guidance = gr.Slider(0.8, 2.0, value=1.0, step=0.1, label="Guidance Scale")
99
  seed = gr.Number(value=1247, precision=0, label="Seed")
100
  deepcache = gr.Checkbox(value=True, label="Enable DeepCache (faster)")
101
-
102
  btn = gr.Button("Generate")
103
  out = gr.Video(label="Output video")
104
-
105
  btn.click(generate, inputs=[avatar, audio, steps, guidance, seed, deepcache], outputs=out)
106
 
107
- demo.launch()
 
1
  import os
2
  os.environ["OMP_NUM_THREADS"] = "1"
 
 
3
  import subprocess
4
  from pathlib import Path
5
  from datetime import datetime
 
6
  import gradio as gr
7
  from huggingface_hub import snapshot_download
8
 
 
22
  # Clone LatentSync repo at runtime (won't appear in HF Files tab)
23
  if not REPO_DIR.exists():
24
  run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])
25
+
26
  CKPT_DIR.mkdir(parents=True, exist_ok=True)
27
  TEMP_DIR.mkdir(parents=True, exist_ok=True)
28
+
29
  # Download all checkpoint files (includes latentsync_unet + whisper tiny/small etc)
30
  snapshot_download(
31
  repo_id=HF_CKPT_REPO,
 
56
 
57
  def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
58
  setup()
59
+
60
  img_path = str(Path(avatar_img).resolve())
61
  wav_path = str(Path(audio_wav).resolve())
62
+
63
  # Make a temp mp4 from the single image + audio
64
  video_path = make_still_video(img_path, wav_path, fps=25)
65
+
66
  out_path = TEMP_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
67
+
68
+ # FIXED: Use correct config path - configs/unet/stage2.yaml instead of configs/unet.yaml
69
  cmd = [
70
  "python", "-m", "scripts.inference",
71
+ "--unet_config_path", "configs/unet/stage2.yaml", # ← FIXED PATH
72
  "--inference_ckpt_path", "checkpoints/latentsync_unet.pt",
73
  "--video_path", video_path,
74
  "--audio_path", wav_path,
 
78
  "--seed", str(int(seed)),
79
  "--temp_dir", "temp",
80
  ]
81
+
82
  if use_deepcache:
83
  cmd.append("--enable_deepcache")
84
+
85
  run(cmd, cwd=str(REPO_DIR))
86
+
87
  return str(out_path)
88
 
89
  with gr.Blocks(title="LatentSync (avatar.jpg + audio.wav → lip-sync mp4)") as demo:
90
+ gr.Markdown("## LatentSync 1.5 on Hugging Face (T4) — Upload avatar + audio → mp4")
91
+
92
  with gr.Row():
93
  avatar = gr.Image(type="filepath", label="Avatar image (jpg/png)")
94
  audio = gr.Audio(type="filepath", label="Audio (wav)", format="wav")
95
+
96
  with gr.Row():
97
  steps = gr.Slider(10, 40, value=20, step=1, label="Inference Steps")
98
  guidance = gr.Slider(0.8, 2.0, value=1.0, step=0.1, label="Guidance Scale")
99
  seed = gr.Number(value=1247, precision=0, label="Seed")
100
  deepcache = gr.Checkbox(value=True, label="Enable DeepCache (faster)")
101
+
102
  btn = gr.Button("Generate")
103
  out = gr.Video(label="Output video")
104
+
105
  btn.click(generate, inputs=[avatar, audio, steps, guidance, seed, deepcache], outputs=out)
106
 
107
+ demo.launch()