banao-tech commited on
Commit
1a8b8ad
·
verified ·
1 Parent(s): a4e0f95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -30
app.py CHANGED
@@ -8,11 +8,10 @@ from huggingface_hub import snapshot_download
8
 
9
  ROOT = Path(__file__).parent.resolve()
10
  REPO_DIR = ROOT / "LatentSync"
11
- TEMP_DIR = REPO_DIR / "temp"
12
- INPUT_DIR = REPO_DIR / "inputs"
13
- OUTPUT_DIR = REPO_DIR / "outputs"
14
  CKPT_DIR = REPO_DIR / "checkpoints"
 
15
 
 
16
  HF_CKPT_REPO = "ByteDance/LatentSync-1.5"
17
 
18
  def run(cmd, cwd=None):
@@ -20,25 +19,29 @@ def run(cmd, cwd=None):
20
  subprocess.check_call(cmd, cwd=cwd)
21
 
22
  def setup():
 
23
  if not REPO_DIR.exists():
24
  run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])
25
 
26
  CKPT_DIR.mkdir(parents=True, exist_ok=True)
 
 
 
27
  snapshot_download(
28
  repo_id=HF_CKPT_REPO,
29
  local_dir=str(CKPT_DIR),
30
  local_dir_use_symlinks=False,
31
  )
32
 
33
- INPUT_DIR.mkdir(parents=True, exist_ok=True)
34
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
35
- TEMP_DIR.mkdir(parents=True, exist_ok=True)
36
-
37
- def make_still_video(img_path: str, audio_path: str, fps: int = 25) -> str:
38
  out_path = TEMP_DIR / f"still_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
39
  cmd = [
40
  "ffmpeg", "-y",
41
- "-loop", "1", "-i", img_path,
42
  "-i", audio_path,
43
  "-shortest",
44
  "-r", str(fps),
@@ -51,42 +54,51 @@ def make_still_video(img_path: str, audio_path: str, fps: int = 25) -> str:
51
  run(cmd)
52
  return str(out_path)
53
 
54
- def generate(avatar_img, audio_wav):
55
  setup()
56
 
57
  img_path = str(Path(avatar_img).resolve())
58
  wav_path = str(Path(audio_wav).resolve())
59
 
60
- # create video from image+audio
61
- still_video = make_still_video(img_path, wav_path, fps=25)
62
 
63
- # run LatentSync inference (use repo script directly)
64
- out_path = OUTPUT_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
65
 
66
- # NOTE:
67
- # LatentSync repo sometimes provides "gradio_app.py" or "predict.py" with different args.
68
- # We call the official inference entry if available.
69
- # If your build fails here, paste the Space logs and I’ll adjust to exact script/args.
70
  cmd = [
71
- "python", "predict.py",
72
- "--image_path", img_path,
 
 
73
  "--audio_path", wav_path,
74
- "--output_path", str(out_path),
 
 
 
 
75
  ]
 
 
76
 
77
- # Some LatentSync versions require video instead of image; if this fails we’ll swap
78
- # to their video-based inference script.
79
  run(cmd, cwd=str(REPO_DIR))
80
-
81
  return str(out_path)
82
 
83
- with gr.Blocks() as demo:
84
- gr.Markdown("# LatentSync (avatar.jpg + audio.wav → mp4)")
 
 
 
 
 
85
  with gr.Row():
86
- avatar = gr.Image(type="filepath", label="avatar.jpg/png")
87
- audio = gr.Audio(type="filepath", label="audio.wav", format="wav")
 
 
 
88
  btn = gr.Button("Generate")
89
- out = gr.Video(label="Output")
90
- btn.click(generate, inputs=[avatar, audio], outputs=out)
 
91
 
92
  demo.launch()
 
# Resolve every path relative to this file so the app works from any CWD.
ROOT = Path(__file__).parent.resolve()
# The upstream LatentSync repo is cloned here at runtime (see setup()).
REPO_DIR = ROOT / "LatentSync"
# Model weights downloaded from the HF Hub land here.
CKPT_DIR = REPO_DIR / "checkpoints"
# Scratch space for the still video and the inference output.
TEMP_DIR = REPO_DIR / "temp"

# Use 1.5 on T4 16GB
HF_CKPT_REPO = "ByteDance/LatentSync-1.5"
16
 
17
  def run(cmd, cwd=None):
 
19
  subprocess.check_call(cmd, cwd=cwd)
20
 
21
def setup():
    """Prepare the runtime environment: clone LatentSync and fetch weights.

    Safe to call on every request — the clone is guarded and the
    download is resumable/cached by huggingface_hub.
    """
    # Clone LatentSync repo at runtime (won't appear in HF Files tab)
    if not REPO_DIR.exists():
        run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])

    # Checkpoint and scratch directories must exist before inference runs.
    for directory in (CKPT_DIR, TEMP_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # Download all checkpoint files (includes latentsync_unet + whisper tiny/small etc)
    snapshot_download(
        repo_id=HF_CKPT_REPO,
        local_dir=str(CKPT_DIR),
        local_dir_use_symlinks=False,
    )
35
 
36
+ def make_still_video(image_path: str, audio_path: str, fps: int = 25) -> str:
37
+ """
38
+ Create a video by looping the avatar image for the length of the audio.
39
+ LatentSync expects a VIDEO input.
40
+ """
41
  out_path = TEMP_DIR / f"still_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
42
  cmd = [
43
  "ffmpeg", "-y",
44
+ "-loop", "1", "-i", image_path,
45
  "-i", audio_path,
46
  "-shortest",
47
  "-r", str(fps),
 
54
  run(cmd)
55
  return str(out_path)
56
 
57
def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
    """Run LatentSync lip-sync on a single avatar image + audio track.

    Args:
        avatar_img: filepath to the avatar image (from gr.Image, type="filepath").
        audio_wav: filepath to the driving audio (from gr.Audio, type="filepath").
        steps: diffusion inference steps (coerced to int).
        guidance: classifier-free guidance scale (coerced to float).
        seed: RNG seed for reproducibility (coerced to int).
        use_deepcache: if truthy, pass --enable_deepcache to the inference script.

    Returns:
        Path (as str) of the generated mp4 inside TEMP_DIR.

    Raises:
        ValueError: if either input file was not provided.
    """
    # Gradio passes None when the user clicks Generate without uploading;
    # fail early with a clear message instead of Path(None) -> TypeError.
    if not avatar_img or not audio_wav:
        raise ValueError("Both an avatar image and an audio file are required.")

    setup()

    img_path = str(Path(avatar_img).resolve())
    wav_path = str(Path(audio_wav).resolve())

    # Make a temp mp4 from the single image + audio
    video_path = make_still_video(img_path, wav_path, fps=25)

    # Timestamped name avoids clobbering results from earlier requests.
    out_path = TEMP_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"

    cmd = [
        "python", "-m", "scripts.inference",
        "--unet_config_path", "configs/unet.yaml",
        "--inference_ckpt_path", "checkpoints/latentsync_unet.pt",
        "--video_path", video_path,
        "--audio_path", wav_path,
        "--video_out_path", str(out_path),
        "--inference_steps", str(int(steps)),
        "--guidance_scale", str(float(guidance)),
        "--seed", str(int(seed)),
        "--temp_dir", "temp",
    ]
    if use_deepcache:
        cmd.append("--enable_deepcache")

    # The config/checkpoint paths above are relative, so the subprocess
    # must run from the repo root.
    run(cmd, cwd=str(REPO_DIR))
    return str(out_path)
85
 
86
# --- Gradio UI: two upload widgets, tuning controls, one output video. ---
with gr.Blocks(title="LatentSync (avatar.jpg + audio.wav → lip-sync mp4)") as demo:
    gr.Markdown("## LatentSync on Hugging Face (T4) — Upload avatar + audio → mp4")

    with gr.Row():
        avatar_input = gr.Image(type="filepath", label="Avatar image (jpg/png)")
        audio_input = gr.Audio(type="filepath", label="Audio (wav)", format="wav")

    with gr.Row():
        steps_slider = gr.Slider(10, 40, value=20, step=1, label="Inference Steps")
        guidance_slider = gr.Slider(0.8, 2.0, value=1.0, step=0.1, label="Guidance Scale")
        seed_box = gr.Number(value=1247, precision=0, label="Seed")
        deepcache_box = gr.Checkbox(value=True, label="Enable DeepCache (faster)")

    generate_btn = gr.Button("Generate")
    result_video = gr.Video(label="Output video")

    # Wire the button to the inference entry point.
    generate_btn.click(
        generate,
        inputs=[avatar_input, audio_input, steps_slider, guidance_slider, seed_box, deepcache_box],
        outputs=result_video,
    )

demo.launch()