"""Gradio front end for Video-Retalking lip sync.

On first use the app clones the upstream repository and downloads the model
checkpoints; each request then runs the repository's ``inference.py`` on the
uploaded face image and audio file.
"""

import os
import shutil
import subprocess
import sys
import threading
from pathlib import Path

import gradio as gr

REPO_DIR = Path("vrt")
SETUP_FLAG = Path("setup_done.txt")
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

HF_BASE = "https://huggingface.co/camenduru/video-retalking/resolve/main"
CHECKPOINT_FILES = [
    "30_net_gen.pth",
    "BFM.zip",
    "DNet.pt",
    "ENet.pth",
    "LNet.pth",
    "ParseNet-latest.pth",
    "RetinaFace-R50.pth",
    "expression.mat",
    "face3d_pretrain_epoch_20.pth",
    "GFPGANv1.3.pth",
    "GPEN-BFR-512.pth",
    "shape_predictor_68_face_landmarks.dat",
]

# Serialises setup() so only one caller at a time clones/downloads.
_setup_lock = threading.Lock()


def run(cmd, cwd=None, extra_env=None):
    """Run *cmd* and return its combined stdout/stderr as text.

    Args:
        cmd: Argument vector (list of strings); no shell is involved.
        cwd: Optional working directory for the child process.
        extra_env: Optional mapping merged on top of a copy of the current
            environment.

    Returns:
        The captured output (stderr is folded into stdout).

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            message carries the exit code, the command and its output.
    """
    env = os.environ.copy()
    if extra_env:
        env.update(extra_env)

    p = subprocess.run(
        cmd,
        cwd=str(cwd) if cwd else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        check=False,
        env=env,
    )
    if p.returncode != 0:
        raise RuntimeError(
            f"Command failed ({p.returncode}): {' '.join(cmd)}\n\n{p.stdout}"
        )
    return p.stdout


def download_file(url: str, dest: Path):
    """Download *url* to *dest* with curl unless *dest* already has content.

    The data is written to a sibling ``<name>.part`` file and renamed onto
    *dest* only after curl succeeds, so an interrupted transfer or an HTTP
    error page is never mistaken for a finished download on the next call.

    Raises:
        RuntimeError: If curl exits with a non-zero status.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    if dest.exists() and dest.stat().st_size > 0:
        return

    partial = dest.with_name(dest.name + ".part")
    # Pass an argv list straight to curl: no shell, so neither the URL nor
    # the destination path can be re-interpreted by quoting rules.
    run(
        [
            "curl",
            "-L",
            "--fail",  # non-2xx responses fail instead of being saved to disk
            "-C", "-",  # resume a previous .part file if one exists
            "--retry", "5",
            "--retry-delay", "2",
            "-o", str(partial),
            url,
        ]
    )
    partial.replace(dest)


def ensure_checkpoints():
    """Download every checkpoint into ``REPO_DIR/checkpoints`` and unpack BFM.

    ``BFM.zip`` is extracted into ``checkpoints/BFM/`` the first time that
    directory is missing.

    Raises:
        RuntimeError: If a download or the extraction command fails.
    """
    ckpt_dir = REPO_DIR / "checkpoints"
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    for fname in CHECKPOINT_FILES:
        download_file(f"{HF_BASE}/{fname}", ckpt_dir / fname)

    # Unzip BFM.zip -> checkpoints/BFM/
    bfm_zip = ckpt_dir / "BFM.zip"
    bfm_dir = ckpt_dir / "BFM"
    if not bfm_dir.exists():
        bfm_dir.mkdir(parents=True, exist_ok=True)
        try:
            run(["unzip", "-q", str(bfm_zip), "-d", str(bfm_dir)])
        except Exception:
            # The directory's existence is the "already extracted" marker;
            # remove it so a failed extraction is retried next time.
            shutil.rmtree(bfm_dir, ignore_errors=True)
            raise


def setup():
    """Clone the repository and fetch checkpoints once.

    The work is guarded by ``_setup_lock`` and skipped when both
    ``SETUP_FLAG`` and ``REPO_DIR`` already exist. ``SETUP_FLAG`` is only
    created after every step has succeeded.

    Raises:
        RuntimeError: If cloning or fetching the checkpoints fails.
    """
    with _setup_lock:
        if SETUP_FLAG.exists() and REPO_DIR.exists():
            return

        print("Setting up Video-Retalking...")

        if not REPO_DIR.exists():
            run(
                [
                    "git",
                    "clone",
                    "https://github.com/OpenTalker/video-retalking.git",
                    str(REPO_DIR),
                ]
            )

        # Best effort: a failing or unavailable git-lfs must not abort setup,
        # but say so instead of hiding the failure entirely.
        try:
            run(["git", "lfs", "pull"], cwd=REPO_DIR)
        except (RuntimeError, OSError) as exc:
            print(f"git lfs pull skipped: {exc}")

        ensure_checkpoints()

        SETUP_FLAG.touch()
        print("✅ Setup complete!")


def generate(image_path, audio_path):
    """Produce a lip-synced video from a face image and an audio file.

    Args:
        image_path: Filesystem path of the uploaded face image.
        audio_path: Filesystem path of the uploaded audio file.

    Returns:
        A ``(video_path, status)`` tuple. ``video_path`` is ``None`` when an
        input is missing or anything fails; ``status`` is the message shown
        to the user. Exceptions are reported through ``status`` rather than
        raised.
    """
    if not image_path or not audio_path:
        return None, "❌ Upload both image and audio!"

    try:
        setup()

        # inference.py runs with cwd=REPO_DIR, so every path must be absolute.
        image_path = Path(image_path).resolve()
        audio_path = Path(audio_path).resolve()
        # NOTE(review): the output name is fixed, so overlapping requests
        # would overwrite each other's result; presumably calls are
        # serialised by the Gradio queue. Confirm.
        out_path = (OUTPUT_DIR / "result.mp4").resolve()

        # Remove a stale result so the existence check below reflects this run.
        if out_path.exists():
            out_path.unlink()

        # Limit the common numeric thread pools to a single thread each
        # (presumably to avoid oversubscribing small hosts; verify).
        safe_env = {
            "OMP_NUM_THREADS": "1",
            "MKL_NUM_THREADS": "1",
            "OPENBLAS_NUM_THREADS": "1",
            "NUMEXPR_NUM_THREADS": "1",
        }

        cmd = [
            # Use the interpreter running this app so the child sees the same
            # environment; fall back to PATH lookup if it is unknown.
            sys.executable or "python",
            "inference.py",
            "--face",
            str(image_path),
            "--audio",
            str(audio_path),
            "--outfile",
            str(out_path),
        ]

        run(cmd, cwd=REPO_DIR, extra_env=safe_env)

        if out_path.exists():
            return str(out_path), "✅ Video generated successfully!"
        return None, "❌ Failed (no output file created)."

    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return None, f"❌ Error: {e}"


demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="filepath", label="📷 Face Image"),
        gr.Audio(type="filepath", label="🎵 Audio File"),
    ],
    outputs=[
        gr.Video(label="📹 Generated Video"),
        gr.Textbox(label="Status", lines=6),
    ],
    title="🎬 Video-Retalking Lip Sync",
    description="Upload a face image and audio to generate a lip-synced video.",
)

if __name__ == "__main__":
    demo.launch()