File size: 3,918 Bytes
346fa71
dd056d9
ebe6c13
0cd19a9
dd056d9
 
 
 
 
 
 
ebe6c13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5e0dbc
346fa71
 
 
 
ebe6c13
dd056d9
 
 
 
 
 
ebe6c13
346fa71
dd056d9
 
d5e0dbc
 
 
dd056d9
94c8c66
d5e0dbc
ebe6c13
 
 
e956e33
ebe6c13
 
d5e0dbc
ebe6c13
 
 
dd056d9
ebe6c13
346fa71
dd056d9
d5e0dbc
ebe6c13
 
 
 
 
 
d5e0dbc
ebe6c13
 
 
 
 
 
 
 
d5e0dbc
 
 
ebe6c13
d5e0dbc
ebe6c13
 
 
 
 
 
 
 
 
dd056d9
d5e0dbc
dd056d9
 
56d4c3e
dd056d9
a2cf6b9
e956e33
dd056d9
 
 
 
 
 
 
 
346fa71
 
 
 
 
 
 
dd056d9
 
 
d5e0dbc
 
 
 
 
 
dd056d9
346fa71
dd056d9
 
 
ebe6c13
dd056d9
a2cf6b9
dd056d9
400a8bd
d5e0dbc
e956e33
 
 
56d4c3e
dd056d9
e956e33
 
56d4c3e
d5e0dbc
e956e33
56d4c3e
dd056d9
e956e33
a2cf6b9
56d4c3e
dd056d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
import subprocess
import threading
from pathlib import Path
import gradio as gr

# Directory the upstream video-retalking repository is cloned into by setup().
REPO_DIR = Path("vrt")
# Marker file touched at the end of a successful setup().
SETUP_FLAG = Path("setup_done.txt")
# Directory generated videos are written to; created at import time.
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

# Base URL that each checkpoint file name below is appended to for download.
HF_BASE = "https://huggingface.co/camenduru/video-retalking/resolve/main"

# File names fetched from HF_BASE into REPO_DIR / "checkpoints".
CHECKPOINT_FILES = [
    "30_net_gen.pth",
    "BFM.zip",
    "DNet.pt",
    "ENet.pth",
    "LNet.pth",
    "ParseNet-latest.pth",
    "RetinaFace-R50.pth",
    "expression.mat",
    "face3d_pretrain_epoch_20.pth",
    "GFPGANv1.3.pth",
    "GPEN-BFR-512.pth",
    "shape_predictor_68_face_landmarks.dat",
]

# Held for the whole of setup() so only one thread clones/downloads at a time.
_setup_lock = threading.Lock()


def run(cmd, cwd=None, extra_env=None):
    """Execute *cmd* and return its captured output as text.

    Args:
        cmd: Argument list handed to ``subprocess.run``.
        cwd: Optional working directory; converted with ``str`` when given.
        extra_env: Optional mapping layered on top of a copy of the current
            process environment.

    Returns:
        The child's stdout, with stderr merged into the same stream.

    Raises:
        RuntimeError: If the child exits with a non-zero status. The message
            carries the exit code, the command line and the captured output.
    """
    child_env = dict(os.environ)
    child_env.update(extra_env or {})

    workdir = None if not cwd else str(cwd)
    completed = subprocess.run(
        cmd,
        cwd=workdir,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into stdout: one stream to report
        text=True,
        check=False,  # the exit status is inspected below instead
    )

    output = completed.stdout
    if completed.returncode == 0:
        return output

    rendered = " ".join(cmd)
    raise RuntimeError(
        f"Command failed ({completed.returncode}): {rendered}\n\n{output}"
    )


def download_file(url: str, dest: Path):
    """Download *url* to *dest* unless a non-empty *dest* already exists.

    The transfer is written to a sibling ``<name>.part`` file and renamed onto
    *dest* only after curl exits successfully. An interrupted download
    therefore never leaves a truncated file at *dest* that a later call would
    mistake for a complete one; the next call resumes the ``.part`` file
    through curl's ``-C -`` instead.

    Args:
        url: Source URL.
        dest: Final location of the downloaded file. Its parent directory is
            created if missing.

    Raises:
        RuntimeError: Propagated from ``run`` when curl exits non-zero
            (including HTTP error responses, because of ``--fail``).
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    if dest.exists() and dest.stat().st_size > 0:
        return

    partial = dest.with_name(dest.name + ".part")
    # Pass an argument list rather than a shell string so that quotes, spaces
    # or shell metacharacters in the URL or path cannot change the command.
    run(
        [
            "curl",
            "--fail",  # an HTTP error must not be saved as if it were the file
            "-L",
            "-C",
            "-",
            "--retry",
            "5",
            "--retry-delay",
            "2",
            "-o",
            str(partial),
            url,
        ]
    )
    partial.replace(dest)


def ensure_checkpoints():
    """Download every checkpoint file and unpack ``BFM.zip``.

    Each name in ``CHECKPOINT_FILES`` is fetched from ``HF_BASE`` into
    ``REPO_DIR / "checkpoints"``. ``BFM.zip`` is then extracted to
    ``checkpoints/BFM/``.

    Extraction happens in a staging directory that is renamed to ``BFM`` only
    after ``unzip`` succeeds, so a failed or interrupted extraction does not
    leave behind a ``BFM`` directory that would make later calls skip it. An
    existing but empty ``BFM`` directory is treated as not yet extracted.

    Raises:
        RuntimeError: Propagated from ``run`` when a download or the unzip
            command fails.
    """
    import shutil

    ckpt_dir = REPO_DIR / "checkpoints"
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    for fname in CHECKPOINT_FILES:
        download_file(f"{HF_BASE}/{fname}", ckpt_dir / fname)

    # Unzip BFM.zip -> checkpoints/BFM/
    bfm_zip = ckpt_dir / "BFM.zip"
    bfm_dir = ckpt_dir / "BFM"
    if bfm_dir.is_dir() and any(bfm_dir.iterdir()):
        return

    staging = ckpt_dir / "BFM.extracting"
    if staging.exists():
        # Left over from an earlier attempt that did not finish.
        shutil.rmtree(staging)
    staging.mkdir(parents=True)
    run(["unzip", "-q", str(bfm_zip), "-d", str(staging)])

    if bfm_dir.is_dir():
        # Only reachable for an empty directory (see the check above).
        bfm_dir.rmdir()
    staging.rename(bfm_dir)


def setup():
    """Clone the repository and fetch checkpoints once, under ``_setup_lock``.

    Returns immediately when both ``SETUP_FLAG`` and ``REPO_DIR`` exist.
    Otherwise clones the repository if ``REPO_DIR`` is missing, attempts a
    ``git lfs pull`` (best effort), downloads the checkpoints and finally
    touches ``SETUP_FLAG``.

    Raises:
        RuntimeError: Propagated from ``run`` when the clone or a checkpoint
            step fails. A failing ``git lfs pull`` is reported but not raised.
    """
    with _setup_lock:
        if SETUP_FLAG.exists() and REPO_DIR.exists():
            return

        print("Setting up Video-Retalking...")

        if not REPO_DIR.exists():
            run(
                ["git", "clone", "https://github.com/OpenTalker/video-retalking.git", str(REPO_DIR)]
            )

        # Best effort: keep going when LFS is unavailable, but say why instead
        # of discarding the failure silently.
        try:
            run(["git", "lfs", "pull"], cwd=REPO_DIR)
        except Exception as exc:
            print(f"Warning: 'git lfs pull' failed, continuing without it: {exc}")

        ensure_checkpoints()

        SETUP_FLAG.touch()
        print("βœ… Setup complete!")


def generate(image_path, audio_path):
    """Run Video-Retalking inference for one face image and one audio file.

    Args:
        image_path: File path of the face input; falsy when nothing was
            uploaded.
        audio_path: File path of the audio input; falsy when nothing was
            uploaded.

    Returns:
        A ``(video_path, status)`` tuple. ``video_path`` is the path of the
        generated ``result.mp4`` as a string, or ``None`` when an input is
        missing, inference produced no file, or any step raised. ``status`` is
        a human-readable message; exceptions are reported there rather than
        propagated.
    """
    import sys

    if not image_path or not audio_path:
        return None, "❌ Upload both image and audio!"

    try:
        setup()

        # Absolute paths are required because inference runs with
        # cwd=REPO_DIR, not the current working directory.
        image_path = Path(image_path).resolve()
        audio_path = Path(audio_path).resolve()

        out_path = (OUTPUT_DIR / "result.mp4").resolve()
        # Remove a previous result so the existence check below reflects
        # this run only.
        if out_path.exists():
            out_path.unlink()

        safe_env = {
            "OMP_NUM_THREADS": "1",
            "MKL_NUM_THREADS": "1",
            "OPENBLAS_NUM_THREADS": "1",
            "NUMEXPR_NUM_THREADS": "1",
        }

        cmd = [
            # Use the interpreter running this app so inference sees the same
            # installed packages; a bare "python" depends on PATH and may
            # resolve to a different (or no) interpreter.
            sys.executable or "python",
            "inference.py",
            "--face",
            str(image_path),
            "--audio",
            str(audio_path),
            "--outfile",
            str(out_path),
        ]
        run(cmd, cwd=REPO_DIR, extra_env=safe_env)

        if out_path.exists():
            return str(out_path), "βœ… Video generated successfully!"
        return None, "❌ Failed (no output file created)."

    except Exception as e:
        return None, f"❌ Error: {e}"


# Gradio UI definition: both inputs are handed to generate() as file paths,
# and its (video, status) return pair fills the two outputs in order.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="filepath", label="πŸ“· Face Image"),
        gr.Audio(type="filepath", label="🎡 Audio File"),
    ],
    outputs=[
        gr.Video(label="πŸ“Ή Generated Video"),
        gr.Textbox(label="Status", lines=6),
    ],
    title="🎬 Video-Retalking Lip Sync",
    description="Upload a face image and audio to generate a lip-synced video.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()