File size: 3,152 Bytes
35698c9
ad8f099
 
35698c9
 
ad8f099
 
 
 
 
35698c9
ad8f099
 
 
35698c9
ad8f099
 
 
35698c9
ad8f099
 
35698c9
ad8f099
 
35698c9
ad8f099
 
 
 
 
 
35698c9
ad8f099
 
 
 
 
 
 
 
35698c9
 
ad8f099
 
35698c9
 
 
 
 
ad8f099
 
35698c9
ad8f099
35698c9
 
 
 
 
ad8f099
35698c9
 
 
 
 
 
 
 
 
 
 
 
ad8f099
35698c9
ad8f099
35698c9
 
 
ad8f099
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35698c9
 
ad8f099
 
 
 
 
 
 
35698c9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import sys
import types
import uuid

# --------------------------------------------------------------------
# Fix environment issues
# --------------------------------------------------------------------
# Cap OpenMP at one thread to avoid the OMP_NUM_THREADS startup error.
os.environ["OMP_NUM_THREADS"] = "1"

# wyn_wav2lip imports google.colab at import time; outside Colab that
# package is absent, so register stub modules before the import happens.
_google_mod = types.ModuleType("google")
_colab_mod = types.ModuleType("google.colab")


class _DriveStub:
    """No-op stand-in for ``google.colab.drive``."""

    def mount(self, *args, **kwargs):
        # Accept any arguments and do nothing beyond announcing the call.
        print("google.colab.drive.mount() called (dummy).")


_colab_mod.drive = _DriveStub()
_google_mod.colab = _colab_mod

sys.modules["google"] = _google_mod
sys.modules["google.colab"] = _colab_mod

# --------------------------------------------------------------------
# Imports
# --------------------------------------------------------------------
import gradio as gr
from wyn_wav2lip.wav2lip import Wav2Lip
import soundfile as sf

# Directory layout: all uploaded media and generated videos live in a
# "media" folder next to this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MEDIA_DIR = os.path.join(BASE_DIR, "media")
os.makedirs(MEDIA_DIR, exist_ok=True)

# Load the Wav2Lip model once at import time so each request reuses it.
# NOTE(review): setup() presumably downloads/loads model weights — this can
# be slow on first run; confirm against wyn_wav2lip's documentation.
print("Initialising Wav2Lip...")
wav2lip = Wav2Lip()
wav2lip.setup()
print("Wav2Lip ready.")


def run_wav2lip(image_path: str, audio_path: str) -> str:
    """Run Wav2Lip and return the absolute path to the created video file.

    Wav2Lip writes its output MP4 into the current working directory, so
    we temporarily chdir into MEDIA_DIR and detect the output by diffing
    the set of .mp4 files before and after the run.

    Args:
        image_path: Path to the input image; its basename must resolve
            inside MEDIA_DIR.
        audio_path: Path to the input WAV file; its basename must resolve
            inside MEDIA_DIR.

    Returns:
        Absolute path of the newly created MP4 (most recently modified
        candidate when several exist).

    Raises:
        RuntimeError: If no MP4 exists in MEDIA_DIR after the run.
    """

    def _mp4_names() -> set:
        # Filenames (not paths) of all .mp4 files currently in MEDIA_DIR.
        return {f for f in os.listdir(MEDIA_DIR) if f.lower().endswith(".mp4")}

    existing_mp4 = _mp4_names()

    old_cwd = os.getcwd()
    os.chdir(MEDIA_DIR)
    try:
        # Wav2Lip expects filenames relative to the current working dir.
        wav2lip.run(
            video_file=os.path.basename(image_path),
            vocal_file=os.path.basename(audio_path),
        )
    finally:
        # Always restore the working directory, even if Wav2Lip raises.
        os.chdir(old_cwd)

    # Prefer files created by this run; fall back to any pre-existing MP4.
    # In both cases pick the most recently modified candidate — the
    # original code returned an arbitrary os.listdir entry for new files,
    # which is order-dependent and could select the wrong video.
    candidates = _mp4_names() - existing_mp4 or _mp4_names()
    if not candidates:
        raise RuntimeError("No MP4 created by Wav2Lip.")
    paths = [os.path.join(MEDIA_DIR, f) for f in candidates]
    return max(paths, key=os.path.getmtime)


def lipsync_func(image, audio):
    """Gradio handler: persist the inputs, run Wav2Lip, return video path.

    image: PIL image (from gr.Image)
    audio: (sr, data) tuple (from gr.Audio(type="numpy"))
    """
    if image is None or audio is None:
        return None

    # Write both inputs into MEDIA_DIR under random, collision-free names.
    image_path = os.path.join(MEDIA_DIR, f"{uuid.uuid4().hex}.png")
    image.save(image_path)

    sample_rate, samples = audio
    audio_path = os.path.join(MEDIA_DIR, f"{uuid.uuid4().hex}.wav")
    sf.write(audio_path, samples, sample_rate)

    # Hand off to Wav2Lip and return the generated video's path.
    return run_wav2lip(image_path, audio_path)


# Gradio UI wiring: one image + one audio clip in, one video out.
demo = gr.Interface(
    fn=lipsync_func,
    inputs=[
        gr.Image(type="pil", label="Teacher image"),
        # type="numpy" delivers audio to lipsync_func as an (sr, data) tuple.
        gr.Audio(type="numpy", label="Teacher audio (.wav)")
    ],
    outputs=gr.Video(label="Lip-synced video"),
    title="Wav2Lip Lipsync Service",
    description="Upload a static teacher image and a WAV audio. The model will generate a talking video."
)

# Launch the web app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()