File size: 2,807 Bytes
fea5695
 
 
 
 
 
ed87501
fea5695
 
 
 
 
 
 
 
 
 
ed87501
 
 
 
fea5695
 
 
 
ed87501
 
fea5695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import os
import subprocess
import sys

# MuseTalk Gradio Interface

def setup_musetalk():
    """Clone the MuseTalk repository (if absent), enter it, and install its deps.

    Side effects:
        - May run ``git clone`` in the current working directory.
        - Changes the process working directory into ``MuseTalk``.
        - Runs ``pip install -r requirements.txt`` (network access required).

    Idempotent: safe to call again after a previous successful run — the
    clone is skipped when the directory exists, and the chdir is skipped
    when we are already inside ``MuseTalk`` (the original unconditional
    chdir would re-descend and re-clone on repeated calls, e.g. in a
    re-executed notebook cell).
    """
    # Only chdir if we are not already inside the repo from a prior call.
    if os.path.basename(os.getcwd()) != 'MuseTalk':
        if not os.path.exists('MuseTalk'):
            subprocess.run(['git', 'clone', 'https://github.com/TMElyralab/MuseTalk.git'], check=True)
        os.chdir('MuseTalk')
    # Re-running pip is cheap when requirements are already satisfied.
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'], check=True)
    
def inference(video_file, audio_file, bbox_shift=0):
    """Run MuseTalk inference on a video/audio pair via its CLI script.

    Args:
        video_file: Input video — either a file-like object with a ``.name``
            attribute (Gradio upload) or a plain path string.
        audio_file: Input audio, same convention as ``video_file``.
        bbox_shift: Vertical adjustment of the detected face bounding box,
            forwarded to the script as ``--bbox_shift``.

    Returns:
        Path to ``results/output.mp4`` on success, ``None`` when the script
        produced no output, or an ``"Error: ..."`` string if an exception
        was raised while preparing or launching the subprocess.
    """
    try:
        # Gradio may hand us either a tempfile wrapper or a bare path.
        video_path = video_file.name if hasattr(video_file, 'name') else video_file
        audio_path = audio_file.name if hasattr(audio_file, 'name') else audio_file

        cmd = [
            sys.executable,
            'scripts/inference/gradio_demo.py',
            '--video', video_path,
            '--audio', audio_path,
            '--bbox_shift', str(bbox_shift)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        # Fix: the original discarded the exit status and stderr, making
        # failures indistinguishable from "no output". Surface diagnostics.
        if result.returncode != 0:
            print(f"Inference failed (exit {result.returncode}):\n{result.stderr}",
                  file=sys.stderr)

        # The script is expected to write its result here; absence means failure.
        output_path = 'results/output.mp4'
        return output_path if os.path.exists(output_path) else None
    except Exception as e:
        return f"Error: {str(e)}"

# Create Gradio interface
# Built at import time so `demo` is available both to `demo.launch()` below
# and to external runners (e.g. `gradio deploy`) that import this module.
with gr.Blocks(title="MuseTalk - Real-time Audio-Driven Lip Sync") as demo:
    gr.Markdown("""
    # MuseTalk: Real-Time High-Quality Lip Synchronization
    
    Upload a video and audio file to generate lip-synced output.
    
    **Note:** First run will download required model weights (~2GB).
    """)
    
    # Two-column layout: inputs + controls on the left, result on the right.
    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Input Video")
            # type="filepath" hands `inference` a path string rather than
            # raw audio data, matching the CLI's --audio argument.
            audio_input = gr.Audio(label="Input Audio", type="filepath")
            bbox_shift = gr.Slider(minimum=-10, maximum=10, value=0, step=1, 
                                   label="BBox Shift", 
                                   info="Adjust face bounding box position")
            submit_btn = gr.Button("Generate", variant="primary")
        
        with gr.Column():
            video_output = gr.Video(label="Output Video")
    
    # Wire the button to the inference function; the slider value is passed
    # through as the bbox_shift argument.
    submit_btn.click(
        fn=inference,
        inputs=[video_input, audio_input, bbox_shift],
        outputs=video_output
    )
    
    gr.Markdown("""
    ## About MuseTalk
    
    MuseTalk generates lip-synchronized videos from input video and audio files.
    
    - [GitHub Repository](https://github.com/TMElyralab/MuseTalk)
    - [Model Weights](https://huggingface.co/TMElyralab/MuseTalk)
    """)

if __name__ == "__main__":
    # Best-effort environment setup: clone the repo and install requirements
    # on first run. A failure here is non-fatal — the UI can still launch
    # when MuseTalk was already set up by a previous run.
    try:
        setup_musetalk()
    except Exception as e:
        print(f"Setup warning: {e}")

    demo.launch()