banao-tech committed on
Commit
a2cf6b9
·
verified ·
1 Parent(s): edc6454

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -0
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import gradio as gr
4
+ from pathlib import Path
5
+ from datetime import datetime
6
+ import shutil
7
+
8
# Environment setup
# Both variables are exported at import time so that the libraries reading
# them (Gradio for its temp dir, huggingface_hub for its download backend)
# see them before any request is served.
os.environ["GRADIO_TEMP_DIR"] = "/tmp/gradio"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Resolve to an absolute location: these paths are later embedded in commands
# that run with a different working directory, where a relative path derived
# from a relative ``__file__`` would point at the wrong place.
ROOT = Path(__file__).resolve().parent
MUSETALK_DIR = ROOT / "MuseTalk"        # checkout of the MuseTalk repository
MODELS_DIR = MUSETALK_DIR / "models"    # downloaded model weights
RESULTS_DIR = MUSETALK_DIR / "results"  # directory passed as --result_dir
16
+
17
def run_command(cmd, cwd=None):
    """Run an external command and report whether it succeeded.

    Args:
        cmd: The command to execute. A string is handed to the system shell,
            so quoting is the caller's responsibility. A list or tuple of
            arguments is executed directly, without a shell.
        cwd: Optional working directory for the child process.

    Returns:
        True if the command exited with status 0, otherwise False. A command
        that could not be launched at all (missing working directory,
        missing executable) also yields False instead of raising.
    """
    # Only string commands need shell parsing; argument lists bypass the
    # shell entirely, which avoids quoting and injection problems.
    use_shell = isinstance(cmd, str)
    try:
        result = subprocess.run(
            cmd,
            shell=use_shell,
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Many tools report the useful part of a failure on stdout, so show
        # it alongside stderr rather than discarding it.
        if e.stdout:
            print(e.stdout)
        print(f"Error: {e.stderr}")
        return False
    except OSError as e:
        # Raised before the child runs, e.g. when cwd does not exist.
        print(f"Error: {e}")
        return False

    print(result.stdout)
    return True
33
+
34
def setup_musetalk():
    """Clone the MuseTalk repository and download the models it needs.

    Steps whose result is already on disk (an existing checkout, an existing
    ``models/musetalk`` or ``models/whisper`` directory) are skipped.

    Returns:
        True when every step either succeeded or was already done; False as
        soon as the clone or one of the downloads fails.
    """
    if not MUSETALK_DIR.exists():
        print("📦 Cloning MuseTalk...")
        # Quote the destination so a checkout path containing spaces
        # survives shell word splitting.
        cloned = run_command(
            f'git clone https://github.com/TMElyralab/MuseTalk.git "{MUSETALK_DIR}"'
        )
        if not cloned:
            # Stop before any mkdir below: creating MuseTalk/ after a failed
            # clone would make every later run skip the clone for good.
            return False

    # Create necessary directories
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)

    # Download models if not present
    # NOTE(review): an interrupted download may leave a partial directory
    # behind that this existence check then treats as complete — confirm.
    if not (MODELS_DIR / "musetalk").exists():
        print("📥 Downloading MuseTalk models...")
        downloaded = run_command(
            "huggingface-cli download TMElyralab/MuseTalk --local-dir models",
            cwd=MUSETALK_DIR,
        )
        if not downloaded:
            return False

    # Download Whisper model for audio processing
    if not (MODELS_DIR / "whisper").exists():
        print("📥 Downloading Whisper...")
        downloaded = run_command(
            "huggingface-cli download openai/whisper-tiny --local-dir models/whisper",
            cwd=MUSETALK_DIR,
        )
        if not downloaded:
            return False

    print("✅ Setup complete!")
    return True
62
+
63
def generate_video(avatar_image, audio_file, bbox_shift=0):
    """Generate a lip-synced video from a face image and an audio clip.

    Args:
        avatar_image: Filesystem path of the face image to animate.
        audio_file: Filesystem path of the driving audio clip.
        bbox_shift: Value forwarded to the inference script as
            ``--bbox_shift``; it is coerced to ``int`` first.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is the path
        of the generated video as a string, or None when no video was
        produced; ``status_message`` is a human-readable result line.
    """
    try:
        # Validate first so an incomplete request never triggers the
        # expensive first-run setup.
        if not avatar_image or not audio_file:
            return None, "❌ Please upload both image and audio"

        # Setup on first run
        if not MUSETALK_DIR.exists():
            if not setup_musetalk():
                return None, "❌ Setup failed"

        # Coerce before it reaches the shell command line: this both matches
        # an integer CLI option and rejects non-numeric input (the resulting
        # ValueError/TypeError is reported by the handler below).
        shift = int(bbox_shift)

        # Prepare input files. Microseconds keep two requests that arrive in
        # the same second from overwriting each other's inputs.
        now = datetime.now()
        started = now.timestamp()
        timestamp = now.strftime("%Y%m%d_%H%M%S_%f")
        # NOTE(review): the copies are always named .jpg/.wav whatever the
        # upload's real format is — confirm the inference script is happy
        # with that.
        input_img = MUSETALK_DIR / f"input_img_{timestamp}.jpg"
        input_audio = MUSETALK_DIR / f"input_audio_{timestamp}.wav"

        try:
            shutil.copy(avatar_image, input_img)
            shutil.copy(audio_file, input_audio)

            # Run MuseTalk inference
            # NOTE(review): verify these flags against the checked-out
            # scripts/inference.py; the upstream CLI may expect an
            # --inference_config file instead — TODO confirm.
            print("🎬 Generating lip-synced video...")
            cmd = (
                "python -m scripts.inference "
                f'--avatar_id "{input_img}" '
                f'--audio_path "{input_audio}" '
                f"--bbox_shift {shift} "
                f'--result_dir "{RESULTS_DIR}"'
            )

            if not run_command(cmd, cwd=MUSETALK_DIR):
                return None, "❌ Video generation failed"
        finally:
            # The copies are only needed while inference runs; without this
            # they accumulate in the checkout on every request.
            for leftover in (input_img, input_audio):
                if leftover.exists():
                    leftover.unlink()

        # Find generated video
        video_files = list(RESULTS_DIR.glob(f"*{timestamp}*.mp4"))
        if not video_files:
            # Fall back to the newest video written during THIS run only, so
            # a stale result from an earlier request is never returned.
            video_files = sorted(
                (
                    candidate
                    for candidate in RESULTS_DIR.glob("*.mp4")
                    if candidate.stat().st_mtime >= started
                ),
                key=os.path.getmtime,
                reverse=True,
            )

        if video_files:
            return str(video_files[0]), "✅ Video generated successfully!"
        return None, "❌ Output video not found"

    except Exception as e:
        # UI boundary: report any failure in the status box instead of
        # surfacing a traceback to the user.
        return None, f"❌ Error: {str(e)}"
109
+
110
# Gradio Interface
# Two-column layout: inputs (image, audio, alignment slider) on the left,
# the generated video and a status line on the right, with the generate
# button and usage notes underneath.
with gr.Blocks(theme=gr.themes.Soft(), title="MuseTalk Lip Sync") as demo:
    gr.Markdown(
        """
        # 🎤 MuseTalk - AI Lip Sync Generator

        Upload a face image and audio to create realistic lip-synced videos!

        **✨ Features:**
        - Fast generation (~30 seconds)
        - High quality lip sync
        - Works on T4 GPU
        - Supports various face angles
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" makes both components hand generate_video a
            # path on disk rather than decoded data.
            avatar = gr.Image(
                type="filepath",
                label="📷 Face Image",
                height=300
            )
            audio = gr.Audio(
                type="filepath",
                label="🎵 Audio File"
            )
            bbox_shift = gr.Slider(
                -20, 20, value=0, step=1,
                label="Face Alignment Adjustment",
                info="Adjust if face detection is off"
            )

        with gr.Column(scale=1):
            output_video = gr.Video(
                label="🎬 Generated Video",
                height=400
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                value="Ready to generate..."
            )

    generate_btn = gr.Button(
        "🚀 Generate Lip-Synced Video",
        variant="primary",
        size="lg"
    )

    # generate_video returns (video_path, status_message), matching the two
    # output components in order.
    generate_btn.click(
        fn=generate_video,
        inputs=[avatar, audio, bbox_shift],
        outputs=[output_video, status]
    )

    gr.Markdown(
        """
        ---
        ### 📝 Tips:
        - Use clear, front-facing images for best results
        - Recommended resolution: 512x512 or higher
        - Audio should be clear with minimal background noise
        - First run downloads models (~3GB) - please wait

        ### ⚡ Performance:
        - T4 GPU: ~30-60 seconds per video
        - Supports videos up to 2 minutes

        **Powered by [MuseTalk](https://github.com/TMElyralab/MuseTalk)**
        """
    )

if __name__ == "__main__":
    # Cap the number of queued requests, then listen on all interfaces at
    # port 7860.
    demo.queue(max_size=5)
    demo.launch(server_name="0.0.0.0", server_port=7860)