banao-tech commited on
Commit
400a8bd
·
verified ·
1 Parent(s): 0ac9382

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -161
app.py CHANGED
@@ -1,185 +1,77 @@
1
  import os
2
- import subprocess
3
  import gradio as gr
 
4
  from pathlib import Path
5
- from datetime import datetime
6
  import shutil
7
 
8
- # Environment setup
9
- os.environ["GRADIO_TEMP_DIR"] = "/tmp/gradio"
10
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
11
 
12
- ROOT = Path(__file__).parent
13
- MUSETALK_DIR = ROOT / "MuseTalk"
14
- MODELS_DIR = MUSETALK_DIR / "models"
15
- RESULTS_DIR = MUSETALK_DIR / "results"
16
-
17
- def run_command(cmd, cwd=None):
18
- """Run shell command safely"""
19
  try:
20
- result = subprocess.run(
21
- cmd,
22
- shell=True,
23
- cwd=cwd,
24
- capture_output=True,
25
- text=True,
26
- check=True
27
- )
28
- print(result.stdout)
29
- return True
30
- except subprocess.CalledProcessError as e:
31
- print(f"Error: {e.stderr}")
32
- return False
33
-
34
- def setup_musetalk():
35
- """Setup MuseTalk repository and models"""
36
- if not MUSETALK_DIR.exists():
37
- print("πŸ“¦ Cloning MuseTalk...")
38
- run_command(f"git clone https://github.com/TMElyralab/MuseTalk.git {MUSETALK_DIR}")
39
-
40
- # Create necessary directories
41
- MODELS_DIR.mkdir(parents=True, exist_ok=True)
42
- RESULTS_DIR.mkdir(parents=True, exist_ok=True)
43
-
44
- # Download models if not present
45
- if not (MODELS_DIR / "musetalk").exists():
46
- print("πŸ“₯ Downloading MuseTalk models...")
47
- run_command(
48
- "huggingface-cli download TMElyralab/MuseTalk --local-dir models",
49
- cwd=MUSETALK_DIR
50
- )
51
-
52
- # Download Whisper model for audio processing
53
- if not (MODELS_DIR / "whisper").exists():
54
- print("πŸ“₯ Downloading Whisper...")
55
- run_command(
56
- "huggingface-cli download openai/whisper-tiny --local-dir models/whisper",
57
- cwd=MUSETALK_DIR
58
- )
59
-
60
- print("βœ… Setup complete!")
61
- return True
62
-
63
- def generate_video(avatar_image, audio_file, bbox_shift=0):
64
- """Generate lip-synced video using MuseTalk"""
65
- try:
66
- # Setup on first run
67
- if not MUSETALK_DIR.exists():
68
- if not setup_musetalk():
69
- return None, "❌ Setup failed"
70
-
71
- if not avatar_image or not audio_file:
72
  return None, "❌ Please upload both image and audio"
73
 
74
- # Prepare input files
75
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
76
- input_img = MUSETALK_DIR / f"input_img_{timestamp}.jpg"
77
- input_audio = MUSETALK_DIR / f"input_audio_{timestamp}.wav"
78
 
79
- shutil.copy(avatar_image, input_img)
80
- shutil.copy(audio_file, input_audio)
 
 
 
 
 
81
 
82
- # Run MuseTalk inference
83
- print("🎬 Generating lip-synced video...")
84
- output_path = RESULTS_DIR / f"output_{timestamp}.mp4"
85
 
86
- cmd = f"""python -m scripts.inference \
87
- --avatar_id "{input_img}" \
88
- --audio_path "{input_audio}" \
89
- --bbox_shift {bbox_shift} \
90
- --result_dir "{RESULTS_DIR}"
91
- """
92
 
93
- if not run_command(cmd, cwd=MUSETALK_DIR):
94
- return None, "❌ Video generation failed"
95
-
96
- # Find generated video
97
- video_files = list(RESULTS_DIR.glob(f"*{timestamp}*.mp4"))
98
- if not video_files:
99
- # Try finding any recent video
100
- video_files = sorted(RESULTS_DIR.glob("*.mp4"), key=os.path.getmtime, reverse=True)
101
-
102
- if video_files:
103
- return str(video_files[0]), "βœ… Video generated successfully!"
104
- else:
105
- return None, "❌ Output video not found"
106
-
107
  except Exception as e:
108
  return None, f"❌ Error: {str(e)}"
109
 
110
- # Gradio Interface
111
- with gr.Blocks(theme=gr.themes.Soft(), title="MuseTalk Lip Sync") as demo:
112
- gr.Markdown(
113
- """
114
- # 🎀 MuseTalk - AI Lip Sync Generator
 
 
 
 
 
 
 
 
115
 
116
- Upload a face image and audio to create realistic lip-synced videos!
 
 
 
117
 
118
- **✨ Features:**
119
- - Fast generation (~30 seconds)
120
- - High quality lip sync
121
- - Works on T4 GPU
122
- - Supports various face angles
123
- """
124
- )
125
 
126
  with gr.Row():
127
- with gr.Column(scale=1):
128
- avatar = gr.Image(
129
- type="filepath",
130
- label="πŸ“· Face Image",
131
- height=300
132
- )
133
- audio = gr.Audio(
134
- type="filepath",
135
- label="🎡 Audio File"
136
- )
137
- bbox_shift = gr.Slider(
138
- -20, 20, value=0, step=1,
139
- label="Face Alignment Adjustment",
140
- info="Adjust if face detection is off"
141
- )
142
 
143
- with gr.Column(scale=1):
144
- output_video = gr.Video(
145
- label="🎬 Generated Video",
146
- height=400
147
- )
148
- status = gr.Textbox(
149
- label="Status",
150
- interactive=False,
151
- value="Ready to generate..."
152
- )
153
-
154
- generate_btn = gr.Button(
155
- "πŸš€ Generate Lip-Synced Video",
156
- variant="primary",
157
- size="lg"
158
- )
159
 
160
- generate_btn.click(
161
- fn=generate_video,
162
- inputs=[avatar, audio, bbox_shift],
163
- outputs=[output_video, status]
164
- )
165
-
166
- gr.Markdown(
167
- """
168
- ---
169
- ### πŸ“ Tips:
170
- - Use clear, front-facing images for best results
171
- - Recommended resolution: 512x512 or higher
172
- - Audio should be clear with minimal background noise
173
- - First run downloads models (~3GB) - please wait
174
-
175
- ### ⚑ Performance:
176
- - T4 GPU: ~30-60 seconds per video
177
- - Supports videos up to 2 minutes
178
-
179
- **Powered by [MuseTalk](https://github.com/TMElyralab/MuseTalk)**
180
- """
181
- )
182
 
183
  if __name__ == "__main__":
184
- demo.queue(max_size=5)
185
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
 
2
  import gradio as gr
3
+ import subprocess
4
  from pathlib import Path
 
5
  import shutil
6
 
7
# Working directory for generated artifacts, created eagerly at import time.
# parents=True makes this robust even if the parent directory is missing.
ROOT = Path("/tmp/musehub")
ROOT.mkdir(parents=True, exist_ok=True)
10
 
11
def generate_lipsync(image_path, audio_path):
    """Run video-retalking inference to lip-sync a face image to an audio clip.

    Args:
        image_path: Filesystem path to the face image (from ``gr.Image``).
        audio_path: Filesystem path to the driving audio (from ``gr.Audio``).

    Returns:
        Tuple ``(video_path, status)`` — the path to the generated mp4
        (or ``None`` on failure) and a human-readable status message.
    """
    try:
        if not image_path or not audio_path:
            return None, "❌ Please upload both image and audio"

        # Fail fast with a clear message instead of a cryptic subprocess
        # error when setup_model() has not (successfully) run yet.
        repo_dir = Path("/app/video-retalking")
        if not repo_dir.exists():
            return None, "❌ Model not installed — restart the app to run setup"

        # Unique output name so concurrent requests (Gradio can serve
        # several users) don't overwrite each other's result file.
        from uuid import uuid4
        output = ROOT / f"output_{uuid4().hex}.mp4"

        cmd = [
            "python", "inference.py",
            "--face", image_path,
            "--audio", audio_path,
            "--outfile", str(output),
        ]

        # check=True raises CalledProcessError on a non-zero exit, which the
        # except below converts into a user-visible status message.
        subprocess.run(cmd, check=True, cwd=str(repo_dir))

        if output.exists():
            return str(output), "✅ Video generated!"
        return None, "❌ Generation failed"
    except Exception as e:
        return None, f"❌ Error: {str(e)}"
35
 
36
def setup_model():
    """Clone video-retalking and download its checkpoints (idempotent).

    Returns:
        True if the repo is present (already cloned, or cloned just now),
        False if any setup step failed.
    """
    repo_dir = Path("/app/video-retalking")
    if repo_dir.exists():
        return True

    try:
        # Fetch the inference code.
        subprocess.run([
            "git", "clone",
            "https://github.com/OpenTalker/video-retalking.git",
            str(repo_dir),
        ], check=True)

        # Fetch the pretrained checkpoints (large download on first run).
        subprocess.run(
            ["bash", "scripts/download_models.sh"],
            cwd=repo_dir, check=True,
        )

        return True
    except (subprocess.CalledProcessError, OSError):
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; OSError covers a missing git/bash binary.
        return False
58
+
59
# Gradio UI — two-column layout: inputs on the left, result on the right.
# (Emoji in the labels were mojibake'd — restored to the intended glyphs.)
with gr.Blocks(title="AI Lip Sync") as demo:
    gr.Markdown("# 🎤 AI Lip Sync Generator\nUpload a face image and audio to create lip-synced video")

    with gr.Row():
        with gr.Column():
            # type="filepath" hands generate_lipsync plain paths, not arrays.
            image = gr.Image(type="filepath", label="Face Image")
            audio = gr.Audio(type="filepath", label="Audio File")
            btn = gr.Button("🚀 Generate", variant="primary")

        with gr.Column():
            video = gr.Video(label="Result")
            status = gr.Textbox(label="Status")

    btn.click(generate_lipsync, [image, audio], [video, status])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
if __name__ == "__main__":
    # Best-effort model setup; launch the UI either way so a setup failure
    # is visible in the logs instead of the process silently looking healthy.
    if not setup_model():
        print("⚠️ Model setup failed — generation requests will return errors.")
    demo.launch()