Ali-Raza-167 committed on
Commit
9d84b0f
·
verified ·
1 Parent(s): 48d9d30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -21
app.py CHANGED
@@ -27,6 +27,45 @@ OUT = BASE / "ai_avatar_out"
27
  WORK.mkdir(exist_ok=True, parents=True)
28
  OUT.mkdir(exist_ok=True, parents=True)
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # -------------------- Configuration --------------------
31
  class AgentConfig:
32
  def __init__(self,
@@ -183,32 +222,81 @@ def tts_20s_voice_clone(script_text: str, ref_wav: str, out_wav: str, language:
183
  ensure_exact_duration(tmp, out_wav, 20.0)
184
  return out_wav
185
 
186
- # -------------------- SadTalker --------------------
187
  def run_sadtalker(source_img: str, driven_wav: str, out_dir: str,
188
  expr_scale: float = 1.0, pose_scale: float = 1.0, fps: int = 25) -> str:
189
- """Call SadTalker inference."""
 
 
 
 
190
  out_dir = str(Path(out_dir))
191
  os.makedirs(out_dir, exist_ok=True)
192
 
193
- args = [
194
- "python", "SadTalker/inference.py",
195
- "--driven_audio", driven_wav,
196
- "--source_image", source_img,
197
- "--preprocess", "full",
198
- "--still",
199
- "--enhancer", "gfpgan",
200
- "--expression_scale", str(expr_scale),
201
- "--pose_scale", str(pose_scale),
202
- "--result_dir", out_dir,
203
- "--fps", str(fps),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  ]
205
- run_cmd(args)
206
 
207
- mp4s = sorted(glob.glob(os.path.join(out_dir, "**", "*.mp4"), recursive=True),
208
- key=os.path.getmtime)
209
- if not mp4s:
210
- raise RuntimeError("SadTalker did not produce an MP4.")
211
- return mp4s[-1]
 
212
 
213
  # -------------------- Final Muxing --------------------
214
  def mux_audio_video(video_path: str, audio_wav: str, final_mp4: str, fps: int = 25):
@@ -232,6 +320,15 @@ def run_agent(video_path: str,
232
  """Main agent orchestrator function."""
233
  logs = AgentLogs()
234
  try:
 
 
 
 
 
 
 
 
 
235
  video_path = str(video_path)
236
  vid_name = Path(video_path).stem
237
  session = WORK / f"run_{uuid.uuid4().hex[:8]}_{vid_name}"
@@ -268,12 +365,12 @@ def run_agent(video_path: str,
268
  grab_frame_from_video(video_path, still_img, at_sec=cfg.grab_frame_at)
269
  logs.log(f" - Grabbed frame at {cfg.grab_frame_at}s from video.")
270
 
271
- logs.log("Step 6) Run SadTalker animation...")
272
  raw_video = run_sadtalker(still_img, tts_audio, sadtalker_out,
273
  expr_scale=cfg.expr_scale,
274
  pose_scale=cfg.pose_scale,
275
  fps=cfg.fps)
276
- logs.log(f" - SadTalker output: {raw_video}")
277
 
278
  logs.log("Step 7) Mux final MP4 (20s, audio + avatar)...")
279
  mux_audio_video(raw_video, tts_audio, final_mp4, fps=cfg.fps)
 
27
  WORK.mkdir(exist_ok=True, parents=True)
28
  OUT.mkdir(exist_ok=True, parents=True)
29
 
30
+ # Setup SadTalker
31
+ SADTALKER_DIR = BASE / "SadTalker"
32
+
33
def setup_sadtalker():
    """Clone and provision SadTalker under ``SADTALKER_DIR`` if it is missing.

    Steps, each run only when applicable:
      1. ``git clone`` the OpenTalker/SadTalker repository into ``SADTALKER_DIR``.
      2. ``pip install -r requirements.txt`` if the checkout ships one.
      3. Run ``scripts/download_models.sh`` (from inside the checkout) if present.

    The function is a no-op when ``SADTALKER_DIR`` already exists; the
    contents of an existing directory are not verified.

    Returns:
        bool: True if the directory already existed or every step succeeded,
        False if any step failed (the failure is printed, never raised).
    """
    if SADTALKER_DIR.exists():
        return True

    print("Setting up SadTalker...")
    try:
        # Clone SadTalker
        subprocess.run(
            ["git", "clone", "https://github.com/OpenTalker/SadTalker.git",
             str(SADTALKER_DIR)],
            check=True, capture_output=True, text=True,
        )

        # Install requirements
        requirements_path = SADTALKER_DIR / "requirements.txt"
        if requirements_path.exists():
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-r", str(requirements_path)],
                check=True, capture_output=True, text=True,
            )

        # Download models
        download_script = SADTALKER_DIR / "scripts" / "download_models.sh"
        if download_script.exists():
            subprocess.run(
                ["bash", str(download_script)],
                cwd=str(SADTALKER_DIR), check=True, capture_output=True, text=True,
            )
    except subprocess.CalledProcessError as e:
        print(f"❌ SadTalker setup failed: {e}")
        print(f"stdout: {e.stdout}")
        print(f"stderr: {e.stderr}")
        return False
    except OSError as e:
        # A missing executable (git / bash not on PATH) raises
        # FileNotFoundError, not CalledProcessError. This function is also
        # called at import time, so letting it escape would crash startup
        # instead of allowing the static-video fallback.
        print(f"❌ SadTalker setup failed: {e}")
        return False

    print("✅ SadTalker setup complete!")
    return True
65
+
66
+ # Initialize SadTalker on startup
67
+ setup_sadtalker()
68
+
69
  # -------------------- Configuration --------------------
70
  class AgentConfig:
71
  def __init__(self,
 
222
  ensure_exact_duration(tmp, out_wav, 20.0)
223
  return out_wav
224
 
225
# -------------------- SadTalker with Fallback --------------------
def run_sadtalker(source_img: str, driven_wav: str, out_dir: str,
                  expr_scale: float = 1.0, pose_scale: float = 1.0, fps: int = 25) -> str:
    """Animate ``source_img`` with SadTalker, falling back to a static video.

    Args:
        source_img: Path to the still image used as the avatar.
        driven_wav: Path to the audio file that drives the animation.
        out_dir: Directory that receives the result (created if missing).
        expr_scale: Value passed to SadTalker's ``--expression_scale``.
        pose_scale: Value passed to SadTalker's ``--pose_scale``.
        fps: Frame rate passed to SadTalker and to the fallback encoder.

    Returns:
        Path of the newest ``.mp4`` found under ``out_dir`` after inference,
        or the path returned by ``create_static_video_fallback`` when
        SadTalker is unavailable, fails, or produces no MP4.

    Raises:
        RuntimeError: Propagated from ``create_static_video_fallback`` when
            the fallback itself cannot be produced.
    """
    # Resolve every path up front: inference runs with the working directory
    # switched to the SadTalker checkout, which would otherwise re-root any
    # relative path handed in by the caller.
    source_img = os.path.abspath(source_img)
    driven_wav = os.path.abspath(driven_wav)
    out_dir = os.path.abspath(out_dir)
    # Create the output directory before ANY return path; the fallback writes
    # its MP4 into it as well.
    os.makedirs(out_dir, exist_ok=True)

    if not SADTALKER_DIR.exists() and not setup_sadtalker():
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)

    sadtalker_dir = SADTALKER_DIR.resolve()
    inference_script = sadtalker_dir / "inference.py"
    if not inference_script.exists():
        print(" SadTalker inference script not found, using fallback")
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)

    args = [
        sys.executable, str(inference_script),
        "--driven_audio", driven_wav,
        "--source_image", source_img,
        "--preprocess", "full",
        "--still",
        "--enhancer", "gfpgan",
        "--expression_scale", str(expr_scale),
        "--pose_scale", str(pose_scale),
        "--result_dir", out_dir,
        "--fps", str(fps),
    ]

    try:
        # Change to SadTalker directory for execution
        original_cwd = os.getcwd()
        try:
            os.chdir(str(sadtalker_dir))
            run_cmd(args)
        finally:
            os.chdir(original_cwd)

        mp4s = sorted(glob.glob(os.path.join(out_dir, "**", "*.mp4"), recursive=True),
                      key=os.path.getmtime)
    except Exception as e:
        # Deliberate best-effort boundary: any inference failure degrades to
        # the static video instead of aborting the whole run.
        print(f"❌ SadTalker failed: {e}, using fallback")
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)

    # Kept outside the try block so a failing fallback is raised once rather
    # than being caught above and attempted a second time.
    if not mp4s:
        print("❌ SadTalker produced no output, using fallback")
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
    return mp4s[-1]
273
+
274
def create_static_video_fallback(source_img: str, driven_wav: str, out_dir: str, fps: int = 25) -> str:
    """Create a static video with the image and audio as fallback.

    Encodes ``source_img`` as a looping still frame with ``driven_wav`` as
    the audio track, limited to the audio's duration, and writes the result
    to ``<out_dir>/fallback_output.mp4``.

    Args:
        source_img: Path to the still image shown for the whole video.
        driven_wav: Path to the audio file to embed.
        out_dir: Directory that receives ``fallback_output.mp4`` (created if
            missing).
        fps: Output frame rate passed to ffmpeg's ``-r``.

    Returns:
        Path of the generated MP4.

    Raises:
        RuntimeError: If the ffmpeg command fails; the original exception is
            attached as ``__cause__``.
    """
    # This function can be reached before the caller has created out_dir.
    os.makedirs(out_dir, exist_ok=True)
    output_path = os.path.join(out_dir, "fallback_output.mp4")

    # Get audio duration
    audio = AudioSegment.from_file(driven_wav)
    duration = len(audio) / 1000.0  # Convert to seconds

    # Create video with static image and audio
    cmd = [
        "ffmpeg", "-y",
        "-loop", "1", "-i", source_img,
        "-i", driven_wav,
        "-c:v", "libx264", "-tune", "stillimage", "-c:a", "aac",
        "-b:a", "192k", "-pix_fmt", "yuv420p",
        "-shortest", "-r", str(fps),
        "-t", str(duration),
        output_path,
    ]

    try:
        run_cmd(cmd)
    except Exception as e:
        # Chain the cause so the underlying ffmpeg failure stays visible.
        raise RuntimeError(f"Even fallback video creation failed: {e}") from e

    print(f"✅ Created fallback static video: {output_path}")
    return output_path
300
 
301
  # -------------------- Final Muxing --------------------
302
  def mux_audio_video(video_path: str, audio_wav: str, final_mp4: str, fps: int = 25):
 
320
  """Main agent orchestrator function."""
321
  logs = AgentLogs()
322
  try:
323
+ # Check SadTalker setup first
324
+ logs.log("Checking SadTalker setup...")
325
+ if not SADTALKER_DIR.exists():
326
+ logs.log("Setting up SadTalker (first run may take a few minutes)...")
327
+ if not setup_sadtalker():
328
+ logs.log("⚠️ SadTalker setup failed, will use static video fallback")
329
+ else:
330
+ logs.log("✅ SadTalker ready")
331
+
332
  video_path = str(video_path)
333
  vid_name = Path(video_path).stem
334
  session = WORK / f"run_{uuid.uuid4().hex[:8]}_{vid_name}"
 
365
  grab_frame_from_video(video_path, still_img, at_sec=cfg.grab_frame_at)
366
  logs.log(f" - Grabbed frame at {cfg.grab_frame_at}s from video.")
367
 
368
+ logs.log("Step 6) Run SadTalker animation (or fallback)...")
369
  raw_video = run_sadtalker(still_img, tts_audio, sadtalker_out,
370
  expr_scale=cfg.expr_scale,
371
  pose_scale=cfg.pose_scale,
372
  fps=cfg.fps)
373
+ logs.log(f" - Video output: {raw_video}")
374
 
375
  logs.log("Step 7) Mux final MP4 (20s, audio + avatar)...")
376
  mux_audio_video(raw_video, tts_audio, final_mp4, fps=cfg.fps)