Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,6 +27,45 @@ OUT = BASE / "ai_avatar_out"
|
|
| 27 |
WORK.mkdir(exist_ok=True, parents=True)
|
| 28 |
OUT.mkdir(exist_ok=True, parents=True)
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# -------------------- Configuration --------------------
|
| 31 |
class AgentConfig:
|
| 32 |
def __init__(self,
|
|
@@ -183,32 +222,81 @@ def tts_20s_voice_clone(script_text: str, ref_wav: str, out_wav: str, language:
|
|
| 183 |
ensure_exact_duration(tmp, out_wav, 20.0)
|
| 184 |
return out_wav
|
| 185 |
|
| 186 |
-
# -------------------- SadTalker --------------------
|
| 187 |
def run_sadtalker(source_img: str, driven_wav: str, out_dir: str,
|
| 188 |
expr_scale: float = 1.0, pose_scale: float = 1.0, fps: int = 25) -> str:
|
| 189 |
-
"""Call SadTalker inference."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
out_dir = str(Path(out_dir))
|
| 191 |
os.makedirs(out_dir, exist_ok=True)
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
"
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
]
|
| 205 |
-
run_cmd(args)
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
|
|
|
| 212 |
|
| 213 |
# -------------------- Final Muxing --------------------
|
| 214 |
def mux_audio_video(video_path: str, audio_wav: str, final_mp4: str, fps: int = 25):
|
|
@@ -232,6 +320,15 @@ def run_agent(video_path: str,
|
|
| 232 |
"""Main agent orchestrator function."""
|
| 233 |
logs = AgentLogs()
|
| 234 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
video_path = str(video_path)
|
| 236 |
vid_name = Path(video_path).stem
|
| 237 |
session = WORK / f"run_{uuid.uuid4().hex[:8]}_{vid_name}"
|
|
@@ -268,12 +365,12 @@ def run_agent(video_path: str,
|
|
| 268 |
grab_frame_from_video(video_path, still_img, at_sec=cfg.grab_frame_at)
|
| 269 |
logs.log(f" - Grabbed frame at {cfg.grab_frame_at}s from video.")
|
| 270 |
|
| 271 |
-
logs.log("Step 6) Run SadTalker animation...")
|
| 272 |
raw_video = run_sadtalker(still_img, tts_audio, sadtalker_out,
|
| 273 |
expr_scale=cfg.expr_scale,
|
| 274 |
pose_scale=cfg.pose_scale,
|
| 275 |
fps=cfg.fps)
|
| 276 |
-
logs.log(f" -
|
| 277 |
|
| 278 |
logs.log("Step 7) Mux final MP4 (20s, audio + avatar)...")
|
| 279 |
mux_audio_video(raw_video, tts_audio, final_mp4, fps=cfg.fps)
|
|
|
|
| 27 |
WORK.mkdir(exist_ok=True, parents=True)
|
| 28 |
OUT.mkdir(exist_ok=True, parents=True)
|
| 29 |
|
| 30 |
+
# Setup SadTalker
|
| 31 |
+
SADTALKER_DIR = BASE / "SadTalker"
|
| 32 |
+
|
| 33 |
+
def setup_sadtalker():
    """Clone and prepare SadTalker under ``SADTALKER_DIR`` if it is missing.

    If ``SADTALKER_DIR`` already exists nothing is done. Otherwise the
    repository is cloned, its ``requirements.txt`` (when present) is
    installed with the current interpreter's pip, and
    ``scripts/download_models.sh`` (when present) is run from inside the
    checkout.

    Returns:
        bool: ``True`` when the directory already existed or every setup
        step succeeded; ``False`` when any step failed. On failure the
        partially created directory is removed so that a later call retries
        from scratch instead of treating a broken checkout as ready.
    """
    if SADTALKER_DIR.exists():
        return True

    # Function-scope import: only the failure-cleanup path below needs it.
    import shutil

    print("Setting up SadTalker...")
    try:
        # Clone SadTalker
        subprocess.run(
            ["git", "clone", "https://github.com/OpenTalker/SadTalker.git",
             str(SADTALKER_DIR)],
            check=True, capture_output=True, text=True,
        )

        # Install requirements
        requirements_path = SADTALKER_DIR / "requirements.txt"
        if requirements_path.exists():
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-r", str(requirements_path)],
                check=True, capture_output=True, text=True,
            )

        # Download models
        download_script = SADTALKER_DIR / "scripts" / "download_models.sh"
        if download_script.exists():
            subprocess.run(
                ["bash", str(download_script)],
                cwd=str(SADTALKER_DIR), check=True, capture_output=True, text=True,
            )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ``git``/``bash`` executable, which
        # subprocess reports as FileNotFoundError rather than
        # CalledProcessError; those exceptions carry no stdout/stderr.
        print(f"❌ SadTalker setup failed: {e}")
        print(f"stdout: {getattr(e, 'stdout', None)}")
        print(f"stderr: {getattr(e, 'stderr', None)}")
        # This branch is only reached when the directory did not exist on
        # entry, so anything there now is a partial install created above.
        shutil.rmtree(SADTALKER_DIR, ignore_errors=True)
        return False

    print("✅ SadTalker setup complete!")
    return True
|
| 65 |
+
|
| 66 |
+
# Initialize SadTalker on startup
|
| 67 |
+
setup_sadtalker()
|
| 68 |
+
|
| 69 |
# -------------------- Configuration --------------------
|
| 70 |
class AgentConfig:
|
| 71 |
def __init__(self,
|
|
|
|
| 222 |
ensure_exact_duration(tmp, out_wav, 20.0)
|
| 223 |
return out_wav
|
| 224 |
|
| 225 |
+
# -------------------- SadTalker with Fallback --------------------
|
| 226 |
def run_sadtalker(source_img: str, driven_wav: str, out_dir: str,
                  expr_scale: float = 1.0, pose_scale: float = 1.0, fps: int = 25) -> str:
    """Animate ``source_img`` with ``driven_wav`` via SadTalker, with fallback.

    Runs ``inference.py`` from ``SADTALKER_DIR`` as a subprocess and returns
    the most recently modified ``.mp4`` found under ``out_dir``. Whenever
    SadTalker is unavailable, fails, or produces no video, the result of
    ``create_static_video_fallback`` is returned instead.

    Args:
        source_img: Path to the still image to animate.
        driven_wav: Path to the driving audio file.
        out_dir: Directory for results; created if missing.
        expr_scale: Value passed as ``--expression_scale``.
        pose_scale: Value passed as ``--pose_scale``.
        fps: Value passed as ``--fps`` and to the fallback.

    Returns:
        Path to the produced video (SadTalker output or fallback).

    Raises:
        RuntimeError: Propagated from the fallback when it also fails.
    """
    # Resolve everything up front: inference runs with the working directory
    # switched to SADTALKER_DIR, so relative paths would otherwise point at
    # the wrong location for the subprocess.
    source_img = os.path.abspath(source_img)
    driven_wav = os.path.abspath(driven_wav)
    out_dir = os.path.abspath(out_dir)
    # Create the output directory before any fallback can be taken; the
    # fallback writes its video into out_dir.
    os.makedirs(out_dir, exist_ok=True)

    if not SADTALKER_DIR.exists() and not setup_sadtalker():
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)

    sadtalker_dir = os.path.abspath(SADTALKER_DIR)
    inference_script = os.path.join(sadtalker_dir, "inference.py")
    if not os.path.exists(inference_script):
        print("❌ SadTalker inference script not found, using fallback")
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)

    try:
        # NOTE(review): confirm that SadTalker's inference.py accepts
        # --pose_scale and --fps; an unrecognized flag would make the script
        # exit non-zero and every call would end up on the fallback path.
        args = [
            sys.executable, inference_script,
            "--driven_audio", driven_wav,
            "--source_image", source_img,
            "--preprocess", "full",
            "--still",
            "--enhancer", "gfpgan",
            "--expression_scale", str(expr_scale),
            "--pose_scale", str(pose_scale),
            "--result_dir", out_dir,
            "--fps", str(fps),
        ]

        # Change to SadTalker directory for execution, always restoring the
        # previous working directory afterwards.
        original_cwd = os.getcwd()
        try:
            os.chdir(sadtalker_dir)
            run_cmd(args)
        finally:
            os.chdir(original_cwd)

        # Pick the newest mp4 anywhere below the result directory.
        mp4s = sorted(glob.glob(os.path.join(out_dir, "**", "*.mp4"), recursive=True),
                      key=os.path.getmtime)
        if not mp4s:
            print("❌ SadTalker produced no output, using fallback")
            return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
        return mp4s[-1]

    except Exception as e:
        # Deliberate best-effort boundary: any SadTalker failure degrades to
        # the static-image video instead of aborting the pipeline.
        print(f"❌ SadTalker failed: {e}, using fallback")
        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
|
| 273 |
+
|
| 274 |
+
def create_static_video_fallback(source_img: str, driven_wav: str, out_dir: str, fps: int = 25) -> str:
    """Create a static video with the image and audio as fallback.

    Loops ``source_img`` for the length of ``driven_wav`` and encodes both
    with ffmpeg into ``<out_dir>/fallback_output.mp4``.

    Args:
        source_img: Path to the still image shown for the whole clip.
        driven_wav: Path to the audio track; its length sets the duration.
        out_dir: Directory that receives the video; created if missing.
        fps: Output frame rate passed to ffmpeg via ``-r``.

    Returns:
        Path to the generated ``fallback_output.mp4``.

    Raises:
        RuntimeError: If the ffmpeg command fails; the original exception is
            chained as ``__cause__``.
    """
    # The caller may reach this fallback before it has created out_dir.
    os.makedirs(out_dir, exist_ok=True)
    output_path = os.path.join(out_dir, "fallback_output.mp4")

    # Get audio duration
    audio = AudioSegment.from_file(driven_wav)
    duration = len(audio) / 1000.0  # Convert to seconds

    # Create video with static image and audio
    cmd = [
        "ffmpeg", "-y",
        "-loop", "1", "-i", source_img,
        "-i", driven_wav,
        # libx264 with yuv420p needs even width/height; round odd frame
        # sizes down by one pixel so arbitrary grabbed frames still encode.
        "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2",
        "-c:v", "libx264", "-tune", "stillimage", "-c:a", "aac",
        "-b:a", "192k", "-pix_fmt", "yuv420p",
        "-shortest", "-r", str(fps),
        "-t", str(duration),
        output_path,
    ]

    try:
        run_cmd(cmd)
    except Exception as e:
        # Chain the cause so the underlying ffmpeg error stays visible.
        raise RuntimeError(f"Even fallback video creation failed: {e}") from e

    print(f"✅ Created fallback static video: {output_path}")
    return output_path
|
| 300 |
|
| 301 |
# -------------------- Final Muxing --------------------
|
| 302 |
def mux_audio_video(video_path: str, audio_wav: str, final_mp4: str, fps: int = 25):
|
|
|
|
| 320 |
"""Main agent orchestrator function."""
|
| 321 |
logs = AgentLogs()
|
| 322 |
try:
|
| 323 |
+
# Check SadTalker setup first
|
| 324 |
+
logs.log("Checking SadTalker setup...")
|
| 325 |
+
if not SADTALKER_DIR.exists():
|
| 326 |
+
logs.log("Setting up SadTalker (first run may take a few minutes)...")
|
| 327 |
+
if not setup_sadtalker():
|
| 328 |
+
logs.log("⚠️ SadTalker setup failed, will use static video fallback")
|
| 329 |
+
else:
|
| 330 |
+
logs.log("✅ SadTalker ready")
|
| 331 |
+
|
| 332 |
video_path = str(video_path)
|
| 333 |
vid_name = Path(video_path).stem
|
| 334 |
session = WORK / f"run_{uuid.uuid4().hex[:8]}_{vid_name}"
|
|
|
|
| 365 |
grab_frame_from_video(video_path, still_img, at_sec=cfg.grab_frame_at)
|
| 366 |
logs.log(f" - Grabbed frame at {cfg.grab_frame_at}s from video.")
|
| 367 |
|
| 368 |
+
logs.log("Step 6) Run SadTalker animation (or fallback)...")
|
| 369 |
raw_video = run_sadtalker(still_img, tts_audio, sadtalker_out,
|
| 370 |
expr_scale=cfg.expr_scale,
|
| 371 |
pose_scale=cfg.pose_scale,
|
| 372 |
fps=cfg.fps)
|
| 373 |
+
logs.log(f" - Video output: {raw_video}")
|
| 374 |
|
| 375 |
logs.log("Step 7) Mux final MP4 (20s, audio + avatar)...")
|
| 376 |
mux_audio_video(raw_video, tts_audio, final_mp4, fps=cfg.fps)
|