Spaces:

Ali-Raza-167
/

Ai-Avatar-RankSol

Build error

App Files Files Community

Ali-Raza-167 committed on Sep 22, 2025

Commit

9d84b0f

verified ·

1 Parent(s): 48d9d30

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -21

app.py CHANGED Viewed

@@ -27,6 +27,45 @@ OUT = BASE / "ai_avatar_out"
 WORK.mkdir(exist_ok=True, parents=True)
 OUT.mkdir(exist_ok=True, parents=True)
 # -------------------- Configuration --------------------
 class AgentConfig:
     def __init__(self,
@@ -183,32 +222,81 @@ def tts_20s_voice_clone(script_text: str, ref_wav: str, out_wav: str, language:
     ensure_exact_duration(tmp, out_wav, 20.0)
     return out_wav
-# -------------------- SadTalker --------------------
 def run_sadtalker(source_img: str, driven_wav: str, out_dir: str,
                  expr_scale: float = 1.0, pose_scale: float = 1.0, fps: int = 25) -> str:
-    """Call SadTalker inference."""
     out_dir = str(Path(out_dir))
     os.makedirs(out_dir, exist_ok=True)
-    args = [
-        "python", "SadTalker/inference.py",
-        "--driven_audio", driven_wav,
-        "--source_image", source_img,
-        "--preprocess", "full",
-        "--still",
-        "--enhancer", "gfpgan",
-        "--expression_scale", str(expr_scale),
-        "--pose_scale", str(pose_scale),
-        "--result_dir", out_dir,
-        "--fps", str(fps),
     ]
-    run_cmd(args)
-    mp4s = sorted(glob.glob(os.path.join(out_dir, "**", "*.mp4"), recursive=True),
-                  key=os.path.getmtime)
-    if not mp4s:
-        raise RuntimeError("SadTalker did not produce an MP4.")
-    return mp4s[-1]
 # -------------------- Final Muxing --------------------
 def mux_audio_video(video_path: str, audio_wav: str, final_mp4: str, fps: int = 25):
@@ -232,6 +320,15 @@ def run_agent(video_path: str,
     """Main agent orchestrator function."""
     logs = AgentLogs()
     try:
         video_path = str(video_path)
         vid_name = Path(video_path).stem
         session = WORK / f"run_{uuid.uuid4().hex[:8]}_{vid_name}"
@@ -268,12 +365,12 @@ def run_agent(video_path: str,
             grab_frame_from_video(video_path, still_img, at_sec=cfg.grab_frame_at)
             logs.log(f"  - Grabbed frame at {cfg.grab_frame_at}s from video.")
-        logs.log("Step 6) Run SadTalker animation...")
         raw_video = run_sadtalker(still_img, tts_audio, sadtalker_out,
                                  expr_scale=cfg.expr_scale,
                                  pose_scale=cfg.pose_scale,
                                  fps=cfg.fps)
-        logs.log(f"  - SadTalker output: {raw_video}")
         logs.log("Step 7) Mux final MP4 (20s, audio + avatar)...")
         mux_audio_video(raw_video, tts_audio, final_mp4, fps=cfg.fps)

 WORK.mkdir(exist_ok=True, parents=True)
 OUT.mkdir(exist_ok=True, parents=True)
+# Setup SadTalker
+SADTALKER_DIR = BASE / "SadTalker"
+def setup_sadtalker():
+    """Clone and provision SadTalker under ``SADTALKER_DIR`` if it is missing.
+
+    When the directory is absent this clones the upstream repository,
+    installs ``requirements.txt`` with the running interpreter's pip (if the
+    file exists) and runs ``scripts/download_models.sh`` (if the file
+    exists). An existing directory is accepted as-is; its contents are not
+    re-verified.
+
+    Returns:
+        bool: ``True`` if the directory already existed or every step
+        succeeded, ``False`` if any step failed. Failures are printed and
+        never raised, because this function is also called at import time.
+    """
+    if SADTALKER_DIR.exists():
+        return True
+    print("Setting up SadTalker...")
+    try:
+        # Clone SadTalker
+        subprocess.run([
+            "git", "clone", "https://github.com/OpenTalker/SadTalker.git",
+            str(SADTALKER_DIR)
+        ], check=True, capture_output=True, text=True)
+        # Install requirements
+        requirements_path = SADTALKER_DIR / "requirements.txt"
+        if requirements_path.exists():
+            subprocess.run([
+                sys.executable, "-m", "pip", "install", "-r", str(requirements_path)
+            ], check=True, capture_output=True, text=True)
+        # Download models
+        download_script = SADTALKER_DIR / "scripts" / "download_models.sh"
+        if download_script.exists():
+            subprocess.run([
+                "bash", str(download_script)
+            ], cwd=str(SADTALKER_DIR), check=True, capture_output=True, text=True)
+        print("✅ SadTalker setup complete!")
+    except subprocess.CalledProcessError as e:
+        # A step ran but exited non-zero; its captured output explains why.
+        print(f"❌ SadTalker setup failed: {e}")
+        print(f"stdout: {e.stdout}")
+        print(f"stderr: {e.stderr}")
+        return False
+    except OSError as e:
+        # The executable itself (git, bash, the interpreter) could not be
+        # launched, e.g. FileNotFoundError when git is not installed. Without
+        # this branch the error would escape the import-time call and abort
+        # start-up instead of letting callers use the static fallback.
+        print(f"❌ SadTalker setup failed: {e}")
+        return False
+    return True
+# Initialize SadTalker on startup
+setup_sadtalker()
 # -------------------- Configuration --------------------
 class AgentConfig:
     def __init__(self,
     ensure_exact_duration(tmp, out_wav, 20.0)
     return out_wav
+# -------------------- SadTalker with Fallback --------------------
 def run_sadtalker(source_img: str, driven_wav: str, out_dir: str,
                   expr_scale: float = 1.0, pose_scale: float = 1.0, fps: int = 25) -> str:
+    """Animate ``source_img`` with SadTalker, driven by ``driven_wav``.
+
+    Falls back to ``create_static_video_fallback`` (still image plus audio)
+    when SadTalker cannot be set up, its ``inference.py`` is missing, the
+    run raises, or no MP4 is found under ``out_dir`` afterwards.
+
+    Args:
+        source_img: Path of the still image to animate.
+        driven_wav: Path of the audio file passed as ``--driven_audio``.
+        out_dir: Result directory; created if it does not exist.
+        expr_scale: Passed through as ``--expression_scale``.
+        pose_scale: Passed through as ``--pose_scale``.
+        fps: Passed through as ``--fps`` and to the fallback.
+
+    Returns:
+        Path of the most recently modified MP4 found under ``out_dir``, or
+        the path returned by the fallback.
+
+    Raises:
+        Whatever ``create_static_video_fallback`` raises when the fallback
+        itself fails (a ``RuntimeError`` if its ffmpeg command fails).
+    """
+    # Create the output directory before any early return: every fallback
+    # path below writes its MP4 into it.
     out_dir = str(Path(out_dir))
     os.makedirs(out_dir, exist_ok=True)
+    if not SADTALKER_DIR.exists() and not setup_sadtalker():
+        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
+    inference_script = SADTALKER_DIR / "inference.py"
+    if not inference_script.exists():
+        print("❌ SadTalker inference script not found, using fallback")
+        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
+    try:
+        # run_cmd is invoked while the working directory is SADTALKER_DIR, so
+        # every path in the command is made absolute first; a relative path
+        # would otherwise resolve against the SadTalker checkout.
+        args = [
+            sys.executable, os.path.abspath(str(inference_script)),
+            "--driven_audio", os.path.abspath(driven_wav),
+            "--source_image", os.path.abspath(source_img),
+            "--preprocess", "full",
+            "--still",
+            "--enhancer", "gfpgan",
+            "--expression_scale", str(expr_scale),
+            "--pose_scale", str(pose_scale),
+            "--result_dir", os.path.abspath(out_dir),
+            "--fps", str(fps),
+        ]
+        # Change to SadTalker directory for execution, always restoring the
+        # previous working directory afterwards.
+        original_cwd = os.getcwd()
+        try:
+            os.chdir(str(SADTALKER_DIR))
+            run_cmd(args)
+        finally:
+            os.chdir(original_cwd)
+        # Pick the newest MP4 anywhere below out_dir; out_dir keeps the form
+        # the caller passed so the returned path does too.
+        mp4s = sorted(glob.glob(os.path.join(out_dir, "**", "*.mp4"), recursive=True),
+                      key=os.path.getmtime)
+        if not mp4s:
+            print("❌ SadTalker produced no output, using fallback")
+            return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
+        return mp4s[-1]
+    except Exception as e:
+        # Deliberate catch-all: any SadTalker failure degrades to the static video.
+        print(f"❌ SadTalker failed: {e}, using fallback")
+        return create_static_video_fallback(source_img, driven_wav, out_dir, fps)
+def create_static_video_fallback(source_img: str, driven_wav: str, out_dir: str, fps: int = 25) -> str:
+    """Create a static video from one image and an audio track as fallback.
+
+    Args:
+        source_img: Path of the image that is looped for the whole video.
+        driven_wav: Path of the audio file; its length sets the video length.
+        out_dir: Directory that receives ``fallback_output.mp4``; created if
+            it does not exist.
+        fps: Output frame rate passed to ffmpeg as ``-r``.
+
+    Returns:
+        Path of the written ``fallback_output.mp4``.
+
+    Raises:
+        RuntimeError: If the ffmpeg command fails; the original error is
+            chained as ``__cause__``.
+    """
+    # Callers may reach this fallback before they have created out_dir.
+    os.makedirs(out_dir, exist_ok=True)
+    output_path = os.path.join(out_dir, "fallback_output.mp4")
+    # Get audio duration
+    audio = AudioSegment.from_file(driven_wav)
+    duration = len(audio) / 1000.0  # Convert to seconds
+    # Create video with static image and audio
+    cmd = [
+        "ffmpeg", "-y",
+        "-loop", "1", "-i", source_img,
+        "-i", driven_wav,
+        "-c:v", "libx264", "-tune", "stillimage", "-c:a", "aac",
+        "-b:a", "192k", "-pix_fmt", "yuv420p",
+        "-shortest", "-r", str(fps),
+        "-t", str(duration),
+        output_path
     ]
+    try:
+        run_cmd(cmd)
+        print(f"✅ Created fallback static video: {output_path}")
+        return output_path
+    except Exception as e:
+        # Chain the cause so the underlying ffmpeg error stays in the traceback.
+        raise RuntimeError(f"Even fallback video creation failed: {e}") from e
 # -------------------- Final Muxing --------------------
 def mux_audio_video(video_path: str, audio_wav: str, final_mp4: str, fps: int = 25):
     """Main agent orchestrator function."""
     logs = AgentLogs()
     try:
+        # Check SadTalker setup first
+        logs.log("Checking SadTalker setup...")
+        if not SADTALKER_DIR.exists():
+            logs.log("Setting up SadTalker (first run may take a few minutes)...")
+            if not setup_sadtalker():
+                logs.log("⚠️  SadTalker setup failed, will use static video fallback")
+        else:
+            logs.log("✅ SadTalker ready")
         video_path = str(video_path)
         vid_name = Path(video_path).stem
         session = WORK / f"run_{uuid.uuid4().hex[:8]}_{vid_name}"
             grab_frame_from_video(video_path, still_img, at_sec=cfg.grab_frame_at)
             logs.log(f"  - Grabbed frame at {cfg.grab_frame_at}s from video.")
+        logs.log("Step 6) Run SadTalker animation (or fallback)...")
         raw_video = run_sadtalker(still_img, tts_audio, sadtalker_out,
                                  expr_scale=cfg.expr_scale,
                                  pose_scale=cfg.pose_scale,
                                  fps=cfg.fps)
+        logs.log(f"  - Video output: {raw_video}")
         logs.log("Step 7) Mux final MP4 (20s, audio + avatar)...")
         mux_audio_video(raw_video, tts_audio, final_mp4, fps=cfg.fps)