Spaces:

lulavc
/

AnimaStudio

Running on Zero

lulavc committed on Mar 4

Commit

a628050

1 Parent(s): 43f8b96

fix: apply code-review and security-review findings

- ndim guard before arr.shape[2] in _coerce_frames
- _mux_video: clean up partial output on ffmpeg failure
- _run_tts: clean up temp WAV on save failure
- Generic user-facing error messages (log full trace server-side)
- Fix missing closing ) in translate error f-string
- Validate ffprobe duration output before float cast
- Add queue(max_size=10, default_concurrency_limit=1) for GPU endpoints

Files changed (2) hide show

app.py +38 -18
dubbing.py +8 -1

app.py CHANGED Viewed

@@ -132,7 +132,7 @@ def _coerce_frames(frames):
         if arr.ndim == 2:
             import cv2
             arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
-        elif arr.shape[2] == 4:
             arr = arr[:, :, :3]
         result.append(arr)
     return result
@@ -145,20 +145,28 @@ def _mux_video(frames, audio_path: str, fps: int = DEFAULT_FPS) -> str:
     coerced = _coerce_frames(frames)
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
         out_path = f.name
-    with tempfile.TemporaryDirectory() as tmpdir:
-        for i, arr in enumerate(coerced):
-            cv2.imwrite(os.path.join(tmpdir, f"{i:06d}.png"), cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
-        cmd = [
-            "ffmpeg", "-y", "-loglevel", "error",
-            "-framerate", str(fps),
-            "-i", os.path.join(tmpdir, "%06d.png"),
-            "-i", audio_path,
-            "-c:v", "libx264", "-preset", "fast", "-crf", "22",
-            "-c:a", "aac", "-b:a", "128k",
-            "-shortest", "-pix_fmt", "yuv420p",
-            out_path,
-        ]
-        subprocess.run(cmd, check=True, timeout=120)
     return out_path
@@ -168,6 +176,7 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
     model = _load_tts()
     log.info("TTS: language=%s text_len=%d emotion=%.2f", language, len(text), emotion)
     model.to("cuda")
     try:
         wav = model.generate(
             text=text.strip(),
@@ -178,6 +187,13 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
             out_path = f.name
         torchaudio.save(out_path, wav, model.sr)
         return out_path
     finally:
         model.to("cpu")
         torch.cuda.empty_cache()
@@ -254,7 +270,8 @@ def generate(portrait_img, input_mode: str, text: str, tts_language: str,
     except gr.Error:
         raise
     except Exception as e:
-        raise gr.Error(f"Generation failed: {str(e)[:400]}")
     finally:
         if _tts_tmp and os.path.exists(_tts_tmp):
             try:
@@ -295,7 +312,8 @@ def dub_video(video_input, target_lang: str, voice_ref, emotion: float, lang: st
             try:
                 translated_text = dubbing.translate(transcript.text, source_display, target_lang)
             except Exception as exc:
-                raise gr.Error(f"{t['err_translate']} ({str(exc)[:200]}")
         else:
             translated_text = transcript.text
@@ -319,7 +337,8 @@ def dub_video(video_input, target_lang: str, voice_ref, emotion: float, lang: st
     except gr.Error:
         raise
     except Exception as e:
-        raise gr.Error(f"Dubbing failed: {str(e)[:400]}")
     finally:
         for fp in temp_files:
             if fp and os.path.exists(fp):
@@ -536,4 +555,5 @@ with gr.Blocks(title="AnimaStudio 🎬") as demo:
 if __name__ == "__main__":
     demo.launch(theme=THEME, css=CSS, mcp_server=True)

         if arr.ndim == 2:
             import cv2
             arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
+        elif arr.ndim == 3 and arr.shape[2] == 4:
             arr = arr[:, :, :3]
         result.append(arr)
     return result
     coerced = _coerce_frames(frames)
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
         out_path = f.name
+    try:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            for i, arr in enumerate(coerced):
+                cv2.imwrite(os.path.join(tmpdir, f"{i:06d}.png"), cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
+            cmd = [
+                "ffmpeg", "-y", "-loglevel", "error",
+                "-framerate", str(fps),
+                "-i", os.path.join(tmpdir, "%06d.png"),
+                "-i", audio_path,
+                "-c:v", "libx264", "-preset", "fast", "-crf", "22",
+                "-c:a", "aac", "-b:a", "128k",
+                "-shortest", "-pix_fmt", "yuv420p",
+                out_path,
+            ]
+            subprocess.run(cmd, check=True, timeout=120)
+    except Exception:
+        if os.path.exists(out_path):
+            try:
+                os.unlink(out_path)
+            except OSError:
+                pass
+        raise
     return out_path
     model = _load_tts()
     log.info("TTS: language=%s text_len=%d emotion=%.2f", language, len(text), emotion)
     model.to("cuda")
+    out_path = None
     try:
         wav = model.generate(
             text=text.strip(),
             out_path = f.name
         torchaudio.save(out_path, wav, model.sr)
         return out_path
+    except Exception:
+        if out_path and os.path.exists(out_path):
+            try:
+                os.unlink(out_path)
+            except OSError:
+                pass
+        raise
     finally:
         model.to("cpu")
         torch.cuda.empty_cache()
     except gr.Error:
         raise
     except Exception as e:
+        log.error("Generation failed: %s", e, exc_info=True)
+        raise gr.Error("Generation failed. Please try different settings or try again.")
     finally:
         if _tts_tmp and os.path.exists(_tts_tmp):
             try:
             try:
                 translated_text = dubbing.translate(transcript.text, source_display, target_lang)
             except Exception as exc:
+                log.error("Translation failed: %s", exc, exc_info=True)
+                raise gr.Error(t["err_translate"])
         else:
             translated_text = transcript.text
     except gr.Error:
         raise
     except Exception as e:
+        log.error("Dubbing failed: %s", e, exc_info=True)
+        raise gr.Error("Dubbing failed. Please try a shorter video or different settings.")
     finally:
         for fp in temp_files:
             if fp and os.path.exists(fp):
 if __name__ == "__main__":
+    demo.queue(max_size=10, default_concurrency_limit=1)
     demo.launch(theme=THEME, css=CSS, mcp_server=True)

dubbing.py CHANGED Viewed

@@ -185,4 +185,11 @@ def get_video_duration(video_path: str) -> float:
         video_path,
     ]
     result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30)
-    return float(result.stdout.strip())

         video_path,
     ]
     result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30)
+    raw = result.stdout.strip()
+    try:
+        duration = float(raw)
+    except (ValueError, TypeError) as exc:
+        raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
+    if duration < 0 or not duration:
+        raise ValueError(f"ffprobe returned unusable duration: {duration}")
+    return duration