Spaces:

lulavc
/

AnimaStudio

Running on Zero

lulavc committed on Mar 5

Commit

ae3213a

1 Parent(s): 56ced0e

fix: wav shape, float dtype check, extract_audio cleanup, NaN duration, HF token for InferenceClient

Files changed (2) hide show

app.py CHANGED Viewed

@@ -129,7 +129,7 @@ def _coerce_frames(frames):
             arr = frame.cpu().float().numpy()
             if arr.ndim == 3 and arr.shape[0] in (1, 3, 4):
                 arr = arr.transpose(1, 2, 0)
-            if arr.max() <= 1.0:
                 arr = (arr * 255).clip(0, 255)
             arr = arr.astype(np.uint8)
         else:
@@ -188,6 +188,9 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
             audio_prompt_path=voice_ref if voice_ref else None,
             exaggeration=float(emotion),
         )
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             out_path = f.name
         torchaudio.save(out_path, wav, model.sr)

             arr = frame.cpu().float().numpy()
             if arr.ndim == 3 and arr.shape[0] in (1, 3, 4):
                 arr = arr.transpose(1, 2, 0)
+            if arr.dtype.kind == 'f' and arr.max() <= 1.0:
                 arr = (arr * 255).clip(0, 255)
             arr = arr.astype(np.uint8)
         else:
             audio_prompt_path=voice_ref if voice_ref else None,
             exaggeration=float(emotion),
         )
+        # torchaudio.save requires 2-D tensor [channels, samples]
+        if wav.ndim == 1:
+            wav = wav.unsqueeze(0)
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             out_path = f.name
         torchaudio.save(out_path, wav, model.sr)

dubbing.py CHANGED Viewed

@@ -69,7 +69,15 @@ def extract_audio(video_path: str) -> str:
         "-ar", "16000", "-ac", "1",
         out_path,
     ]
-    subprocess.run(cmd, check=True, timeout=60)
     return out_path
@@ -118,8 +126,9 @@ def translate(text: str, source_lang: str, target_lang: str) -> str:
     src_code = get_nllb_code(source_lang)
     tgt_code = get_nllb_code(target_lang)
-    # Client instantiated once outside the retry loop
-    client = InferenceClient()
     last_exc: Optional[Exception] = None
     for attempt in range(3):
         try:
@@ -190,6 +199,7 @@ def get_video_duration(video_path: str) -> float:
         duration = float(raw)
     except (ValueError, TypeError) as exc:
         raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
-    if duration < 0 or not duration:
         raise ValueError(f"ffprobe returned unusable duration: {duration}")
     return duration

         "-ar", "16000", "-ac", "1",
         out_path,
     ]
+    try:
+        subprocess.run(cmd, check=True, timeout=60)
+    except Exception:
+        if os.path.exists(out_path):
+            try:
+                os.unlink(out_path)
+            except OSError:
+                pass
+        raise
     return out_path
     src_code = get_nllb_code(source_lang)
     tgt_code = get_nllb_code(target_lang)
+    # Client instantiated once outside the retry loop; use HF token if available
+    _hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+    client = InferenceClient(token=_hf_token if _hf_token else None)
     last_exc: Optional[Exception] = None
     for attempt in range(3):
         try:
         duration = float(raw)
     except (ValueError, TypeError) as exc:
         raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
+    import math
+    if not math.isfinite(duration) or duration <= 0:
         raise ValueError(f"ffprobe returned unusable duration: {duration}")
     return duration