lulavc committed on
Commit
ae3213a
·
1 Parent(s): 56ced0e

fix: wav shape, float dtype check, extract_audio cleanup, NaN duration, HF token for InferenceClient

Browse files
Files changed (2) hide show
  1. app.py +4 -1
  2. dubbing.py +14 -4
app.py CHANGED
@@ -129,7 +129,7 @@ def _coerce_frames(frames):
129
  arr = frame.cpu().float().numpy()
130
  if arr.ndim == 3 and arr.shape[0] in (1, 3, 4):
131
  arr = arr.transpose(1, 2, 0)
132
- if arr.max() <= 1.0:
133
  arr = (arr * 255).clip(0, 255)
134
  arr = arr.astype(np.uint8)
135
  else:
@@ -188,6 +188,9 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
188
  audio_prompt_path=voice_ref if voice_ref else None,
189
  exaggeration=float(emotion),
190
  )
 
 
 
191
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
192
  out_path = f.name
193
  torchaudio.save(out_path, wav, model.sr)
 
129
  arr = frame.cpu().float().numpy()
130
  if arr.ndim == 3 and arr.shape[0] in (1, 3, 4):
131
  arr = arr.transpose(1, 2, 0)
132
+ if arr.dtype.kind == 'f' and arr.max() <= 1.0:
133
  arr = (arr * 255).clip(0, 255)
134
  arr = arr.astype(np.uint8)
135
  else:
 
188
  audio_prompt_path=voice_ref if voice_ref else None,
189
  exaggeration=float(emotion),
190
  )
191
+ # torchaudio.save requires 2-D tensor [channels, samples]
192
+ if wav.ndim == 1:
193
+ wav = wav.unsqueeze(0)
194
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
195
  out_path = f.name
196
  torchaudio.save(out_path, wav, model.sr)
dubbing.py CHANGED
@@ -69,7 +69,15 @@ def extract_audio(video_path: str) -> str:
69
  "-ar", "16000", "-ac", "1",
70
  out_path,
71
  ]
72
- subprocess.run(cmd, check=True, timeout=60)
 
 
 
 
 
 
 
 
73
  return out_path
74
 
75
 
@@ -118,8 +126,9 @@ def translate(text: str, source_lang: str, target_lang: str) -> str:
118
  src_code = get_nllb_code(source_lang)
119
  tgt_code = get_nllb_code(target_lang)
120
 
121
- # Client instantiated once outside the retry loop
122
- client = InferenceClient()
 
123
  last_exc: Optional[Exception] = None
124
  for attempt in range(3):
125
  try:
@@ -190,6 +199,7 @@ def get_video_duration(video_path: str) -> float:
190
  duration = float(raw)
191
  except (ValueError, TypeError) as exc:
192
  raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
193
- if duration < 0 or not duration:
 
194
  raise ValueError(f"ffprobe returned unusable duration: {duration}")
195
  return duration
 
69
  "-ar", "16000", "-ac", "1",
70
  out_path,
71
  ]
72
+ try:
73
+ subprocess.run(cmd, check=True, timeout=60)
74
+ except Exception:
75
+ if os.path.exists(out_path):
76
+ try:
77
+ os.unlink(out_path)
78
+ except OSError:
79
+ pass
80
+ raise
81
  return out_path
82
 
83
 
 
126
  src_code = get_nllb_code(source_lang)
127
  tgt_code = get_nllb_code(target_lang)
128
 
129
+ # Client instantiated once outside the retry loop; use HF token if available
130
+ _hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
131
+ client = InferenceClient(token=_hf_token if _hf_token else None)
132
  last_exc: Optional[Exception] = None
133
  for attempt in range(3):
134
  try:
 
199
  duration = float(raw)
200
  except (ValueError, TypeError) as exc:
201
  raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
202
+ import math
203
+ if not math.isfinite(duration) or duration <= 0:
204
  raise ValueError(f"ffprobe returned unusable duration: {duration}")
205
  return duration