lulavc committed on
Commit
a628050
·
1 Parent(s): 43f8b96

fix: apply code-review and security-review findings

Browse files

- ndim guard before arr.shape[2] in _coerce_frames
- _mux_video cleanup partial output on ffmpeg failure
- _run_tts cleanup temp WAV on save failure
- Generic user-facing error messages (log full trace server-side)
- Fix missing closing ) in translate error f-string
- Validate ffprobe duration output before float cast
- Add queue(max_size=10, default_concurrency_limit=1) for GPU endpoints

Files changed (2) hide show
  1. app.py +38 -18
  2. dubbing.py +8 -1
app.py CHANGED
@@ -132,7 +132,7 @@ def _coerce_frames(frames):
132
  if arr.ndim == 2:
133
  import cv2
134
  arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
135
- elif arr.shape[2] == 4:
136
  arr = arr[:, :, :3]
137
  result.append(arr)
138
  return result
@@ -145,20 +145,28 @@ def _mux_video(frames, audio_path: str, fps: int = DEFAULT_FPS) -> str:
145
  coerced = _coerce_frames(frames)
146
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
147
  out_path = f.name
148
- with tempfile.TemporaryDirectory() as tmpdir:
149
- for i, arr in enumerate(coerced):
150
- cv2.imwrite(os.path.join(tmpdir, f"{i:06d}.png"), cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
151
- cmd = [
152
- "ffmpeg", "-y", "-loglevel", "error",
153
- "-framerate", str(fps),
154
- "-i", os.path.join(tmpdir, "%06d.png"),
155
- "-i", audio_path,
156
- "-c:v", "libx264", "-preset", "fast", "-crf", "22",
157
- "-c:a", "aac", "-b:a", "128k",
158
- "-shortest", "-pix_fmt", "yuv420p",
159
- out_path,
160
- ]
161
- subprocess.run(cmd, check=True, timeout=120)
 
 
 
 
 
 
 
 
162
  return out_path
163
 
164
 
@@ -168,6 +176,7 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
168
  model = _load_tts()
169
  log.info("TTS: language=%s text_len=%d emotion=%.2f", language, len(text), emotion)
170
  model.to("cuda")
 
171
  try:
172
  wav = model.generate(
173
  text=text.strip(),
@@ -178,6 +187,13 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
178
  out_path = f.name
179
  torchaudio.save(out_path, wav, model.sr)
180
  return out_path
 
 
 
 
 
 
 
181
  finally:
182
  model.to("cpu")
183
  torch.cuda.empty_cache()
@@ -254,7 +270,8 @@ def generate(portrait_img, input_mode: str, text: str, tts_language: str,
254
  except gr.Error:
255
  raise
256
  except Exception as e:
257
- raise gr.Error(f"Generation failed: {str(e)[:400]}")
 
258
  finally:
259
  if _tts_tmp and os.path.exists(_tts_tmp):
260
  try:
@@ -295,7 +312,8 @@ def dub_video(video_input, target_lang: str, voice_ref, emotion: float, lang: st
295
  try:
296
  translated_text = dubbing.translate(transcript.text, source_display, target_lang)
297
  except Exception as exc:
298
- raise gr.Error(f"{t['err_translate']} ({str(exc)[:200]}")
 
299
  else:
300
  translated_text = transcript.text
301
 
@@ -319,7 +337,8 @@ def dub_video(video_input, target_lang: str, voice_ref, emotion: float, lang: st
319
  except gr.Error:
320
  raise
321
  except Exception as e:
322
- raise gr.Error(f"Dubbing failed: {str(e)[:400]}")
 
323
  finally:
324
  for fp in temp_files:
325
  if fp and os.path.exists(fp):
@@ -536,4 +555,5 @@ with gr.Blocks(title="AnimaStudio 🎬") as demo:
536
 
537
 
538
  if __name__ == "__main__":
 
539
  demo.launch(theme=THEME, css=CSS, mcp_server=True)
 
132
  if arr.ndim == 2:
133
  import cv2
134
  arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
135
+ elif arr.ndim == 3 and arr.shape[2] == 4:
136
  arr = arr[:, :, :3]
137
  result.append(arr)
138
  return result
 
145
  coerced = _coerce_frames(frames)
146
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
147
  out_path = f.name
148
+ try:
149
+ with tempfile.TemporaryDirectory() as tmpdir:
150
+ for i, arr in enumerate(coerced):
151
+ cv2.imwrite(os.path.join(tmpdir, f"{i:06d}.png"), cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
152
+ cmd = [
153
+ "ffmpeg", "-y", "-loglevel", "error",
154
+ "-framerate", str(fps),
155
+ "-i", os.path.join(tmpdir, "%06d.png"),
156
+ "-i", audio_path,
157
+ "-c:v", "libx264", "-preset", "fast", "-crf", "22",
158
+ "-c:a", "aac", "-b:a", "128k",
159
+ "-shortest", "-pix_fmt", "yuv420p",
160
+ out_path,
161
+ ]
162
+ subprocess.run(cmd, check=True, timeout=120)
163
+ except Exception:
164
+ if os.path.exists(out_path):
165
+ try:
166
+ os.unlink(out_path)
167
+ except OSError:
168
+ pass
169
+ raise
170
  return out_path
171
 
172
 
 
176
  model = _load_tts()
177
  log.info("TTS: language=%s text_len=%d emotion=%.2f", language, len(text), emotion)
178
  model.to("cuda")
179
+ out_path = None
180
  try:
181
  wav = model.generate(
182
  text=text.strip(),
 
187
  out_path = f.name
188
  torchaudio.save(out_path, wav, model.sr)
189
  return out_path
190
+ except Exception:
191
+ if out_path and os.path.exists(out_path):
192
+ try:
193
+ os.unlink(out_path)
194
+ except OSError:
195
+ pass
196
+ raise
197
  finally:
198
  model.to("cpu")
199
  torch.cuda.empty_cache()
 
270
  except gr.Error:
271
  raise
272
  except Exception as e:
273
+ log.error("Generation failed: %s", e, exc_info=True)
274
+ raise gr.Error("Generation failed. Please try different settings or try again.")
275
  finally:
276
  if _tts_tmp and os.path.exists(_tts_tmp):
277
  try:
 
312
  try:
313
  translated_text = dubbing.translate(transcript.text, source_display, target_lang)
314
  except Exception as exc:
315
+ log.error("Translation failed: %s", exc, exc_info=True)
316
+ raise gr.Error(t["err_translate"])
317
  else:
318
  translated_text = transcript.text
319
 
 
337
  except gr.Error:
338
  raise
339
  except Exception as e:
340
+ log.error("Dubbing failed: %s", e, exc_info=True)
341
+ raise gr.Error("Dubbing failed. Please try a shorter video or different settings.")
342
  finally:
343
  for fp in temp_files:
344
  if fp and os.path.exists(fp):
 
555
 
556
 
557
  if __name__ == "__main__":
558
+ demo.queue(max_size=10, default_concurrency_limit=1)
559
  demo.launch(theme=THEME, css=CSS, mcp_server=True)
dubbing.py CHANGED
@@ -185,4 +185,11 @@ def get_video_duration(video_path: str) -> float:
185
  video_path,
186
  ]
187
  result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30)
188
- return float(result.stdout.strip())
 
 
 
 
 
 
 
 
185
  video_path,
186
  ]
187
  result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30)
188
+ raw = result.stdout.strip()
189
+ try:
190
+ duration = float(raw)
191
+ except (ValueError, TypeError) as exc:
192
+ raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
193
+ if duration < 0 or not duration:
194
+ raise ValueError(f"ffprobe returned unusable duration: {duration}")
195
+ return duration