Spaces:
Running on Zero
Running on Zero
lulavc committed on
Commit ·
a628050
1
Parent(s): 43f8b96
fix: apply code-review and security-review findings
Browse files
- ndim guard before arr.shape[2] in _coerce_frames
- _mux_video: clean up partial output on ffmpeg failure
- _run_tts: clean up temp WAV on save failure
- Generic user-facing error messages (log full trace server-side)
- Fix missing closing ) in translate error f-string
- Validate ffprobe duration output before float cast
- Add queue(max_size=10, default_concurrency_limit=1) for GPU endpoints
- app.py +38 -18
- dubbing.py +8 -1
app.py
CHANGED
|
@@ -132,7 +132,7 @@ def _coerce_frames(frames):
|
|
| 132 |
if arr.ndim == 2:
|
| 133 |
import cv2
|
| 134 |
arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
|
| 135 |
-
elif arr.shape[2] == 4:
|
| 136 |
arr = arr[:, :, :3]
|
| 137 |
result.append(arr)
|
| 138 |
return result
|
|
@@ -145,20 +145,28 @@ def _mux_video(frames, audio_path: str, fps: int = DEFAULT_FPS) -> str:
|
|
| 145 |
coerced = _coerce_frames(frames)
|
| 146 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 147 |
out_path = f.name
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
return out_path
|
| 163 |
|
| 164 |
|
|
@@ -168,6 +176,7 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
|
|
| 168 |
model = _load_tts()
|
| 169 |
log.info("TTS: language=%s text_len=%d emotion=%.2f", language, len(text), emotion)
|
| 170 |
model.to("cuda")
|
|
|
|
| 171 |
try:
|
| 172 |
wav = model.generate(
|
| 173 |
text=text.strip(),
|
|
@@ -178,6 +187,13 @@ def _run_tts(text: str, voice_ref: str | None, emotion: float, language: str = "
|
|
| 178 |
out_path = f.name
|
| 179 |
torchaudio.save(out_path, wav, model.sr)
|
| 180 |
return out_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
finally:
|
| 182 |
model.to("cpu")
|
| 183 |
torch.cuda.empty_cache()
|
|
@@ -254,7 +270,8 @@ def generate(portrait_img, input_mode: str, text: str, tts_language: str,
|
|
| 254 |
except gr.Error:
|
| 255 |
raise
|
| 256 |
except Exception as e:
|
| 257 |
-
|
|
|
|
| 258 |
finally:
|
| 259 |
if _tts_tmp and os.path.exists(_tts_tmp):
|
| 260 |
try:
|
|
@@ -295,7 +312,8 @@ def dub_video(video_input, target_lang: str, voice_ref, emotion: float, lang: st
|
|
| 295 |
try:
|
| 296 |
translated_text = dubbing.translate(transcript.text, source_display, target_lang)
|
| 297 |
except Exception as exc:
|
| 298 |
-
|
|
|
|
| 299 |
else:
|
| 300 |
translated_text = transcript.text
|
| 301 |
|
|
@@ -319,7 +337,8 @@ def dub_video(video_input, target_lang: str, voice_ref, emotion: float, lang: st
|
|
| 319 |
except gr.Error:
|
| 320 |
raise
|
| 321 |
except Exception as e:
|
| 322 |
-
|
|
|
|
| 323 |
finally:
|
| 324 |
for fp in temp_files:
|
| 325 |
if fp and os.path.exists(fp):
|
|
@@ -536,4 +555,5 @@ with gr.Blocks(title="AnimaStudio 🎬") as demo:
|
|
| 536 |
|
| 537 |
|
| 538 |
if __name__ == "__main__":
|
|
|
|
| 539 |
demo.launch(theme=THEME, css=CSS, mcp_server=True)
|
|
|
|
| 132 |
if arr.ndim == 2:
|
| 133 |
import cv2
|
| 134 |
arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
|
| 135 |
+
elif arr.ndim == 3 and arr.shape[2] == 4:
|
| 136 |
arr = arr[:, :, :3]
|
| 137 |
result.append(arr)
|
| 138 |
return result
|
|
|
|
| 145 |
coerced = _coerce_frames(frames)
|
| 146 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 147 |
out_path = f.name
|
| 148 |
+
try:
|
| 149 |
+
with tempfile.TemporaryDirectory() as tmpdir:
|
| 150 |
+
for i, arr in enumerate(coerced):
|
| 151 |
+
cv2.imwrite(os.path.join(tmpdir, f"{i:06d}.png"), cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
|
| 152 |
+
cmd = [
|
| 153 |
+
"ffmpeg", "-y", "-loglevel", "error",
|
| 154 |
+
"-framerate", str(fps),
|
| 155 |
+
"-i", os.path.join(tmpdir, "%06d.png"),
|
| 156 |
+
"-i", audio_path,
|
| 157 |
+
"-c:v", "libx264", "-preset", "fast", "-crf", "22",
|
| 158 |
+
"-c:a", "aac", "-b:a", "128k",
|
| 159 |
+
"-shortest", "-pix_fmt", "yuv420p",
|
| 160 |
+
out_path,
|
| 161 |
+
]
|
| 162 |
+
subprocess.run(cmd, check=True, timeout=120)
|
| 163 |
+
except Exception:
|
| 164 |
+
if os.path.exists(out_path):
|
| 165 |
+
try:
|
| 166 |
+
os.unlink(out_path)
|
| 167 |
+
except OSError:
|
| 168 |
+
pass
|
| 169 |
+
raise
|
| 170 |
return out_path
|
| 171 |
|
| 172 |
|
|
|
|
| 176 |
model = _load_tts()
|
| 177 |
log.info("TTS: language=%s text_len=%d emotion=%.2f", language, len(text), emotion)
|
| 178 |
model.to("cuda")
|
| 179 |
+
out_path = None
|
| 180 |
try:
|
| 181 |
wav = model.generate(
|
| 182 |
text=text.strip(),
|
|
|
|
| 187 |
out_path = f.name
|
| 188 |
torchaudio.save(out_path, wav, model.sr)
|
| 189 |
return out_path
|
| 190 |
+
except Exception:
|
| 191 |
+
if out_path and os.path.exists(out_path):
|
| 192 |
+
try:
|
| 193 |
+
os.unlink(out_path)
|
| 194 |
+
except OSError:
|
| 195 |
+
pass
|
| 196 |
+
raise
|
| 197 |
finally:
|
| 198 |
model.to("cpu")
|
| 199 |
torch.cuda.empty_cache()
|
|
|
|
| 270 |
except gr.Error:
|
| 271 |
raise
|
| 272 |
except Exception as e:
|
| 273 |
+
log.error("Generation failed: %s", e, exc_info=True)
|
| 274 |
+
raise gr.Error("Generation failed. Please try different settings or try again.")
|
| 275 |
finally:
|
| 276 |
if _tts_tmp and os.path.exists(_tts_tmp):
|
| 277 |
try:
|
|
|
|
| 312 |
try:
|
| 313 |
translated_text = dubbing.translate(transcript.text, source_display, target_lang)
|
| 314 |
except Exception as exc:
|
| 315 |
+
log.error("Translation failed: %s", exc, exc_info=True)
|
| 316 |
+
raise gr.Error(t["err_translate"])
|
| 317 |
else:
|
| 318 |
translated_text = transcript.text
|
| 319 |
|
|
|
|
| 337 |
except gr.Error:
|
| 338 |
raise
|
| 339 |
except Exception as e:
|
| 340 |
+
log.error("Dubbing failed: %s", e, exc_info=True)
|
| 341 |
+
raise gr.Error("Dubbing failed. Please try a shorter video or different settings.")
|
| 342 |
finally:
|
| 343 |
for fp in temp_files:
|
| 344 |
if fp and os.path.exists(fp):
|
|
|
|
| 555 |
|
| 556 |
|
| 557 |
if __name__ == "__main__":
|
| 558 |
+
demo.queue(max_size=10, default_concurrency_limit=1)
|
| 559 |
demo.launch(theme=THEME, css=CSS, mcp_server=True)
|
dubbing.py
CHANGED
|
@@ -185,4 +185,11 @@ def get_video_duration(video_path: str) -> float:
|
|
| 185 |
video_path,
|
| 186 |
]
|
| 187 |
result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30)
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
video_path,
|
| 186 |
]
|
| 187 |
result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30)
|
| 188 |
+
raw = result.stdout.strip()
|
| 189 |
+
try:
|
| 190 |
+
duration = float(raw)
|
| 191 |
+
except (ValueError, TypeError) as exc:
|
| 192 |
+
raise ValueError(f"ffprobe returned invalid duration: {raw!r}") from exc
|
| 193 |
+
if duration < 0 or not duration:
|
| 194 |
+
raise ValueError(f"ffprobe returned unusable duration: {duration}")
|
| 195 |
+
return duration
|