Commit ·
d5399ac
1
Parent(s): e7175d4
Fix regen GPU fns: move seg clip extraction inside GPU scope
_regen_mmaudio_gpu and _regen_hunyuan_gpu now call _extract_segment_clip
internally (pure ffmpeg, safe within GPU window) instead of relying on
_ctx_store/_ctx_load cross-process context passing.
Removes redundant _extract_segment_clip + _ctx_store calls from CPU
wrappers: regen_mmaudio_segment, regen_hunyuan_segment, xregen_mmaudio,
xregen_hunyuan.
app.py
CHANGED
|
@@ -1389,8 +1389,12 @@ def _regen_mmaudio_gpu(video_file, seg_idx, seg_meta_json,
|
|
| 1389 |
net, feature_utils, model_cfg, seq_cfg = _load_mmaudio_models(device, dtype)
|
| 1390 |
sr = seq_cfg.sampling_rate
|
| 1391 |
|
| 1392 |
-
|
| 1393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1394 |
|
| 1395 |
rng = torch.Generator(device=device)
|
| 1396 |
rng.manual_seed(random.randint(0, 2**32 - 1))
|
|
@@ -1422,18 +1426,8 @@ def regen_mmaudio_segment(video_file, seg_idx, seg_meta_json,
|
|
| 1422 |
"""Regenerate one MMAudio segment. GPU inference + CPU splice/save."""
|
| 1423 |
meta = json.loads(seg_meta_json)
|
| 1424 |
seg_idx = int(seg_idx)
|
| 1425 |
-
seg_start, seg_end = meta["segments"][seg_idx]
|
| 1426 |
-
seg_dur = seg_end - seg_start
|
| 1427 |
|
| 1428 |
-
#
|
| 1429 |
-
tmp_dir = _register_tmp_dir(tempfile.mkdtemp())
|
| 1430 |
-
seg_path = _extract_segment_clip(
|
| 1431 |
-
meta["silent_video"], seg_start, seg_dur,
|
| 1432 |
-
os.path.join(tmp_dir, "regen_seg.mp4"),
|
| 1433 |
-
)
|
| 1434 |
-
_ctx_store("regen_mmaudio_gpu", {"seg_path": seg_path})
|
| 1435 |
-
|
| 1436 |
-
# GPU: inference only
|
| 1437 |
new_wav, sr = _regen_mmaudio_gpu(video_file, seg_idx, seg_meta_json,
|
| 1438 |
prompt, negative_prompt, seed_val,
|
| 1439 |
cfg_strength, num_steps, crossfade_s, crossfade_db,
|
|
@@ -1481,11 +1475,13 @@ def _regen_hunyuan_gpu(video_file, seg_idx, seg_meta_json,
|
|
| 1481 |
|
| 1482 |
set_global_seed(random.randint(0, 2**32 - 1))
|
| 1483 |
|
| 1484 |
-
|
| 1485 |
-
seg_path =
|
| 1486 |
-
|
|
|
|
|
|
|
| 1487 |
|
| 1488 |
-
text_feats_path =
|
| 1489 |
if text_feats_path and os.path.exists(text_feats_path):
|
| 1490 |
print("[HunyuanFoley regen] Loading cached text features from disk")
|
| 1491 |
from hunyuanvideo_foley.utils.feature_utils import encode_video_features
|
|
@@ -1516,21 +1512,8 @@ def regen_hunyuan_segment(video_file, seg_idx, seg_meta_json,
|
|
| 1516 |
"""Regenerate one HunyuanFoley segment. GPU inference + CPU splice/save."""
|
| 1517 |
meta = json.loads(seg_meta_json)
|
| 1518 |
seg_idx = int(seg_idx)
|
| 1519 |
-
seg_start, seg_end = meta["segments"][seg_idx]
|
| 1520 |
-
seg_dur = seg_end - seg_start
|
| 1521 |
|
| 1522 |
-
#
|
| 1523 |
-
tmp_dir = _register_tmp_dir(tempfile.mkdtemp())
|
| 1524 |
-
seg_path = _extract_segment_clip(
|
| 1525 |
-
meta["silent_video"], seg_start, seg_dur,
|
| 1526 |
-
os.path.join(tmp_dir, "regen_seg.mp4"),
|
| 1527 |
-
)
|
| 1528 |
-
_ctx_store("regen_hunyuan_gpu", {
|
| 1529 |
-
"seg_path": seg_path,
|
| 1530 |
-
"text_feats_path": meta.get("text_feats_path", ""),
|
| 1531 |
-
})
|
| 1532 |
-
|
| 1533 |
-
# GPU: inference only
|
| 1534 |
new_wav, sr = _regen_hunyuan_gpu(video_file, seg_idx, seg_meta_json,
|
| 1535 |
prompt, negative_prompt, seed_val,
|
| 1536 |
guidance_scale, num_steps, model_size,
|
|
@@ -1643,15 +1626,9 @@ def xregen_mmaudio(seg_idx, state_json, slot_id,
|
|
| 1643 |
request: gr.Request = None):
|
| 1644 |
"""Cross-model regen: run MMAudio inference and splice into *slot_id*."""
|
| 1645 |
seg_idx = int(seg_idx)
|
| 1646 |
-
meta = json.loads(state_json)
|
| 1647 |
-
seg_start, seg_end = meta["segments"][seg_idx]
|
| 1648 |
|
| 1649 |
def _run():
|
| 1650 |
-
|
| 1651 |
-
meta["silent_video"], seg_start, seg_end - seg_start,
|
| 1652 |
-
os.path.join(tempfile.mkdtemp(), "xregen_seg.mp4"),
|
| 1653 |
-
)
|
| 1654 |
-
_ctx_store("regen_mmaudio_gpu", {"seg_path": seg_path})
|
| 1655 |
wav, src_sr = _regen_mmaudio_gpu(None, seg_idx, state_json,
|
| 1656 |
prompt, negative_prompt, seed_val,
|
| 1657 |
cfg_strength, num_steps,
|
|
@@ -1668,18 +1645,9 @@ def xregen_hunyuan(seg_idx, state_json, slot_id,
|
|
| 1668 |
request: gr.Request = None):
|
| 1669 |
"""Cross-model regen: run HunyuanFoley inference and splice into *slot_id*."""
|
| 1670 |
seg_idx = int(seg_idx)
|
| 1671 |
-
meta = json.loads(state_json)
|
| 1672 |
-
seg_start, seg_end = meta["segments"][seg_idx]
|
| 1673 |
|
| 1674 |
def _run():
|
| 1675 |
-
|
| 1676 |
-
meta["silent_video"], seg_start, seg_end - seg_start,
|
| 1677 |
-
os.path.join(tempfile.mkdtemp(), "xregen_seg.mp4"),
|
| 1678 |
-
)
|
| 1679 |
-
_ctx_store("regen_hunyuan_gpu", {
|
| 1680 |
-
"seg_path": seg_path,
|
| 1681 |
-
"text_feats_path": meta.get("text_feats_path", ""),
|
| 1682 |
-
})
|
| 1683 |
wav, src_sr = _regen_hunyuan_gpu(None, seg_idx, state_json,
|
| 1684 |
prompt, negative_prompt, seed_val,
|
| 1685 |
guidance_scale, num_steps, model_size,
|
|
|
|
| 1389 |
net, feature_utils, model_cfg, seq_cfg = _load_mmaudio_models(device, dtype)
|
| 1390 |
sr = seq_cfg.sampling_rate
|
| 1391 |
|
| 1392 |
+
# Extract segment clip inside the GPU function — ffmpeg is CPU-only and safe here.
|
| 1393 |
+
# This avoids any cross-process context passing that fails under ZeroGPU isolation.
|
| 1394 |
+
seg_path = _extract_segment_clip(
|
| 1395 |
+
meta["silent_video"], seg_start, seg_dur,
|
| 1396 |
+
os.path.join(tempfile.mkdtemp(), "regen_seg.mp4"),
|
| 1397 |
+
)
|
| 1398 |
|
| 1399 |
rng = torch.Generator(device=device)
|
| 1400 |
rng.manual_seed(random.randint(0, 2**32 - 1))
|
|
|
|
| 1426 |
"""Regenerate one MMAudio segment. GPU inference + CPU splice/save."""
|
| 1427 |
meta = json.loads(seg_meta_json)
|
| 1428 |
seg_idx = int(seg_idx)
|
|
|
|
|
|
|
| 1429 |
|
| 1430 |
+
# GPU: inference (segment clip extraction happens inside the GPU function)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1431 |
new_wav, sr = _regen_mmaudio_gpu(video_file, seg_idx, seg_meta_json,
|
| 1432 |
prompt, negative_prompt, seed_val,
|
| 1433 |
cfg_strength, num_steps, crossfade_s, crossfade_db,
|
|
|
|
| 1475 |
|
| 1476 |
set_global_seed(random.randint(0, 2**32 - 1))
|
| 1477 |
|
| 1478 |
+
# Extract segment clip inside the GPU function — ffmpeg is CPU-only and safe here.
|
| 1479 |
+
seg_path = _extract_segment_clip(
|
| 1480 |
+
meta["silent_video"], seg_start, seg_dur,
|
| 1481 |
+
os.path.join(tempfile.mkdtemp(), "regen_seg.mp4"),
|
| 1482 |
+
)
|
| 1483 |
|
| 1484 |
+
text_feats_path = meta.get("text_feats_path", "")
|
| 1485 |
if text_feats_path and os.path.exists(text_feats_path):
|
| 1486 |
print("[HunyuanFoley regen] Loading cached text features from disk")
|
| 1487 |
from hunyuanvideo_foley.utils.feature_utils import encode_video_features
|
|
|
|
| 1512 |
"""Regenerate one HunyuanFoley segment. GPU inference + CPU splice/save."""
|
| 1513 |
meta = json.loads(seg_meta_json)
|
| 1514 |
seg_idx = int(seg_idx)
|
|
|
|
|
|
|
| 1515 |
|
| 1516 |
+
# GPU: inference (segment clip extraction happens inside the GPU function)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1517 |
new_wav, sr = _regen_hunyuan_gpu(video_file, seg_idx, seg_meta_json,
|
| 1518 |
prompt, negative_prompt, seed_val,
|
| 1519 |
guidance_scale, num_steps, model_size,
|
|
|
|
| 1626 |
request: gr.Request = None):
|
| 1627 |
"""Cross-model regen: run MMAudio inference and splice into *slot_id*."""
|
| 1628 |
seg_idx = int(seg_idx)
|
|
|
|
|
|
|
| 1629 |
|
| 1630 |
def _run():
|
| 1631 |
+
# Segment clip extraction happens inside _regen_mmaudio_gpu
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1632 |
wav, src_sr = _regen_mmaudio_gpu(None, seg_idx, state_json,
|
| 1633 |
prompt, negative_prompt, seed_val,
|
| 1634 |
cfg_strength, num_steps,
|
|
|
|
| 1645 |
request: gr.Request = None):
|
| 1646 |
"""Cross-model regen: run HunyuanFoley inference and splice into *slot_id*."""
|
| 1647 |
seg_idx = int(seg_idx)
|
|
|
|
|
|
|
| 1648 |
|
| 1649 |
def _run():
|
| 1650 |
+
# Segment clip extraction happens inside _regen_hunyuan_gpu
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1651 |
wav, src_sr = _regen_hunyuan_gpu(None, seg_idx, state_json,
|
| 1652 |
prompt, negative_prompt, seed_val,
|
| 1653 |
guidance_scale, num_steps, model_size,
|