Spaces: Running on Zero
feat: trim
Browse files
app.py
CHANGED
|
@@ -436,7 +436,7 @@ def run_graio_demo(args):
|
|
| 436 |
logging.info("Model and face processor loaded successfully.")
|
| 437 |
|
| 438 |
def generate_video(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
|
| 439 |
-
sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector, fixed_steps=None,
|
| 440 |
# 参考 LivePortrait: 在 worker 进程中直接使用 cuda 设备
|
| 441 |
# 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/src/gradio_pipeline.py
|
| 442 |
# @spaces.GPU 装饰器已经初始化了 GPU,这里直接使用即可
|
|
@@ -483,8 +483,8 @@ def run_graio_demo(args):
|
|
| 483 |
fps = getattr(cfg, 'fps', 24)
|
| 484 |
calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
|
| 485 |
|
| 486 |
-
# Fast模式:如果
|
| 487 |
-
if
|
| 488 |
# Cap at 97 frames for 4 seconds (assumes 24 fps: 4 s * 24 fps = 96 frames, plus 1 to satisfy the 4n+1 frame-count format)
|
| 489 |
max_frames_4s = 97
|
| 490 |
current_frame_num = min(calculated_frame_num, max_frames_4s)
|
|
@@ -530,7 +530,7 @@ def run_graio_demo(args):
|
|
| 530 |
audio_paths=audio_paths,
|
| 531 |
task_key="gradio_output",
|
| 532 |
mode=audio_mode_selector,
|
| 533 |
-
|
| 534 |
)
|
| 535 |
|
| 536 |
if isinstance(video, dict):
|
|
@@ -561,6 +561,10 @@ def run_graio_demo(args):
|
|
| 561 |
if audio_paths:
|
| 562 |
existing_audio_paths = [path for path in audio_paths if path and os.path.exists(path)]
|
| 563 |
if existing_audio_paths:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
# 构建输出文件名
|
| 565 |
audio_names = [os.path.basename(path).split('.')[0] for path in existing_audio_paths]
|
| 566 |
audio_suffix = "_".join([f"audio{i}_{name}" for i, name in enumerate(audio_names)])
|
|
@@ -569,31 +573,54 @@ def run_graio_demo(args):
|
|
| 569 |
# 构建 ffmpeg 命令
|
| 570 |
if len(existing_audio_paths) == 1:
|
| 571 |
# 只有一个音频
|
| 572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
else:
|
| 574 |
input_args = f'-i "{output_file}"'
|
| 575 |
if audio_mode_selector == "concat":
|
| 576 |
# concat 模式:串联音频
|
| 577 |
for audio_path in existing_audio_paths:
|
| 578 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 579 |
|
| 580 |
num_audios = len(existing_audio_paths)
|
| 581 |
concat_inputs = ''.join([f'[{i+1}:a]' for i in range(num_audios)])
|
| 582 |
filter_complex = f'"{concat_inputs}concat=n={num_audios}:v=0:a=1[aout]"'
|
| 583 |
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
else:
|
| 589 |
# pad 模式:混合所有音频
|
| 590 |
filter_inputs = []
|
| 591 |
for i, audio_path in enumerate(existing_audio_paths):
|
| 592 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
filter_inputs.append(f'[{i+1}:a]')
|
| 594 |
|
| 595 |
filter_complex = f'{"".join(filter_inputs)}amix=inputs={len(existing_audio_paths)}:duration=shortest[aout]'
|
| 596 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
|
| 598 |
logging.info(f"Adding audio: {ffmpeg_command}")
|
| 599 |
os.system(ffmpeg_command)
|
|
@@ -727,17 +754,17 @@ def run_graio_demo(args):
|
|
| 727 |
max_frames_4s = 97
|
| 728 |
|
| 729 |
if calculated_frame_num > max_frames_4s:
|
| 730 |
-
# 超过4秒,设置
|
| 731 |
-
kwargs['
|
| 732 |
calculated_duration = calculated_frame_num / fps
|
| 733 |
logging.warning(f"Fast mode: Audio duration ({calculated_duration:.2f}s) exceeds 4 seconds limit. Will trim to 4 seconds.")
|
| 734 |
else:
|
| 735 |
-
kwargs['
|
| 736 |
except Exception as e:
|
| 737 |
logging.warning(f"Failed to check audio duration: {e}")
|
| 738 |
-
kwargs['
|
| 739 |
else:
|
| 740 |
-
kwargs['
|
| 741 |
|
| 742 |
return gpu_wrapped_generate_video_worker(*args, **kwargs)
|
| 743 |
|
|
|
|
| 436 |
logging.info("Model and face processor loaded successfully.")
|
| 437 |
|
| 438 |
def generate_video(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
|
| 439 |
+
sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector, fixed_steps=None, trim_to_4s=False):
|
| 440 |
# 参考 LivePortrait: 在 worker 进程中直接使用 cuda 设备
|
| 441 |
# 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/src/gradio_pipeline.py
|
| 442 |
# @spaces.GPU 装饰器已经初始化了 GPU,这里直接使用即可
|
|
|
|
| 483 |
fps = getattr(cfg, 'fps', 24)
|
| 484 |
calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
|
| 485 |
|
| 486 |
+
# Fast模式:如果trim_to_4s为True,强制限制为4秒对应的帧数
|
| 487 |
+
if trim_to_4s:
|
| 488 |
# Cap at 97 frames for 4 seconds (assumes 24 fps: 4 s * 24 fps = 96 frames, plus 1 to satisfy the 4n+1 frame-count format)
|
| 489 |
max_frames_4s = 97
|
| 490 |
current_frame_num = min(calculated_frame_num, max_frames_4s)
|
|
|
|
| 530 |
audio_paths=audio_paths,
|
| 531 |
task_key="gradio_output",
|
| 532 |
mode=audio_mode_selector,
|
| 533 |
+
trim_to_4s=trim_to_4s,
|
| 534 |
)
|
| 535 |
|
| 536 |
if isinstance(video, dict):
|
|
|
|
| 561 |
if audio_paths:
|
| 562 |
existing_audio_paths = [path for path in audio_paths if path and os.path.exists(path)]
|
| 563 |
if existing_audio_paths:
|
| 564 |
+
# 计算视频时长(用于Fast模式限制音频长度)
|
| 565 |
+
fps = getattr(cfg, 'fps', 24)
|
| 566 |
+
video_duration_seconds = current_frame_num / fps if current_frame_num and fps else 0
|
| 567 |
+
|
| 568 |
# 构建输出文件名
|
| 569 |
audio_names = [os.path.basename(path).split('.')[0] for path in existing_audio_paths]
|
| 570 |
audio_suffix = "_".join([f"audio{i}_{name}" for i, name in enumerate(audio_names)])
|
|
|
|
| 573 |
# 构建 ffmpeg 命令
|
| 574 |
if len(existing_audio_paths) == 1:
|
| 575 |
# 只有一个音频
|
| 576 |
+
if trim_to_4s and video_duration_seconds > 0:
|
| 577 |
+
# Fast模式:限制音频输入和输出时长为视频时长
|
| 578 |
+
ffmpeg_command = f'ffmpeg -i "{output_file}" -ss 0 -t {video_duration_seconds:.3f} -i "{existing_audio_paths[0]}" -t {video_duration_seconds:.3f} -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
|
| 579 |
+
else:
|
| 580 |
+
ffmpeg_command = f'ffmpeg -i "{output_file}" -i "{existing_audio_paths[0]}" -vcodec libx264 -acodec aac -crf 18 -shortest -y "{audio_video_path}"'
|
| 581 |
else:
|
| 582 |
input_args = f'-i "{output_file}"'
|
| 583 |
if audio_mode_selector == "concat":
|
| 584 |
# concat 模式:串联音频
|
| 585 |
for audio_path in existing_audio_paths:
|
| 586 |
+
if trim_to_4s and video_duration_seconds > 0:
|
| 587 |
+
# Fast模式:限制每个音频输入的时长
|
| 588 |
+
input_args += f' -ss 0 -t {video_duration_seconds:.3f} -i "{audio_path}"'
|
| 589 |
+
else:
|
| 590 |
+
input_args += f' -i "{audio_path}"'
|
| 591 |
|
| 592 |
num_audios = len(existing_audio_paths)
|
| 593 |
concat_inputs = ''.join([f'[{i+1}:a]' for i in range(num_audios)])
|
| 594 |
filter_complex = f'"{concat_inputs}concat=n={num_audios}:v=0:a=1[aout]"'
|
| 595 |
|
| 596 |
+
if trim_to_4s and video_duration_seconds > 0:
|
| 597 |
+
# Fast模式:限制最终输出时长
|
| 598 |
+
ffmpeg_command = (
|
| 599 |
+
f'ffmpeg {input_args} -filter_complex {filter_complex} '
|
| 600 |
+
f'-map 0:v -map "[aout]" -t {video_duration_seconds:.3f} -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
|
| 601 |
+
)
|
| 602 |
+
else:
|
| 603 |
+
ffmpeg_command = (
|
| 604 |
+
f'ffmpeg {input_args} -filter_complex {filter_complex} '
|
| 605 |
+
f'-map 0:v -map "[aout]" -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
|
| 606 |
+
)
|
| 607 |
else:
|
| 608 |
# pad 模式:混合所有音频
|
| 609 |
filter_inputs = []
|
| 610 |
for i, audio_path in enumerate(existing_audio_paths):
|
| 611 |
+
if trim_to_4s and video_duration_seconds > 0:
|
| 612 |
+
# Fast模式:限制每个音频输入的时长
|
| 613 |
+
input_args += f' -ss 0 -t {video_duration_seconds:.3f} -i "{audio_path}"'
|
| 614 |
+
else:
|
| 615 |
+
input_args += f' -i "{audio_path}"'
|
| 616 |
filter_inputs.append(f'[{i+1}:a]')
|
| 617 |
|
| 618 |
filter_complex = f'{"".join(filter_inputs)}amix=inputs={len(existing_audio_paths)}:duration=shortest[aout]'
|
| 619 |
+
if trim_to_4s and video_duration_seconds > 0:
|
| 620 |
+
# Fast模式:限制最终输出时长
|
| 621 |
+
ffmpeg_command = f'ffmpeg {input_args} -filter_complex "{filter_complex}" -map 0:v -map "[aout]" -t {video_duration_seconds:.3f} -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
|
| 622 |
+
else:
|
| 623 |
+
ffmpeg_command = f'ffmpeg {input_args} -filter_complex "{filter_complex}" -map 0:v -map "[aout]" -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
|
| 624 |
|
| 625 |
logging.info(f"Adding audio: {ffmpeg_command}")
|
| 626 |
os.system(ffmpeg_command)
|
|
|
|
| 754 |
max_frames_4s = 97
|
| 755 |
|
| 756 |
if calculated_frame_num > max_frames_4s:
|
| 757 |
+
# 超过4秒,设置trim_to_4s标记
|
| 758 |
+
kwargs['trim_to_4s'] = True
|
| 759 |
calculated_duration = calculated_frame_num / fps
|
| 760 |
logging.warning(f"Fast mode: Audio duration ({calculated_duration:.2f}s) exceeds 4 seconds limit. Will trim to 4 seconds.")
|
| 761 |
else:
|
| 762 |
+
kwargs['trim_to_4s'] = False
|
| 763 |
except Exception as e:
|
| 764 |
logging.warning(f"Failed to check audio duration: {e}")
|
| 765 |
+
kwargs['trim_to_4s'] = False
|
| 766 |
else:
|
| 767 |
+
kwargs['trim_to_4s'] = False
|
| 768 |
|
| 769 |
return gpu_wrapped_generate_video_worker(*args, **kwargs)
|
| 770 |
|