C4G-HKUST committed on
Commit
0c6b95b
·
1 Parent(s): 1b0ed38

feat: trim

Browse files
Files changed (1) hide show
  1. app.py +44 -17
app.py CHANGED
@@ -436,7 +436,7 @@ def run_graio_demo(args):
436
  logging.info("Model and face processor loaded successfully.")
437
 
438
  def generate_video(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
439
- sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector, fixed_steps=None, trim_to_6s=False):
440
  # 参考 LivePortrait: 在 worker 进程中直接使用 cuda 设备
441
  # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/src/gradio_pipeline.py
442
  # @spaces.GPU 装饰器已经初始化了 GPU,这里直接使用即可
@@ -483,8 +483,8 @@ def run_graio_demo(args):
483
  fps = getattr(cfg, 'fps', 24)
484
  calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
485
 
486
- # Fast模式:如果trim_to_6s为True,强制限制为4秒对应的帧数
487
- if trim_to_6s:
488
  # 4秒固定为97帧(4n+1格式:4秒*24fps=96帧,向上取整为97帧)
489
  max_frames_4s = 97
490
  current_frame_num = min(calculated_frame_num, max_frames_4s)
@@ -530,7 +530,7 @@ def run_graio_demo(args):
530
  audio_paths=audio_paths,
531
  task_key="gradio_output",
532
  mode=audio_mode_selector,
533
- trim_to_6s=trim_to_6s,
534
  )
535
 
536
  if isinstance(video, dict):
@@ -561,6 +561,10 @@ def run_graio_demo(args):
561
  if audio_paths:
562
  existing_audio_paths = [path for path in audio_paths if path and os.path.exists(path)]
563
  if existing_audio_paths:
 
 
 
 
564
  # 构建输出文件名
565
  audio_names = [os.path.basename(path).split('.')[0] for path in existing_audio_paths]
566
  audio_suffix = "_".join([f"audio{i}_{name}" for i, name in enumerate(audio_names)])
@@ -569,31 +573,54 @@ def run_graio_demo(args):
569
  # 构建 ffmpeg 命令
570
  if len(existing_audio_paths) == 1:
571
  # 只有一个音频
572
- ffmpeg_command = f'ffmpeg -i "{output_file}" -i "{existing_audio_paths[0]}" -vcodec libx264 -acodec aac -crf 18 -shortest -y "{audio_video_path}"'
 
 
 
 
573
  else:
574
  input_args = f'-i "{output_file}"'
575
  if audio_mode_selector == "concat":
576
  # concat 模式:串联音频
577
  for audio_path in existing_audio_paths:
578
- input_args += f' -i "{audio_path}"'
 
 
 
 
579
 
580
  num_audios = len(existing_audio_paths)
581
  concat_inputs = ''.join([f'[{i+1}:a]' for i in range(num_audios)])
582
  filter_complex = f'"{concat_inputs}concat=n={num_audios}:v=0:a=1[aout]"'
583
 
584
- ffmpeg_command = (
585
- f'ffmpeg {input_args} -filter_complex {filter_complex} '
586
- f'-map 0:v -map "[aout]" -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
587
- )
 
 
 
 
 
 
 
588
  else:
589
  # pad 模式:混合所有音频
590
  filter_inputs = []
591
  for i, audio_path in enumerate(existing_audio_paths):
592
- input_args += f' -i "{audio_path}"'
 
 
 
 
593
  filter_inputs.append(f'[{i+1}:a]')
594
 
595
  filter_complex = f'{"".join(filter_inputs)}amix=inputs={len(existing_audio_paths)}:duration=shortest[aout]'
596
- ffmpeg_command = f'ffmpeg {input_args} -filter_complex "{filter_complex}" -map 0:v -map "[aout]" -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
 
 
 
 
597
 
598
  logging.info(f"Adding audio: {ffmpeg_command}")
599
  os.system(ffmpeg_command)
@@ -727,17 +754,17 @@ def run_graio_demo(args):
727
  max_frames_4s = 97
728
 
729
  if calculated_frame_num > max_frames_4s:
730
- # 超过4秒,设置trim_to_6s标记
731
- kwargs['trim_to_6s'] = True
732
  calculated_duration = calculated_frame_num / fps
733
  logging.warning(f"Fast mode: Audio duration ({calculated_duration:.2f}s) exceeds 4 seconds limit. Will trim to 4 seconds.")
734
  else:
735
- kwargs['trim_to_6s'] = False
736
  except Exception as e:
737
  logging.warning(f"Failed to check audio duration: {e}")
738
- kwargs['trim_to_6s'] = False
739
  else:
740
- kwargs['trim_to_6s'] = False
741
 
742
  return gpu_wrapped_generate_video_worker(*args, **kwargs)
743
 
 
436
  logging.info("Model and face processor loaded successfully.")
437
 
438
  def generate_video(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
439
+ sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector, fixed_steps=None, trim_to_4s=False):
440
  # 参考 LivePortrait: 在 worker 进程中直接使用 cuda 设备
441
  # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/src/gradio_pipeline.py
442
  # @spaces.GPU 装饰器已经初始化了 GPU,这里直接使用即可
 
483
  fps = getattr(cfg, 'fps', 24)
484
  calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
485
 
486
+ # Fast模式:如果trim_to_4s为True,强制限制为4秒对应的帧数
487
+ if trim_to_4s:
488
  # 4秒固定为97帧(4n+1格式:4秒*24fps=96帧,向上取整为97帧)
489
  max_frames_4s = 97
490
  current_frame_num = min(calculated_frame_num, max_frames_4s)
 
530
  audio_paths=audio_paths,
531
  task_key="gradio_output",
532
  mode=audio_mode_selector,
533
+ trim_to_4s=trim_to_4s,
534
  )
535
 
536
  if isinstance(video, dict):
 
561
  if audio_paths:
562
  existing_audio_paths = [path for path in audio_paths if path and os.path.exists(path)]
563
  if existing_audio_paths:
564
+ # 计算视频时长(用于Fast模式限制音频长度)
565
+ fps = getattr(cfg, 'fps', 24)
566
+ video_duration_seconds = current_frame_num / fps if current_frame_num and fps else 0
567
+
568
  # 构建输出文件名
569
  audio_names = [os.path.basename(path).split('.')[0] for path in existing_audio_paths]
570
  audio_suffix = "_".join([f"audio{i}_{name}" for i, name in enumerate(audio_names)])
 
573
  # 构建 ffmpeg 命令
574
  if len(existing_audio_paths) == 1:
575
  # 只有一个音频
576
+ if trim_to_4s and video_duration_seconds > 0:
577
+ # Fast模式:限制音频输入和输出时长为视频时长
578
+ ffmpeg_command = f'ffmpeg -i "{output_file}" -ss 0 -t {video_duration_seconds:.3f} -i "{existing_audio_paths[0]}" -t {video_duration_seconds:.3f} -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
579
+ else:
580
+ ffmpeg_command = f'ffmpeg -i "{output_file}" -i "{existing_audio_paths[0]}" -vcodec libx264 -acodec aac -crf 18 -shortest -y "{audio_video_path}"'
581
  else:
582
  input_args = f'-i "{output_file}"'
583
  if audio_mode_selector == "concat":
584
  # concat 模式:串联音频
585
  for audio_path in existing_audio_paths:
586
+ if trim_to_4s and video_duration_seconds > 0:
587
+ # Fast模式:限制每个音频输入的时长
588
+ input_args += f' -ss 0 -t {video_duration_seconds:.3f} -i "{audio_path}"'
589
+ else:
590
+ input_args += f' -i "{audio_path}"'
591
 
592
  num_audios = len(existing_audio_paths)
593
  concat_inputs = ''.join([f'[{i+1}:a]' for i in range(num_audios)])
594
  filter_complex = f'"{concat_inputs}concat=n={num_audios}:v=0:a=1[aout]"'
595
 
596
+ if trim_to_4s and video_duration_seconds > 0:
597
+ # Fast模式:限制最终输出时长
598
+ ffmpeg_command = (
599
+ f'ffmpeg {input_args} -filter_complex {filter_complex} '
600
+ f'-map 0:v -map "[aout]" -t {video_duration_seconds:.3f} -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
601
+ )
602
+ else:
603
+ ffmpeg_command = (
604
+ f'ffmpeg {input_args} -filter_complex {filter_complex} '
605
+ f'-map 0:v -map "[aout]" -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
606
+ )
607
  else:
608
  # pad 模式:混合所有音频
609
  filter_inputs = []
610
  for i, audio_path in enumerate(existing_audio_paths):
611
+ if trim_to_4s and video_duration_seconds > 0:
612
+ # Fast模式:限制每个音频输入的时长
613
+ input_args += f' -ss 0 -t {video_duration_seconds:.3f} -i "{audio_path}"'
614
+ else:
615
+ input_args += f' -i "{audio_path}"'
616
  filter_inputs.append(f'[{i+1}:a]')
617
 
618
  filter_complex = f'{"".join(filter_inputs)}amix=inputs={len(existing_audio_paths)}:duration=shortest[aout]'
619
+ if trim_to_4s and video_duration_seconds > 0:
620
+ # Fast模式:限制最终输出时长
621
+ ffmpeg_command = f'ffmpeg {input_args} -filter_complex "{filter_complex}" -map 0:v -map "[aout]" -t {video_duration_seconds:.3f} -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
622
+ else:
623
+ ffmpeg_command = f'ffmpeg {input_args} -filter_complex "{filter_complex}" -map 0:v -map "[aout]" -vcodec libx264 -acodec aac -crf 18 -y "{audio_video_path}"'
624
 
625
  logging.info(f"Adding audio: {ffmpeg_command}")
626
  os.system(ffmpeg_command)
 
754
  max_frames_4s = 97
755
 
756
  if calculated_frame_num > max_frames_4s:
757
+ # 超过4秒,设置trim_to_4s标记
758
+ kwargs['trim_to_4s'] = True
759
  calculated_duration = calculated_frame_num / fps
760
  logging.warning(f"Fast mode: Audio duration ({calculated_duration:.2f}s) exceeds 4 seconds limit. Will trim to 4 seconds.")
761
  else:
762
+ kwargs['trim_to_4s'] = False
763
  except Exception as e:
764
  logging.warning(f"Failed to check audio duration: {e}")
765
+ kwargs['trim_to_4s'] = False
766
  else:
767
+ kwargs['trim_to_4s'] = False
768
 
769
  return gpu_wrapped_generate_video_worker(*args, **kwargs)
770