BrilliantCoolHuge committed on
Commit
48f0289
·
verified ·
1 Parent(s): c21dd28

将BGM开关改为音量条

Browse files
Files changed (1) hide show
  1. app.py +57 -39
app.py CHANGED
@@ -5,6 +5,7 @@ import os
5
  import sys
6
  import hashlib
7
  import time
 
8
  from typing import List, Dict, Optional, Tuple
9
  from pathlib import Path
10
 
@@ -12,9 +13,9 @@ import numpy as np
12
  from pydub import AudioSegment
13
  # 引入与 sample.py 一致的组件
14
  try:
15
- from voice_bank import PredAll
16
- from voice_bank.commons.ds_reader import DSReader
17
- from voice_bank.commons.phome_num_counter import Phome
18
  from pypinyin import pinyin, Style
19
  from pypinyin.constants import RE_HANS
20
  except Exception:
@@ -166,17 +167,23 @@ def export_wav(seg: AudioSegment, path: Path):
166
  seg.export(str(path), format="wav")
167
 
168
 
169
- def overlay_bgm_snippet(vocal_wav: Path, bgm_audio: AudioSegment, offset_sec: float, gain_db: float = 0.0) -> AudioSegment:
170
  vocal = audiosegment_from_file(vocal_wav)
171
- if gain_db != 0.0:
172
- vocal = vocal + gain_db
173
  start_ms = max(int(offset_sec * 1000), 0)
174
- # 确保 BGM 足够长,不够则静音补齐
175
- if len(bgm_audio) < start_ms + len(vocal):
176
- pad_ms = start_ms + len(vocal) - len(bgm_audio)
177
- bgm_audio = bgm_audio + AudioSegment.silent(duration=pad_ms)
178
- mixed = bgm_audio.overlay(vocal, position=start_ms)
179
- # 裁剪到混音范围以便预览:从 start_ms 到 start_ms+len(vocal)
 
 
 
 
 
 
180
  return mixed[start_ms : start_ms + len(vocal)]
181
 
182
 
@@ -195,13 +202,19 @@ def concat_with_offsets(clips: List[Tuple[AudioSegment, float]]) -> AudioSegment
195
  return timeline
196
 
197
 
198
- def mix_full_song(vocal: AudioSegment, bgm: AudioSegment) -> AudioSegment:
199
  # 保证两者同长度
200
  if len(bgm) < len(vocal):
201
  bgm = bgm + AudioSegment.silent(duration=(len(vocal) - len(bgm)))
202
  else:
203
  vocal = vocal + AudioSegment.silent(duration=(len(bgm) - len(vocal)))
204
- return bgm.overlay(vocal)
 
 
 
 
 
 
205
 
206
 
207
  def param_hash(model_sel: str, speaker: str, key_shift: int, steps: int, text: str) -> str:
@@ -391,13 +404,13 @@ def get_template_choices_and_bgm_visible():
391
  def on_select_template(template_name: str):
392
  mapping = find_templates()
393
  if not template_name or template_name not in mapping:
394
- return gr.update(visible=False, value=False), [], []
395
  p = mapping[template_name]
396
  bgm = bgm_path_for(p)
397
  ds = load_ds(p)
398
  lines = [preprocess_zh_spaces(item.get("text", "")) for item in ds]
399
  offsets = [float(item.get("offset", 0.0)) for item in ds]
400
- bgm_update = gr.update(visible=(bgm is not None), value=(bgm is not None))
401
  return bgm_update, lines, offsets
402
 
403
 
@@ -409,7 +422,7 @@ def render_single_line(
409
  speaker: str,
410
  key_shift: int,
411
  steps: int,
412
- use_bgm: bool,
413
  ) -> Tuple[Optional[str], Optional[str]]:
414
  """
415
  渲染单句,返回:(音频路径, 错误消息)
@@ -451,10 +464,10 @@ def render_single_line(
451
 
452
  # 是否混音预览
453
  mapping_bgm = bgm_path_for(template_path)
454
- if use_bgm and mapping_bgm and mapping_bgm.exists():
455
  bgm_audio = audiosegment_from_file(mapping_bgm)
456
  offset = float(ds[line_index].get("offset", 0.0))
457
- mixed_seg = overlay_bgm_snippet(wav_out, bgm_audio, offset)
458
  preview_wav = cache_dir / f"line_{line_index+1}_preview.wav"
459
  export_wav(mixed_seg, preview_wav)
460
  return str(preview_wav), None
@@ -472,7 +485,7 @@ def generate_full_song(
472
  speaker: str,
473
  key_shift: int,
474
  steps: int,
475
- use_bgm: bool,
476
  ) -> Tuple[Optional[str], Optional[str]]:
477
  """
478
  生成整曲,返回:(主输出音频路径, 混音输出音频路径或错误消息字符串)
@@ -523,9 +536,9 @@ def generate_full_song(
523
  # 混音版本(如有 BGM 且开启开关)
524
  mixed_path = None
525
  bgm_p = bgm_path_for(template_path)
526
- if use_bgm and bgm_p and bgm_p.exists():
527
  bgm_audio = audiosegment_from_file(bgm_p)
528
- mixed = mix_full_song(vocal_full, bgm_audio)
529
  mixed_path = final_dir / f"{template_name}_mixed_{ts_tag}.wav"
530
  export_wav(mixed, mixed_path)
531
 
@@ -589,7 +602,7 @@ def build_ui():
589
  download_btn = gr.DownloadButton(label="下载当前ds状态", elem_classes=["compact-btn"])
590
 
591
  with gr.Row():
592
- bgm_switch = gr.Checkbox(label="BGM开关", value=False, visible=False)
593
  key_shift = gr.Slider(-12, 12, value=0, step=1, label="音高偏移")
594
  steps = gr.Slider(1, 50, value=4, step=1, label="渲染步数")
595
  speaker = gr.Dropdown(label="演唱者", choices=[], value=None, interactive=True)
@@ -680,17 +693,17 @@ def build_ui():
680
  outputs=[speaker],
681
  )
682
 
683
- # BGM 开关变化:控制“整首(混音)”可见性(需当前模板存在 BGM)
684
- def on_bgm_toggle(use_bgm, template_name):
685
  mapping = find_templates()
686
  has_bgm = False
687
  if template_name in mapping:
688
  has_bgm = bgm_path_for(mapping[template_name]) is not None
689
- return gr.update(visible=bool(use_bgm and has_bgm))
690
 
691
- bgm_switch.change(
692
- fn=on_bgm_toggle,
693
- inputs=[bgm_switch, template_sel],
694
  outputs=[full_mixed],
695
  )
696
 
@@ -698,7 +711,7 @@ def build_ui():
698
  template_sel.change(
699
  fn=on_template_change,
700
  inputs=[template_sel],
701
- outputs=[bgm_switch, template_sel, lines_state, offsets_state, per_line_error, full_mixed, *textboxes],
702
  )
703
 
704
  # 上传模板:将用户 .ds 保存到 templates/user,并刷新模板下拉
@@ -739,11 +752,11 @@ def build_ui():
739
  # 文本提交事件:逐句渲染(为预创建文本框绑定)
740
  for idx, tb in enumerate(textboxes):
741
  def make_submit(i):
742
- def _submit(new_text, lines_list, model_sel_v, template_sel_v, speaker_v, key_shift_v, steps_v, use_bgm_v):
743
  # 仅处理当前可见范围内的行
744
  if not isinstance(lines_list, list) or i >= max(len(lines_list), 0):
745
  return gr.update(), gr.update(), lines_list
746
- audio_path, err = render_single_line(model_sel_v, template_sel_v, i, new_text, speaker_v, key_shift_v, steps_v, use_bgm_v)
747
  if i < len(lines_list):
748
  lines_list[i] = new_text
749
  if err:
@@ -752,12 +765,12 @@ def build_ui():
752
  return _submit
753
  tb.submit(
754
  fn=make_submit(idx),
755
- inputs=[tb, lines_state, model_sel, template_sel, speaker, key_shift, steps, bgm_switch],
756
  outputs=[per_line_audio, per_line_error, lines_state],
757
  )
758
 
759
  # 生成整首(支持进度与中断)
760
- def on_gen_or_stop(model_sel_v, template_sel_v, speaker_v, key_shift_v, steps_v, use_bgm_v, lines, stop, generating, progress=gr.Progress(track_tqdm=True)):
761
  # 若正在生成,本次点击作为“停止”信号,仅更新按钮与提示
762
  if generating:
763
  stop = True
@@ -823,8 +836,8 @@ def build_ui():
823
 
824
  mixed_path = None
825
  bgm_p = bgm_path_for(template_path)
826
- if use_bgm_v and bgm_p and bgm_p.exists():
827
- mixed = mix_full_song(vocal_full, audiosegment_from_file(bgm_p))
828
  mixed_path = final_dir / f"{template_sel_v}_mixed_{ts_tag}.wav"
829
  export_wav(mixed, mixed_path)
830
 
@@ -835,7 +848,7 @@ def build_ui():
835
 
836
  gen_btn.click(
837
  fn=on_gen_or_stop,
838
- inputs=[model_sel, template_sel, speaker, key_shift, steps, bgm_switch, lines_state, stop_flag, generating_flag],
839
  outputs=[full_vocal, full_mixed, gen_btn, progress_md, stop_flag, generating_flag],
840
  )
841
 
@@ -890,15 +903,20 @@ def build_ui():
890
  demo.load(
891
  fn=on_app_load,
892
  inputs=[model_sel, template_sel],
893
- outputs=[speaker, bgm_switch, template_sel, lines_state, offsets_state, per_line_error, full_mixed, *textboxes],
894
  )
895
 
896
  return demo
897
 
898
 
899
  def main():
 
 
 
 
 
900
  demo = build_ui()
901
- demo.launch()
902
 
903
 
904
  if __name__ == "__main__":
 
5
  import sys
6
  import hashlib
7
  import time
8
+ import math
9
  from typing import List, Dict, Optional, Tuple
10
  from pathlib import Path
11
 
 
13
  from pydub import AudioSegment
14
  # 引入与 sample.py 一致的组件
15
  try:
16
+ from diffsinger_utau.voice_bank import PredAll
17
+ from diffsinger_utau.voice_bank.commons.ds_reader import DSReader
18
+ from diffsinger_utau.voice_bank.commons.phome_num_counter import Phome
19
  from pypinyin import pinyin, Style
20
  from pypinyin.constants import RE_HANS
21
  except Exception:
 
167
  seg.export(str(path), format="wav")
168
 
169
 
170
+ def overlay_bgm_snippet(vocal_wav: Path, bgm_audio: AudioSegment, offset_sec: float, bgm_volume: float = 1.0, vocal_gain_db: float = 0.0) -> AudioSegment:
171
  vocal = audiosegment_from_file(vocal_wav)
172
+ if vocal_gain_db != 0.0:
173
+ vocal = vocal + vocal_gain_db
174
  start_ms = max(int(offset_sec * 1000), 0)
175
+ # 应用 BGM 音量倍率
176
+ if bgm_volume <= 0.0:
177
+ base = AudioSegment.silent(duration=start_ms + len(vocal))
178
+ else:
179
+ gain_db = 20.0 * math.log10(bgm_volume)
180
+ bgm_audio = bgm_audio + gain_db
181
+ base = bgm_audio
182
+ # 确保底轨足够长
183
+ if len(base) < start_ms + len(vocal):
184
+ pad_ms = start_ms + len(vocal) - len(base)
185
+ base = base + AudioSegment.silent(duration=pad_ms)
186
+ mixed = base.overlay(vocal, position=start_ms)
187
  return mixed[start_ms : start_ms + len(vocal)]
188
 
189
 
 
202
  return timeline
203
 
204
 
205
+ def mix_full_song(vocal: AudioSegment, bgm: AudioSegment, bgm_volume: float = 1.0) -> AudioSegment:
206
  # 保证两者同长度
207
  if len(bgm) < len(vocal):
208
  bgm = bgm + AudioSegment.silent(duration=(len(vocal) - len(bgm)))
209
  else:
210
  vocal = vocal + AudioSegment.silent(duration=(len(bgm) - len(vocal)))
211
+ # 应用 BGM 音量倍率
212
+ if bgm_volume <= 0.0:
213
+ bgm_adj = AudioSegment.silent(duration=len(bgm))
214
+ else:
215
+ gain_db = 20.0 * math.log10(bgm_volume)
216
+ bgm_adj = bgm + gain_db
217
+ return bgm_adj.overlay(vocal)
218
 
219
 
220
  def param_hash(model_sel: str, speaker: str, key_shift: int, steps: int, text: str) -> str:
 
404
  def on_select_template(template_name: str):
405
  mapping = find_templates()
406
  if not template_name or template_name not in mapping:
407
+ return gr.update(visible=False, value=0.0), [], []
408
  p = mapping[template_name]
409
  bgm = bgm_path_for(p)
410
  ds = load_ds(p)
411
  lines = [preprocess_zh_spaces(item.get("text", "")) for item in ds]
412
  offsets = [float(item.get("offset", 0.0)) for item in ds]
413
+ bgm_update = gr.update(visible=(bgm is not None), value=(1.0 if bgm is not None else 0.0))
414
  return bgm_update, lines, offsets
415
 
416
 
 
422
  speaker: str,
423
  key_shift: int,
424
  steps: int,
425
+ bgm_volume: float,
426
  ) -> Tuple[Optional[str], Optional[str]]:
427
  """
428
  渲染单句,返回:(音频路径, 错误消息)
 
464
 
465
  # 是否混音预览
466
  mapping_bgm = bgm_path_for(template_path)
467
+ if (bgm_volume and bgm_volume > 0.0) and mapping_bgm and mapping_bgm.exists():
468
  bgm_audio = audiosegment_from_file(mapping_bgm)
469
  offset = float(ds[line_index].get("offset", 0.0))
470
+ mixed_seg = overlay_bgm_snippet(wav_out, bgm_audio, offset, bgm_volume)
471
  preview_wav = cache_dir / f"line_{line_index+1}_preview.wav"
472
  export_wav(mixed_seg, preview_wav)
473
  return str(preview_wav), None
 
485
  speaker: str,
486
  key_shift: int,
487
  steps: int,
488
+ bgm_volume: float,
489
  ) -> Tuple[Optional[str], Optional[str]]:
490
  """
491
  生成整曲,返回:(主输出音频路径, 混音输出音频路径或错误消息字符串)
 
536
  # 混音版本(如有 BGM 且开启开关)
537
  mixed_path = None
538
  bgm_p = bgm_path_for(template_path)
539
+ if (bgm_volume and bgm_volume > 0.0) and bgm_p and bgm_p.exists():
540
  bgm_audio = audiosegment_from_file(bgm_p)
541
+ mixed = mix_full_song(vocal_full, bgm_audio, bgm_volume)
542
  mixed_path = final_dir / f"{template_name}_mixed_{ts_tag}.wav"
543
  export_wav(mixed, mixed_path)
544
 
 
602
  download_btn = gr.DownloadButton(label="下载当前ds状态", elem_classes=["compact-btn"])
603
 
604
  with gr.Row():
605
+ bgm_volume = gr.Slider(0.0, 2.0, value=1.0, step=0.01, label="BGM音量倍率", visible=False)
606
  key_shift = gr.Slider(-12, 12, value=0, step=1, label="音高偏移")
607
  steps = gr.Slider(1, 50, value=4, step=1, label="渲染步数")
608
  speaker = gr.Dropdown(label="演唱者", choices=[], value=None, interactive=True)
 
693
  outputs=[speaker],
694
  )
695
 
696
+ # BGM 音量倍率变化:控制“整首(混音)”可见性(需当前模板存在 BGM 且倍率>0)
697
+ def on_bgm_volume_change(bgm_vol, template_name):
698
  mapping = find_templates()
699
  has_bgm = False
700
  if template_name in mapping:
701
  has_bgm = bgm_path_for(mapping[template_name]) is not None
702
+ return gr.update(visible=bool(bgm_vol and bgm_vol > 0 and has_bgm))
703
 
704
+ bgm_volume.change(
705
+ fn=on_bgm_volume_change,
706
+ inputs=[bgm_volume, template_sel],
707
  outputs=[full_mixed],
708
  )
709
 
 
711
  template_sel.change(
712
  fn=on_template_change,
713
  inputs=[template_sel],
714
+ outputs=[bgm_volume, template_sel, lines_state, offsets_state, per_line_error, full_mixed, *textboxes],
715
  )
716
 
717
  # 上传模板:将用户 .ds 保存到 templates/user,并刷新模板下拉
 
752
  # 文本提交事件:逐句渲染(为预创建文本框绑定)
753
  for idx, tb in enumerate(textboxes):
754
  def make_submit(i):
755
+ def _submit(new_text, lines_list, model_sel_v, template_sel_v, speaker_v, key_shift_v, steps_v, bgm_volume_v):
756
  # 仅处理当前可见范围内的行
757
  if not isinstance(lines_list, list) or i >= max(len(lines_list), 0):
758
  return gr.update(), gr.update(), lines_list
759
+ audio_path, err = render_single_line(model_sel_v, template_sel_v, i, new_text, speaker_v, key_shift_v, steps_v, bgm_volume_v)
760
  if i < len(lines_list):
761
  lines_list[i] = new_text
762
  if err:
 
765
  return _submit
766
  tb.submit(
767
  fn=make_submit(idx),
768
+ inputs=[tb, lines_state, model_sel, template_sel, speaker, key_shift, steps, bgm_volume],
769
  outputs=[per_line_audio, per_line_error, lines_state],
770
  )
771
 
772
  # 生成整首(支持进度与中断)
773
+ def on_gen_or_stop(model_sel_v, template_sel_v, speaker_v, key_shift_v, steps_v, bgm_volume_v, lines, stop, generating, progress=gr.Progress(track_tqdm=True)):
774
  # 若正在生成,本次点击作为“停止”信号,仅更新按钮与提示
775
  if generating:
776
  stop = True
 
836
 
837
  mixed_path = None
838
  bgm_p = bgm_path_for(template_path)
839
+ if (bgm_volume_v and bgm_volume_v > 0.0) and bgm_p and bgm_p.exists():
840
+ mixed = mix_full_song(vocal_full, audiosegment_from_file(bgm_p), bgm_volume_v)
841
  mixed_path = final_dir / f"{template_sel_v}_mixed_{ts_tag}.wav"
842
  export_wav(mixed, mixed_path)
843
 
 
848
 
849
  gen_btn.click(
850
  fn=on_gen_or_stop,
851
+ inputs=[model_sel, template_sel, speaker, key_shift, steps, bgm_volume, lines_state, stop_flag, generating_flag],
852
  outputs=[full_vocal, full_mixed, gen_btn, progress_md, stop_flag, generating_flag],
853
  )
854
 
 
903
  demo.load(
904
  fn=on_app_load,
905
  inputs=[model_sel, template_sel],
906
+ outputs=[speaker, bgm_volume, template_sel, lines_state, offsets_state, per_line_error, full_mixed, *textboxes],
907
  )
908
 
909
  return demo
910
 
911
 
912
  def main():
913
+ parser = argparse.ArgumentParser()
914
+ parser.add_argument("--host", type=str, default="127.0.0.1")
915
+ parser.add_argument("--port", type=int, default=7860)
916
+ args = parser.parse_args()
917
+
918
  demo = build_ui()
919
+ demo.launch(server_name=args.host, server_port=args.port, show_error=True)
920
 
921
 
922
  if __name__ == "__main__":