xjsc0 committed on
Commit
1edd14f
·
1 Parent(s): 877f49e
app.py CHANGED
@@ -33,35 +33,26 @@ def local_move2gpu(x):
33
 
34
 
35
  # ---------------------------------------------------------------------------
36
- # Model loading (lazy, singleton) / 模型懒加载(单例)
37
  # ---------------------------------------------------------------------------
38
- _model = None
39
- _separator = None
40
-
41
-
42
- def _load_model_impl():
43
- """Internal: load YingMusicSinger (no GPU decorator, called inside GPU context)."""
44
- download_files(task="infer")
45
- global _model
46
- if _model is None:
47
- from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
48
- _model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
49
- _model = local_move2gpu(_model)
50
- _model.eval()
51
- return _model
52
-
53
-
54
- def _load_separator_impl():
55
- """Internal: load MelBandRoformer separator (no GPU decorator, called inside GPU context)."""
56
- download_files(task="infer")
57
- global _separator
58
- if _separator is None:
59
- from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
60
- _separator = Separator(
61
- config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
62
- checkpoint_path="ckpts/MelBandRoformer.ckpt",
63
- )
64
- return _separator
65
 
66
 
67
  # ---------------------------------------------------------------------------
@@ -72,7 +63,7 @@ def _separate_vocals_impl(audio_path: str) -> tuple:
72
  Separate audio into vocals and accompaniment using MelBandRoformer.
73
  Must be called within an active GPU context.
74
  """
75
- separator = _load_separator_impl()
76
 
77
  wav, sr = torchaudio.load(audio_path)
78
  vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
@@ -169,9 +160,7 @@ def synthesize(
169
  melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
170
  actual_melody_path = melody_vocals_path
171
 
172
- model = _load_model_impl()
173
-
174
- audio_tensor, sr = model(
175
  ref_audio_path=actual_ref_path,
176
  melody_audio_path=actual_melody_path,
177
  ref_text=ref_text.strip(),
@@ -266,11 +255,6 @@ EXAMPLES_LYRIC_EDIT = [
266
 
267
  # ---------------------------------------------------------------------------
268
  # Custom CSS / 自定义样式
269
- # CHANGES:
270
- # 1. Top gradient bar using palette colors
271
- # 2. Section titles: left-bar accent instead of bottom border
272
- # 3. Color palette updated to #FE9EC7 / #F9F6C4 / #89D4FF / #44ACFF
273
- # 4. Vocal-sep info box: reference/melody text use palette colors
274
  # ---------------------------------------------------------------------------
275
  CUSTOM_CSS = """
276
  @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
@@ -363,39 +347,24 @@ CUSTOM_CSS = """
363
  }
364
 
365
  /* ========== 2. Section labels: left accent bar ========== */
366
-
367
- /*
368
- 用更高特异性的选择器替代大量 !important
369
- 假设父容器有 .resume-container 或类似的包裹类
370
- */
371
  .resume-container .section-title {
372
- /* Reset */
373
  border: none;
374
  outline: none;
375
  box-shadow: none;
376
-
377
- /* Typography */
378
  font-family: 'DM Sans', sans-serif;
379
  font-weight: 700;
380
  font-size: 1rem;
381
  letter-spacing: 0.06em;
382
  text-transform: uppercase;
383
  color: var(--primary);
384
-
385
- /* Layout */
386
  display: block;
387
  padding: 3px 0 3px 10px;
388
  margin-bottom: 14px;
389
-
390
- /* Left accent bar — 用 border-left 单独声明,避免被 border shorthand 覆盖 */
391
  border-left: 4px solid var(--primary-warm);
392
-
393
- /* Background & shape */
394
  background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
395
  border-radius: 0 4px 4px 0;
396
  }
397
 
398
- /* 子元素继承重置,避免外部样式污染 */
399
  .resume-container .section-title * {
400
  border: inherit;
401
  outline: none;
@@ -434,13 +403,10 @@ CUSTOM_CSS = """
434
  }
435
 
436
  /* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
437
- /* Target the <b> tags inside the vocal separation info box */
438
  .vocal-sep-info b {
439
- /* default fallback */
440
  color: #c9d1d9;
441
  font-weight: 700;
442
  }
443
- /* Specifically target ref audio and melody audio highlights via data-color */
444
  b[data-ref] {
445
  color: #FE9EC7 !important;
446
  }
@@ -553,53 +519,11 @@ def build_ui():
553
  )
554
 
555
  # ================================================================
556
- # ROW 2 – 预设示例
557
- # ================================================================
558
- gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
559
- gr.Markdown("#### 🎵 预设示例 / Example Presets")
560
- gr.Markdown(
561
- "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
562
- )
563
-
564
- with gr.Row(visible=False):
565
- _sep_flag_ex = gr.Checkbox(value=True, label="分离人声 / Separate Vocals")
566
- _mix_flag_ex = gr.Checkbox(value=False, label="混入伴奏 / Mix Accomp.")
567
- _sil_ex = gr.Number(value=0.5, label="静音 / Silence (s)")
568
- _tshift_ex = gr.Number(value=0.5, label="t-shift")
569
- _nfe_ex = gr.Number(value=32, label="NFE Steps")
570
- _cfg_ex = gr.Number(value=3.0, label="CFG")
571
- _seed_ex = gr.Number(value=-1, precision=0, label="Seed")
572
-
573
- _example_inputs = [
574
- ref_audio, melody_audio, ref_text, target_text,
575
- _sep_flag_ex, _mix_flag_ex,
576
- _sil_ex, _tshift_ex, _nfe_ex, _cfg_ex, _seed_ex,
577
- ]
578
-
579
- with gr.Tabs():
580
- with gr.Tab("🎼 Melody Control"):
581
- gr.Examples(
582
- examples=EXAMPLES_MELODY_CONTROL,
583
- inputs=_example_inputs,
584
- label="Melody Control Examples",
585
- examples_per_page=5,
586
- )
587
- with gr.Tab("✏️ Lyric Edit"):
588
- gr.Examples(
589
- examples=EXAMPLES_LYRIC_EDIT,
590
- inputs=_example_inputs,
591
- label="Lyric Edit Examples",
592
- examples_per_page=5,
593
- )
594
-
595
- # ================================================================
596
- # ROW 3 – 伴奏分离
597
  # ================================================================
598
  gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
599
  gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
600
 
601
- # CHANGE 4: Use inline style colors for 参考音频 and 旋律音频
602
- # so they render correctly regardless of CSS specificity issues
603
  gr.HTML("""
604
  <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
605
  background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
@@ -632,6 +556,9 @@ def build_ui():
632
  info="将合成人声与分离出的伴奏混合作为最终输出(需先开启人声分离)/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
633
  )
634
 
 
 
 
635
  with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
636
  with gr.Row():
637
  nfe_step = gr.Slider(
@@ -660,6 +587,38 @@ def build_ui():
660
  info="-1 表示随机生成 / -1 means random",
661
  )
662
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
  # ================================================================
664
  # ROW 5 – 合成按钮与输出
665
  # ================================================================
 
33
 
34
 
35
  # ---------------------------------------------------------------------------
36
+ # Model loading (eager, at startup) / 启动时立即加载,常驻内存
37
  # ---------------------------------------------------------------------------
38
+ print("🔄 Downloading required files...")
39
+ download_files(task="infer")
40
+
41
+ print("🔄 Loading YingMusicSinger model...")
42
+ from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
43
+ _model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
44
+ _model = local_move2gpu(_model)
45
+ _model.eval()
46
+ print("✅ YingMusicSinger model loaded.")
47
+
48
+ print("🔄 Loading MelBandRoformer separator...")
49
+ from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
50
+ _separator = Separator(
51
+ config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
52
+ checkpoint_path="ckpts/MelBandRoformer.ckpt",
53
+ )
54
+ print("✅ MelBandRoformer separator loaded.")
55
+ print("🎤 All models ready. Starting UI...")
 
 
 
 
 
 
 
 
 
56
 
57
 
58
  # ---------------------------------------------------------------------------
 
63
  Separate audio into vocals and accompaniment using MelBandRoformer.
64
  Must be called within an active GPU context.
65
  """
66
+ separator = _separator
67
 
68
  wav, sr = torchaudio.load(audio_path)
69
  vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
 
160
  melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
161
  actual_melody_path = melody_vocals_path
162
 
163
+ audio_tensor, sr = _model(
 
 
164
  ref_audio_path=actual_ref_path,
165
  melody_audio_path=actual_melody_path,
166
  ref_text=ref_text.strip(),
 
255
 
256
  # ---------------------------------------------------------------------------
257
  # Custom CSS / 自定义样式
 
 
 
 
 
258
  # ---------------------------------------------------------------------------
259
  CUSTOM_CSS = """
260
  @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
 
347
  }
348
 
349
  /* ========== 2. Section labels: left accent bar ========== */
 
 
 
 
 
350
  .resume-container .section-title {
 
351
  border: none;
352
  outline: none;
353
  box-shadow: none;
 
 
354
  font-family: 'DM Sans', sans-serif;
355
  font-weight: 700;
356
  font-size: 1rem;
357
  letter-spacing: 0.06em;
358
  text-transform: uppercase;
359
  color: var(--primary);
 
 
360
  display: block;
361
  padding: 3px 0 3px 10px;
362
  margin-bottom: 14px;
 
 
363
  border-left: 4px solid var(--primary-warm);
 
 
364
  background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
365
  border-radius: 0 4px 4px 0;
366
  }
367
 
 
368
  .resume-container .section-title * {
369
  border: inherit;
370
  outline: none;
 
403
  }
404
 
405
  /* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
 
406
  .vocal-sep-info b {
 
407
  color: #c9d1d9;
408
  font-weight: 700;
409
  }
 
410
  b[data-ref] {
411
  color: #FE9EC7 !important;
412
  }
 
519
  )
520
 
521
  # ================================================================
522
+ # ROW 2 – 伴奏分离
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  # ================================================================
524
  gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
525
  gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
526
 
 
 
527
  gr.HTML("""
528
  <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
529
  background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
 
556
  info="将合成人声与分离出的伴奏混合作为最终输出(需先开启人声分离)/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
557
  )
558
 
559
+ # ================================================================
560
+ # ROW 3 – 高级参数
561
+ # ================================================================
562
  with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
563
  with gr.Row():
564
  nfe_step = gr.Slider(
 
587
  info="-1 表示随机生成 / -1 means random",
588
  )
589
 
590
+ # ================================================================
591
+ # ROW 4 – 预设示例(放在所有真实控件定义之后)
592
+ # ================================================================
593
+ gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
594
+ gr.Markdown("#### 🎵 预设示例 / Example Presets")
595
+ gr.Markdown(
596
+ "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
597
+ )
598
+
599
+ # 所有真实控件均已定义,直接绑定
600
+ _example_inputs = [
601
+ ref_audio, melody_audio, ref_text, target_text,
602
+ separate_vocals_flag, mix_accompaniment_flag,
603
+ sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
604
+ ]
605
+
606
+ with gr.Tabs():
607
+ with gr.Tab("🎼 Melody Control"):
608
+ gr.Examples(
609
+ examples=EXAMPLES_MELODY_CONTROL,
610
+ inputs=_example_inputs,
611
+ label="Melody Control Examples",
612
+ examples_per_page=5,
613
+ )
614
+ with gr.Tab("✏️ Lyric Edit"):
615
+ gr.Examples(
616
+ examples=EXAMPLES_LYRIC_EDIT,
617
+ inputs=_example_inputs,
618
+ label="Lyric Edit Examples",
619
+ examples_per_page=5,
620
+ )
621
+
622
  # ================================================================
623
  # ROW 5 – 合成按钮与输出
624
  # ================================================================
examples/hf_space/melody_control/melody_control_ZH_02_melody.wav CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795289e027cf06fd8e3898453fc353265dc114ca111446999900aa4dba40c126
3
- size 1221884
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d022b45bd36f0e3618a85422f21a9fd244a18c998c60f0aa59fdfe3c1beb9c59
3
+ size 2039084
examples/hf_space/melody_control/melody_control_ZH_02_timbre.wav CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d022b45bd36f0e3618a85422f21a9fd244a18c998c60f0aa59fdfe3c1beb9c59
3
- size 2039084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795289e027cf06fd8e3898453fc353265dc114ca111446999900aa4dba40c126
3
+ size 1221884