xjsc0 committed on
Commit
e12d467
·
1 Parent(s): a8cff3b
Files changed (1) hide show
  1. app_local.py +169 -148
app_local.py CHANGED
@@ -28,35 +28,27 @@ def local_move2gpu(x):
28
 
29
 
30
  # ---------------------------------------------------------------------------
31
- # Model loading (lazy, singleton) / 模型懒加载(单例)
32
  # ---------------------------------------------------------------------------
33
- _model = None
34
- _separator = None
35
-
36
-
37
- def _load_model_impl():
38
- """Internal: load YingMusicSinger (no GPU decorator, called inside GPU context)."""
39
- download_files(task="infer")
40
- global _model
41
- if _model is None:
42
- from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
43
- _model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
44
- _model = local_move2gpu(_model)
45
- _model.eval()
46
- return _model
47
-
48
-
49
- def _load_separator_impl():
50
- """Internal: load MelBandRoformer separator (no GPU decorator, called inside GPU context)."""
51
- download_files(task="infer")
52
- global _separator
53
- if _separator is None:
54
- from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
55
- _separator = Separator(
56
- config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
57
- checkpoint_path="ckpts/MelBandRoformer.ckpt",
58
- )
59
- return _separator
60
 
61
 
62
  # ---------------------------------------------------------------------------
@@ -67,7 +59,7 @@ def _separate_vocals_impl(audio_path: str) -> tuple:
67
  Separate audio into vocals and accompaniment using MelBandRoformer.
68
  Must be called within an active GPU context.
69
  """
70
- separator = _load_separator_impl()
71
 
72
  wav, sr = torchaudio.load(audio_path)
73
  vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
@@ -118,10 +110,8 @@ def mix_vocal_and_accompaniment(
118
 
119
  # ---------------------------------------------------------------------------
120
  # Inference wrapper / 推理入口
121
- # Single @spaces.GPU scope covers ALL heavy work (separation + synthesis)
122
- # so models stay resident in GPU memory across steps within one call.
123
  # ---------------------------------------------------------------------------
124
-
125
  def synthesize(
126
  ref_audio,
127
  melody_audio,
@@ -135,16 +125,8 @@ def synthesize(
135
  cfg_strength,
136
  seed,
137
  ):
138
- """
139
- 主合成流程 / Main synthesis pipeline.
140
-
141
- 1. (可选) 用 MelBandRoformer 分离参考音频和旋律音频的人声与伴奏
142
- 2. 送入 YingMusicSinger 合成
143
- 3. (可选) 将合成人声与旋律音频的伴奏混合
144
- """
145
  import random
146
 
147
- # ---- 输入校验 / Input validation ----------------------------------------
148
  if ref_audio is None:
149
  raise gr.Error("请上传参考音频 / Please upload Reference Audio")
150
  if melody_audio is None:
@@ -153,6 +135,8 @@ def synthesize(
153
  raise gr.Error("请输入参考音频对应的歌词 / Please enter Reference Text")
154
  if not target_text.strip():
155
  raise gr.Error("请输入目标合成歌词 / Please enter Target Text")
 
 
156
 
157
  ref_audio_path = ref_audio if isinstance(ref_audio, str) else ref_audio[0]
158
  melody_audio_path = (
@@ -163,7 +147,6 @@ def synthesize(
163
  if actual_seed < 0:
164
  actual_seed = random.randint(0, 2**31 - 1)
165
 
166
- # ---- Step 1: 人声分离(合并在同一 GPU 上下文中)/ Vocal separation (same GPU context) ----
167
  melody_accomp_path = None
168
  actual_ref_path = ref_audio_path
169
  actual_melody_path = melody_audio_path
@@ -175,10 +158,7 @@ def synthesize(
175
  melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
176
  actual_melody_path = melody_vocals_path
177
 
178
- # ---- Step 2: 模型推理 / Model inference (same GPU context) ---------------
179
- model = _load_model_impl()
180
-
181
- audio_tensor, sr = model(
182
  ref_audio_path=actual_ref_path,
183
  melody_audio_path=actual_melody_path,
184
  ref_text=ref_text.strip(),
@@ -194,7 +174,6 @@ def synthesize(
194
  vocal_out_path = os.path.join(tempfile.mkdtemp(), "vocal_output.wav")
195
  torchaudio.save(vocal_out_path, audio_tensor.to("cpu"), sample_rate=sr)
196
 
197
- # ---- Step 3: 混合伴奏 / Mix accompaniment (optional) ---------------------
198
  if (
199
  separate_vocals_flag
200
  and mix_accompaniment_flag
@@ -210,37 +189,64 @@ def synthesize(
210
  # Example presets / 预设示例
211
  # ---------------------------------------------------------------------------
212
  EXAMPLES_MELODY_CONTROL = [
213
- # [ref_audio, melody_audio, ref_text, target_text, sep, mix, sil, t_shift, nfe, cfg, seed]
214
  [
215
- "examples/melody_control/ref_01.wav",
216
- "examples/melody_control/melody_01.wav",
217
- "该体谅不执着|如果那天我",
218
- "好多天|看不完你",
219
- True, False, 0.5, 0.5, 32, 3.0, -1,
220
  ],
221
  [
222
- "examples/melody_control/ref_02.wav",
223
- "examples/melody_control/melody_02.wav",
224
- "月光下的身影|渐渐模糊",
225
- "星光照亮前路|指引方向",
226
- True, False, 0.5, 0.5, 32, 3.0, -1,
227
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  ]
229
 
230
  EXAMPLES_LYRIC_EDIT = [
231
  [
232
- "examples/lyric_edit/ref_01.wav",
233
- "examples/lyric_edit/melody_01.wav",
234
- "该体谅的不执着|如果那天我",
235
- "忘不掉的笑容|心里面",
236
- True, False, 0.5, 0.5, 32, 3.0, -1,
 
 
 
 
 
 
 
237
  ],
238
  [
239
- "examples/lyric_edit/ref_02.wav",
240
- "examples/lyric_edit/melody_02.wav",
241
- "夜深还不睡|想着你",
242
- "春风又吹过来|带走我思念",
243
- True, False, 0.5, 0.5, 32, 3.0, -1,
 
 
 
 
 
 
 
244
  ],
245
  ]
246
 
@@ -252,14 +258,16 @@ CUSTOM_CSS = """
252
  @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
253
 
254
  :root {
255
- --primary: #e85d04;
256
- --primary-light: #f48c06;
 
 
257
  --bg-dark: #0d1117;
258
  --surface: #161b22;
259
  --surface-light: #21262d;
260
  --text: #f0f6fc;
261
  --text-muted: #8b949e;
262
- --accent-glow: rgba(232, 93, 4, 0.15);
263
  --border: #30363d;
264
  }
265
 
@@ -269,7 +277,7 @@ CUSTOM_CSS = """
269
  margin: auto !important;
270
  }
271
 
272
- /* ---------- Badge links: no underline, no gap artifacts ---------- */
273
  #app-header .badges a {
274
  text-decoration: none !important;
275
  display: inline-block;
@@ -286,7 +294,7 @@ CUSTOM_CSS = """
286
  line-height: 1.8;
287
  }
288
 
289
- /* ---------- Header / 头部 ---------- */
290
  #app-header {
291
  text-align: center;
292
  padding: 1.8rem 1rem 0.5rem;
@@ -336,17 +344,29 @@ CUSTOM_CSS = """
336
  font-weight: 600;
337
  }
338
 
339
- /* ---------- Section labels / 分区标题 ---------- */
340
- .section-title {
341
- font-family: 'DM Sans', sans-serif !important;
342
- font-weight: 700 !important;
343
- font-size: 1rem !important;
 
 
 
344
  letter-spacing: 0.06em;
345
  text-transform: uppercase;
346
- color: var(--primary-light) !important;
347
- border-bottom: 2px solid var(--primary);
348
- padding-bottom: 6px;
349
- margin-bottom: 12px !important;
 
 
 
 
 
 
 
 
 
350
  }
351
 
352
  /* ---------- Example tabs ---------- */
@@ -355,29 +375,41 @@ CUSTOM_CSS = """
355
  font-size: 0.95rem !important;
356
  }
357
 
358
- /* ---------- Run button / 合成按钮 ---------- */
359
  #run-btn {
360
- background: linear-gradient(135deg, #e85d04, #dc2f02) !important;
361
  border: none !important;
362
- color: #fff !important;
363
  font-weight: 700 !important;
364
  font-size: 1.1rem !important;
365
  letter-spacing: 0.04em;
366
  padding: 12px 0 !important;
367
  border-radius: 10px !important;
368
  transition: transform 0.15s, box-shadow 0.25s !important;
369
- box-shadow: 0 4px 20px rgba(232, 93, 4, 0.35) !important;
370
  }
371
  #run-btn:hover {
372
  transform: translateY(-1px) !important;
373
- box-shadow: 0 6px 28px rgba(232, 93, 4, 0.5) !important;
374
  }
375
 
376
- /* ---------- Output audio / 输出音频 ---------- */
377
  #output-audio {
378
- border: 2px solid var(--primary) !important;
379
  border-radius: 12px !important;
380
- background: var(--accent-glow) !important;
 
 
 
 
 
 
 
 
 
 
 
 
381
  }
382
  """
383
 
@@ -472,7 +504,7 @@ def build_ui():
472
  type="filepath",
473
  )
474
  with gr.Column(scale=1):
475
- gr.Markdown("#### ✏️ 歌词输入 / Lyrics", elem_classes="section-title")
476
  ref_text = gr.Textbox(
477
  label="参考音频歌词 / Reference Lyrics",
478
  placeholder="例如 / e.g.:该体谅的不执着|如果那天我",
@@ -485,58 +517,17 @@ def build_ui():
485
  )
486
 
487
  # ================================================================
488
- # ROW 2 – 预设示例 / Example Presets ← before vocal separation
489
  # ================================================================
490
  gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
491
- gr.Markdown("#### 🎵 预设示例 / Example Presets", elem_classes="section-title")
492
- gr.Markdown(
493
- "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
494
- )
495
-
496
- # Hidden advanced-param components so gr.Examples can reference them
497
- # (real sliders rendered inside the accordion below override these values)
498
- with gr.Row(visible=False):
499
- _sep_flag_ex = gr.Checkbox(value=True)
500
- _mix_flag_ex = gr.Checkbox(value=False)
501
- _sil_ex = gr.Number(value=0.5)
502
- _tshift_ex = gr.Number(value=0.5)
503
- _nfe_ex = gr.Number(value=32)
504
- _cfg_ex = gr.Number(value=3.0)
505
- _seed_ex = gr.Number(value=-1, precision=0)
506
 
507
- _example_inputs = [
508
- ref_audio, melody_audio, ref_text, target_text,
509
- _sep_flag_ex, _mix_flag_ex,
510
- _sil_ex, _tshift_ex, _nfe_ex, _cfg_ex, _seed_ex,
511
- ]
512
-
513
- with gr.Tabs():
514
- with gr.Tab("🎼 Melody Control"):
515
- gr.Examples(
516
- examples=EXAMPLES_MELODY_CONTROL,
517
- inputs=_example_inputs,
518
- label="Melody Control Examples",
519
- examples_per_page=5,
520
- )
521
- with gr.Tab("✏️ Lyric Edit"):
522
- gr.Examples(
523
- examples=EXAMPLES_LYRIC_EDIT,
524
- inputs=_example_inputs,
525
- label="Lyric Edit Examples",
526
- examples_per_page=5,
527
- )
528
-
529
- # ================================================================
530
- # ROW 3 – 伴奏分离 / Vocal Separation
531
- # ================================================================
532
- gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
533
- gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation", elem_classes="section-title")
534
  gr.HTML("""
535
  <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
536
  background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
537
  <ul style="margin:0; padding-left:1.2em; list-style: none;">
538
  <li style="margin-bottom:7px;">
539
- 💡 若输入的<b style="color:#c9d1d9;">参考音频</b>或<b style="color:#c9d1d9;">旋律音频</b>中含有伴奏或背景噪音,请开启「分离人声后过模型」—— 模型基于纯人声训练,混合音频会影响合成质量。<br>
540
  <span style="color:#6e7681; font-size:0.82rem;">If either input contains accompaniment or background noise, enable <i>Separate vocals before synthesis</i> — the model is trained on clean vocals only and mixed audio degrades quality.</span>
541
  </li>
542
  <li style="margin-bottom:7px;">
@@ -544,7 +535,7 @@ def build_ui():
544
  <span style="color:#6e7681; font-size:0.82rem;">If both inputs are already clean vocals, skip separation — enabling it unnecessarily may introduce artifacts from the separation model.</span>
545
  </li>
546
  <li>
547
- 💡 若旋律音频含有伴奏,开启「分离人声后过模型」后,最终输出是否保留伴奏由「输出时混入伴奏」控制。<br>
548
  <span style="color:#6e7681; font-size:0.82rem;">If the melody audio contains accompaniment and separation is enabled, use <i>Mix accompaniment into output</i> to decide whether to include it in the final result.</span>
549
  </li>
550
  </ul>
@@ -563,6 +554,9 @@ def build_ui():
563
  info="将合成人声与分离出的伴奏混合作为最终输出(需先开启人声分离)/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
564
  )
565
 
 
 
 
566
  with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
567
  with gr.Row():
568
  nfe_step = gr.Slider(
@@ -592,7 +586,45 @@ def build_ui():
592
  )
593
 
594
  # ================================================================
595
- # ROW 5 – 合成按钮与输出 / Run & Output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  # ================================================================
597
  gr.HTML("<hr style='border-color:#30363d; margin: 12px 0;'>")
598
  run_btn = gr.Button("🎤 开始合成 / Start Synthesizing", elem_id="run-btn", size="lg")
@@ -603,36 +635,25 @@ def build_ui():
603
  elem_id="output-audio",
604
  )
605
 
606
- # All inputs for the synthesize() call (uses real sliders, not example placeholders)
607
  _all_inputs = [
608
  ref_audio, melody_audio, ref_text, target_text,
609
  separate_vocals_flag, mix_accompaniment_flag,
610
  sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
611
  ]
612
 
613
- # ================================================================
614
- # Event wiring / 事件绑定
615
- # ================================================================
616
- separate_vocals_flag.change(
617
- fn=lambda sep: gr.update(interactive=sep, value=False),
618
- inputs=[separate_vocals_flag],
619
- outputs=[mix_accompaniment_flag],
620
- )
621
-
622
  run_btn.click(
623
  fn=synthesize,
624
  inputs=_all_inputs,
625
  outputs=output_audio,
626
  )
627
 
628
- # ---- 页脚:免责声明 / Footer: disclaimer ----
629
  gr.HTML(DISCLAIMER_HTML)
630
 
631
  return demo
632
 
633
 
634
  # ---------------------------------------------------------------------------
635
- # Entry point / 启动入口
636
  # ---------------------------------------------------------------------------
637
  if __name__ == "__main__":
638
  demo = build_ui()
 
28
 
29
 
30
  # ---------------------------------------------------------------------------
31
+ # Model loading (eager, at startup) / 启动时立即加载,常驻内存
32
  # ---------------------------------------------------------------------------
33
+ print("🔄 Downloading required files...")
34
+ download_files(task="infer")
35
+
36
+ print("🔄 Loading YingMusicSinger model...")
37
+ from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
38
+ _model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
39
+ _model = local_move2gpu(_model)
40
+ _model.eval()
41
+ print("✅ YingMusicSinger model loaded.")
42
+
43
+ print("🔄 Loading MelBandRoformer separator...")
44
+ from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
45
+ _separator = Separator(
46
+ config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
47
+ checkpoint_path="ckpts/MelBandRoformer.ckpt",
48
+ device=LOCAL_DEVICE
49
+ )
50
+ print(" MelBandRoformer separator loaded.")
51
+ print("🎤 All models ready. Starting UI...")
 
 
 
 
 
 
 
 
52
 
53
 
54
  # ---------------------------------------------------------------------------
 
59
  Separate audio into vocals and accompaniment using MelBandRoformer.
60
  Must be called within an active GPU context.
61
  """
62
+ separator = _separator
63
 
64
  wav, sr = torchaudio.load(audio_path)
65
  vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
 
110
 
111
  # ---------------------------------------------------------------------------
112
  # Inference wrapper / 推理入口
 
 
113
  # ---------------------------------------------------------------------------
114
+ @spaces.GPU
115
  def synthesize(
116
  ref_audio,
117
  melody_audio,
 
125
  cfg_strength,
126
  seed,
127
  ):
 
 
 
 
 
 
 
128
  import random
129
 
 
130
  if ref_audio is None:
131
  raise gr.Error("请上传参考音频 / Please upload Reference Audio")
132
  if melody_audio is None:
 
135
  raise gr.Error("请输入参考音频对应的歌词 / Please enter Reference Text")
136
  if not target_text.strip():
137
  raise gr.Error("请输入目标合成歌词 / Please enter Target Text")
138
+ if mix_accompaniment_flag and not separate_vocals_flag:
139
+ raise gr.Error("「输出时混入伴奏」需要先开启「分离人声后过模型」/ 'Mix accompaniment into output' requires 'Separate vocals before synthesis' to be enabled first")
140
 
141
  ref_audio_path = ref_audio if isinstance(ref_audio, str) else ref_audio[0]
142
  melody_audio_path = (
 
147
  if actual_seed < 0:
148
  actual_seed = random.randint(0, 2**31 - 1)
149
 
 
150
  melody_accomp_path = None
151
  actual_ref_path = ref_audio_path
152
  actual_melody_path = melody_audio_path
 
158
  melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
159
  actual_melody_path = melody_vocals_path
160
 
161
+ audio_tensor, sr = _model(
 
 
 
162
  ref_audio_path=actual_ref_path,
163
  melody_audio_path=actual_melody_path,
164
  ref_text=ref_text.strip(),
 
174
  vocal_out_path = os.path.join(tempfile.mkdtemp(), "vocal_output.wav")
175
  torchaudio.save(vocal_out_path, audio_tensor.to("cpu"), sample_rate=sr)
176
 
 
177
  if (
178
  separate_vocals_flag
179
  and mix_accompaniment_flag
 
189
  # Example presets / 预设示例
190
  # ---------------------------------------------------------------------------
191
  EXAMPLES_MELODY_CONTROL = [
 
192
  [
193
+ "examples/hf_space/melody_control/melody_control_ZH_01_timbre.wav",
194
+ "examples/hf_space/melody_control/melody_control_ZH_01_melody.wav",
195
+ "人和人沟通|有时候没有用",
196
+ "此刻记忆中的点滴啊|能否再次被珍藏",
197
+ True, True, 0.5, 0.5, 32, 3.0, -1,
198
  ],
199
  [
200
+ "examples/hf_space/melody_control/melody_control_EN_01_timbre.wav",
201
+ "examples/hf_space/melody_control/melody_control_EN_01_melody.wav",
202
+ "i don't know feel|but i wanna try",
203
+ "won't open the door|and say tomorrow",
204
+ True, True, 0.5, 0.5, 32, 3.0, -1,
205
  ],
206
+ [
207
+ "examples/hf_space/melody_control/melody_control_EN_02_timbre.wav",
208
+ "examples/hf_space/melody_control/melody_control_EN_02_melody.wav",
209
+ "and she'll never know your story like|i do",
210
+ "你将安然无恙|无人能再伤你",
211
+ False, False, 0.5, 0.5, 32, 3.0, -1,
212
+ ],
213
+ [
214
+ "examples/hf_space/melody_control/melody_control_ZH_02_timbre.wav",
215
+ "examples/hf_space/melody_control/melody_control_ZH_02_melody.wav",
216
+ "就让你|在别人怀里|快乐",
217
+ "Missing you in my mind|missing you in my heart",
218
+ False, False, 0.5, 0.5, 32, 3.0, -1,
219
+ ]
220
  ]
221
 
222
  EXAMPLES_LYRIC_EDIT = [
223
  [
224
+ "examples/hf_space/lyric_edit/SingEdit_ZH_01.wav",
225
+ "examples/hf_space/lyric_edit/SingEdit_ZH_01.wav",
226
+ "天青色等烟雨|而在等你|炊烟袅袅升起",
227
+ "阳光中赏花香|花瓣飘落|山间幽静致远",
228
+ True, True, 0.5, 0.5, 32, 3.0, -1,
229
+ ],
230
+ [
231
+ "examples/hf_space/lyric_edit/SingEdit_EN_01.wav",
232
+ "examples/hf_space/lyric_edit/SingEdit_EN_01.wav",
233
+ "can you tell my heart is speaking|my eyes will give you clues",
234
+ "can you spot the moon is grinning|my lips will show you hints",
235
+ True, True, 0.5, 0.5, 32, 3.0, -1,
236
  ],
237
  [
238
+ "examples/hf_space/lyric_edit/SingEdit_ZH_02.wav",
239
+ "examples/hf_space/lyric_edit/SingEdit_ZH_02.wav",
240
+ "歌声是翅膀|唱出希望|所有付出只因爱的力量|和你一样",
241
+ "火锅是梦想|煮出了欢畅|全部的辛劳全因肉的力量|与汤一样",
242
+ False, False, 0.5, 0.5, 32, 3.0, -1,
243
+ ],
244
+ [
245
+ "examples/hf_space/lyric_edit/SingEdit_EN_02.wav",
246
+ "examples/hf_space/lyric_edit/SingEdit_EN_02.wav",
247
+ "i can hear what you say|now i know|why know we can|make it",
248
+ "i can see where you go|but i say|why not we will|break it",
249
+ False, False, 0.5, 0.5, 32, 3.0, -1,
250
  ],
251
  ]
252
 
 
258
  @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
259
 
260
  :root {
261
+ --primary: #44ACFF;
262
+ --primary-light: #89D4FF;
263
+ --primary-warm: #FE9EC7;
264
+ --palette-yellow: #F9F6C4;
265
  --bg-dark: #0d1117;
266
  --surface: #161b22;
267
  --surface-light: #21262d;
268
  --text: #f0f6fc;
269
  --text-muted: #8b949e;
270
+ --accent-glow: rgba(68, 172, 255, 0.10);
271
  --border: #30363d;
272
  }
273
 
 
277
  margin: auto !important;
278
  }
279
 
280
+ /* ---------- Badge links ---------- */
281
  #app-header .badges a {
282
  text-decoration: none !important;
283
  display: inline-block;
 
294
  line-height: 1.8;
295
  }
296
 
297
+ /* ---------- Header ---------- */
298
  #app-header {
299
  text-align: center;
300
  padding: 1.8rem 1rem 0.5rem;
 
344
  font-weight: 600;
345
  }
346
 
347
+ /* ========== 2. Section labels: left accent bar ========== */
348
+ .resume-container .section-title {
349
+ border: none;
350
+ outline: none;
351
+ box-shadow: none;
352
+ font-family: 'DM Sans', sans-serif;
353
+ font-weight: 700;
354
+ font-size: 1rem;
355
  letter-spacing: 0.06em;
356
  text-transform: uppercase;
357
+ color: var(--primary);
358
+ display: block;
359
+ padding: 3px 0 3px 10px;
360
+ margin-bottom: 14px;
361
+ border-left: 4px solid var(--primary-warm);
362
+ background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
363
+ border-radius: 0 4px 4px 0;
364
+ }
365
+
366
+ .resume-container .section-title * {
367
+ border: inherit;
368
+ outline: none;
369
+ box-shadow: none;
370
  }
371
 
372
  /* ---------- Example tabs ---------- */
 
375
  font-size: 0.95rem !important;
376
  }
377
 
378
+ /* ========== 3. Run button palette blue ========== */
379
  #run-btn {
380
+ background: linear-gradient(135deg, #44ACFF, #89D4FF) !important;
381
  border: none !important;
382
+ color: #0a1628 !important;
383
  font-weight: 700 !important;
384
  font-size: 1.1rem !important;
385
  letter-spacing: 0.04em;
386
  padding: 12px 0 !important;
387
  border-radius: 10px !important;
388
  transition: transform 0.15s, box-shadow 0.25s !important;
389
+ box-shadow: 0 4px 20px rgba(68, 172, 255, 0.35) !important;
390
  }
391
  #run-btn:hover {
392
  transform: translateY(-1px) !important;
393
+ box-shadow: 0 6px 28px rgba(68, 172, 255, 0.5) !important;
394
  }
395
 
396
+ /* ---------- Output audio ---------- */
397
  #output-audio {
398
+ border: 2px solid #44ACFF !important;
399
  border-radius: 12px !important;
400
+ background: rgba(68, 172, 255, 0.07) !important;
401
+ }
402
+
403
+ /* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
404
+ .vocal-sep-info b {
405
+ color: #c9d1d9;
406
+ font-weight: 700;
407
+ }
408
+ b[data-ref] {
409
+ color: #FE9EC7 !important;
410
+ }
411
+ b[data-melody] {
412
+ color: #89D4FF !important;
413
  }
414
  """
415
 
 
504
  type="filepath",
505
  )
506
  with gr.Column(scale=1):
507
+ gr.Markdown("#### ✏️ 歌词输入 / Lyrics")
508
  ref_text = gr.Textbox(
509
  label="参考音频歌词 / Reference Lyrics",
510
  placeholder="例如 / e.g.:该体谅的不执着|如果那天我",
 
517
  )
518
 
519
  # ================================================================
520
+ # ROW 2 – 伴奏分离
521
  # ================================================================
522
  gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
523
+ gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  gr.HTML("""
526
  <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
527
  background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
528
  <ul style="margin:0; padding-left:1.2em; list-style: none;">
529
  <li style="margin-bottom:7px;">
530
+ 💡 若输入的<b style="color:#FE9EC7; font-weight:700;">参考音频</b>或<b style="color:#89D4FF; font-weight:700;">旋律音频</b>中含有伴奏或背景噪音,请开启「分离人声后过模型」—— 模型基于纯人声训练,混合音频会影响合成质量。<br>
531
  <span style="color:#6e7681; font-size:0.82rem;">If either input contains accompaniment or background noise, enable <i>Separate vocals before synthesis</i> — the model is trained on clean vocals only and mixed audio degrades quality.</span>
532
  </li>
533
  <li style="margin-bottom:7px;">
 
535
  <span style="color:#6e7681; font-size:0.82rem;">If both inputs are already clean vocals, skip separation — enabling it unnecessarily may introduce artifacts from the separation model.</span>
536
  </li>
537
  <li>
538
+ 💡 若<b style="color:#89D4FF; font-weight:700;">旋律音频</b>含有伴奏,开启「分离人声后过模型」后,最终输出是否保留伴奏由「输出时混入伴奏」控制。<br>
539
  <span style="color:#6e7681; font-size:0.82rem;">If the melody audio contains accompaniment and separation is enabled, use <i>Mix accompaniment into output</i> to decide whether to include it in the final result.</span>
540
  </li>
541
  </ul>
 
554
  info="将合成人声与分离出的伴奏混合作为最终输出(需先开启人声分离)/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
555
  )
556
 
557
+ # ================================================================
558
+ # ROW 3 – 高级参数
559
+ # ================================================================
560
  with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
561
  with gr.Row():
562
  nfe_step = gr.Slider(
 
586
  )
587
 
588
  # ================================================================
589
+ # ROW 4 – 预设示例(放在所有真实控件定义之后)
590
+ # ================================================================
591
+ gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
592
+ gr.Markdown("#### 🎵 预设示例 / Example Presets")
593
+ gr.Markdown(
594
+ "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
595
+ )
596
+ gr.HTML("""
597
+ <p style="text-align:center; font-size:0.78rem; color:#484f58; margin: 2px 0 10px; line-height:1.7;">
598
+ 示例中所用音频片段均仅供学术研究与演示目的,不用于任何商业用途。如有版权问题,请联系作者予以删除。<br>
599
+ Audio clips used in the examples are for academic research and demonstration purposes only, with no commercial use intended. If you believe any content infringes your copyright, please contact the authors for removal.
600
+ </p>
601
+ """)
602
+
603
+ # 所有真实控件均已定义,直接绑定
604
+ _example_inputs = [
605
+ ref_audio, melody_audio, ref_text, target_text,
606
+ separate_vocals_flag, mix_accompaniment_flag,
607
+ sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
608
+ ]
609
+
610
+ with gr.Tabs():
611
+ with gr.Tab("🎼 Melody Control"):
612
+ gr.Examples(
613
+ examples=EXAMPLES_MELODY_CONTROL,
614
+ inputs=_example_inputs,
615
+ label="Melody Control Examples",
616
+ examples_per_page=5,
617
+ )
618
+ with gr.Tab("✏️ Lyric Edit"):
619
+ gr.Examples(
620
+ examples=EXAMPLES_LYRIC_EDIT,
621
+ inputs=_example_inputs,
622
+ label="Lyric Edit Examples",
623
+ examples_per_page=5,
624
+ )
625
+
626
+ # ================================================================
627
+ # ROW 5 – 合成按钮与输出
628
  # ================================================================
629
  gr.HTML("<hr style='border-color:#30363d; margin: 12px 0;'>")
630
  run_btn = gr.Button("🎤 开始合成 / Start Synthesizing", elem_id="run-btn", size="lg")
 
635
  elem_id="output-audio",
636
  )
637
 
 
638
  _all_inputs = [
639
  ref_audio, melody_audio, ref_text, target_text,
640
  separate_vocals_flag, mix_accompaniment_flag,
641
  sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
642
  ]
643
 
 
 
 
 
 
 
 
 
 
644
  run_btn.click(
645
  fn=synthesize,
646
  inputs=_all_inputs,
647
  outputs=output_audio,
648
  )
649
 
 
650
  gr.HTML(DISCLAIMER_HTML)
651
 
652
  return demo
653
 
654
 
655
  # ---------------------------------------------------------------------------
656
+ # Entry point
657
  # ---------------------------------------------------------------------------
658
  if __name__ == "__main__":
659
  demo = build_ui()