xjsc0 committed on
Commit
c0f6c5b
·
1 Parent(s): 5fa078d
Files changed (1) hide show
  1. app.py +223 -98
app.py CHANGED
@@ -25,12 +25,6 @@ HF_ENABLE = False
25
  LOCAL_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
26
 
27
 
28
- # def gpu_decorator(fn):
29
- # if IS_HF_SPACE and HF_ENABLE and spaces is not None:
30
- # return spaces.GPU(fn)
31
- # return fn
32
-
33
-
34
  def local_move2gpu(x):
35
  """Move models to GPU on local environment. No-op on HuggingFace Spaces (ZeroGPU handles it)."""
36
  if IS_HF_SPACE:
@@ -45,37 +39,27 @@ _model = None
45
  _separator = None
46
 
47
 
48
- @spaces.GPU
49
- def get_model():
50
- """加载 YingMusicSinger 模型 / Load YingMusicSinger model."""
51
  download_files(task="infer")
52
  global _model
53
  if _model is None:
54
  from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
55
-
56
  _model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
57
- _model = local_move2gpu(_model)
58
- _model.eval()
59
  return _model
60
 
61
 
62
- @spaces.GPU
63
- def get_separator():
64
- """
65
- 加载 MelBandRoformer 分离模型 / Load MelBandRoformer separator.
66
- Returns a Separator instance ready for inference.
67
- """
68
  download_files(task="infer")
69
  global _separator
70
  if _separator is None:
71
- from src.third_party.MusicSourceSeparationTraining.inference_api import (
72
- Separator,
73
- )
74
-
75
  _separator = Separator(
76
  config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
77
  checkpoint_path="ckpts/MelBandRoformer.ckpt",
78
- # device=device,
79
  )
80
  return _separator
81
 
@@ -83,19 +67,12 @@ def get_separator():
83
  # ---------------------------------------------------------------------------
84
  # Vocal separation utilities / 人声分离工具
85
  # ---------------------------------------------------------------------------
86
- @spaces.GPU
87
- def separate_vocals(
88
- audio_path: str,
89
- device: str = "cuda:0",
90
- ) -> tuple:
91
  """
92
- 使用 MelBandRoformer 将音频分离为人声和伴奏。
93
  Separate audio into vocals and accompaniment using MelBandRoformer.
94
-
95
- Returns:
96
- (vocals_path, accompaniment_path)
97
  """
98
- separator = get_separator()
99
 
100
  wav, sr = torchaudio.load(audio_path)
101
  vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
@@ -121,24 +98,20 @@ def mix_vocal_and_accompaniment(
121
  vocal_wav, vocal_sr = torchaudio.load(vocal_path)
122
  accomp_wav, accomp_sr = torchaudio.load(accomp_path)
123
 
124
- # 统一采样率至人声采样率 / Resample accompaniment to match vocal sample rate
125
  if accomp_sr != vocal_sr:
126
  accomp_wav = torchaudio.functional.resample(accomp_wav, accomp_sr, vocal_sr)
127
 
128
- # 统一声道数 / Match channel count
129
  if vocal_wav.shape[0] != accomp_wav.shape[0]:
130
  if vocal_wav.shape[0] == 1:
131
  vocal_wav = vocal_wav.expand(accomp_wav.shape[0], -1)
132
  else:
133
  accomp_wav = accomp_wav.expand(vocal_wav.shape[0], -1)
134
 
135
- # 对齐长度(以较短者为准)/ Align to shorter length
136
  min_len = min(vocal_wav.shape[1], accomp_wav.shape[1])
137
  vocal_wav = vocal_wav[:, :min_len]
138
  accomp_wav = accomp_wav[:, :min_len]
139
 
140
  mixed = vocal_wav * vocal_gain + accomp_wav
141
- # 防止 clipping / Prevent clipping
142
  peak = mixed.abs().max()
143
  if peak > 1.0:
144
  mixed = mixed / peak
@@ -150,6 +123,8 @@ def mix_vocal_and_accompaniment(
150
 
151
  # ---------------------------------------------------------------------------
152
  # Inference wrapper / 推理入口
 
 
153
  # ---------------------------------------------------------------------------
154
  @spaces.GPU
155
  def synthesize(
@@ -169,15 +144,12 @@ def synthesize(
169
  主合成流程 / Main synthesis pipeline.
170
 
171
  1. (可选) 用 MelBandRoformer 分离参考音频和旋律音频的人声与伴奏
172
- (Optional) Separate vocals & accompaniment from both ref and melody audio via MelBandRoformer
173
  2. 送入 YingMusicSinger 合成
174
- Run YingMusicSinger inference
175
  3. (可选) 将合成人声与旋律音频的伴奏混合
176
- (Optional) Mix synthesised vocals with melody accompaniment
177
  """
178
  import random
179
 
180
- # ---- 输入校验 / Input validation -----------------------------------------
181
  if ref_audio is None:
182
  raise gr.Error("请上传参考音频 / Please upload Reference Audio")
183
  if melody_audio is None:
@@ -192,31 +164,24 @@ def synthesize(
192
  melody_audio if isinstance(melody_audio, str) else melody_audio[0]
193
  )
194
 
195
- # seed = -1 means random / seed 为 -1 时随机生成
196
  actual_seed = int(seed)
197
  if actual_seed < 0:
198
  actual_seed = random.randint(0, 2**31 - 1)
199
 
200
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
201
-
202
- # ---- Step 1: 人声分离(参考音频 + 旋律音频)/ Vocal separation for both (optional) ----
203
  melody_accomp_path = None
204
  actual_ref_path = ref_audio_path
205
  actual_melody_path = melody_audio_path
206
 
207
  if separate_vocals_flag:
208
- # 分离参考音频 / Separate reference audio
209
- ref_vocals_path, _ = separate_vocals(ref_audio_path, device=device)
210
  actual_ref_path = ref_vocals_path
211
 
212
- # 分离旋律音频 / Separate melody audio
213
- melody_vocals_path, melody_accomp_path = separate_vocals(
214
- melody_audio_path, device=device
215
- )
216
  actual_melody_path = melody_vocals_path
217
 
218
- # ---- Step 2: 模型推理 / Model inference ----------------------------------
219
- model = get_model()
220
 
221
  audio_tensor, sr = model(
222
  ref_audio_path=actual_ref_path,
@@ -231,7 +196,6 @@ def synthesize(
231
  seed=actual_seed,
232
  )
233
 
234
- # 先保存纯人声合成结果 / Save raw vocal synthesis result
235
  vocal_out_path = os.path.join(tempfile.mkdtemp(), "vocal_output.wav")
236
  torchaudio.save(vocal_out_path, audio_tensor.to("cpu"), sample_rate=sr)
237
 
@@ -247,6 +211,45 @@ def synthesize(
247
  return vocal_out_path
248
 
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  # ---------------------------------------------------------------------------
251
  # Custom CSS / 自定义样式
252
  # ---------------------------------------------------------------------------
@@ -274,23 +277,50 @@ CUSTOM_CSS = """
274
  /* ---------- Header / 头部 ---------- */
275
  #app-header {
276
  text-align: center;
277
- padding: 2.5rem 1rem 1.5rem;
278
  }
279
  #app-header h1 {
280
- font-family: 'Playfair Display', serif !important;
281
- font-size: 2.6rem !important;
282
- font-weight: 800 !important;
283
- background: linear-gradient(135deg, #f48c06, #e85d04, #dc2f02);
284
- -webkit-background-clip: text;
285
- -webkit-text-fill-color: transparent;
286
- margin-bottom: 0.3rem !important;
287
- letter-spacing: -0.02em;
 
288
  }
289
- #app-header p {
290
  color: var(--text-muted);
291
- font-size: 1.05rem;
292
- margin-top: 0;
 
293
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
  /* ---------- Section labels / 分区标题 ---------- */
296
  .section-title {
@@ -305,6 +335,12 @@ CUSTOM_CSS = """
305
  margin-bottom: 12px !important;
306
  }
307
 
 
 
 
 
 
 
308
  /* ---------- Run button / 合成按钮 ---------- */
309
  #run-btn {
310
  background: linear-gradient(135deg, #e85d04, #dc2f02) !important;
@@ -331,23 +367,93 @@ CUSTOM_CSS = """
331
  }
332
  """
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
  # ---------------------------------------------------------------------------
336
  # Build the Gradio UI / 构建界面
337
  # ---------------------------------------------------------------------------
338
  def build_ui():
 
 
 
339
  with gr.Blocks(
340
  css=CUSTOM_CSS, title="YingMusic Singer", theme=gr.themes.Base()
341
  ) as demo:
 
342
  # ---- Header / 头部 ----
343
- gr.HTML(
344
- """
345
- <div id="app-header">
346
- <h1>♫ YingMusic Singer</h1>
347
- <p>基于参考音色与旋律音频的歌声合成系统 &nbsp;·&nbsp; Singing Voice Synthesis</p>
348
- </div>
349
- """
350
- )
351
 
352
  # ================================================================
353
  # ROW 1 – 音频输入 / Audio Inputs + 歌词 / Lyrics (side by side)
@@ -459,42 +565,61 @@ def build_ui():
459
  elem_id="output-audio",
460
  )
461
 
462
- # ---- 联动:未开启分离时,禁用伴奏混合 ----
463
- # ---- Disable mix checkbox when separation is off ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  separate_vocals_flag.change(
465
- fn=lambda sep: gr.update(
466
- interactive=sep, value=False if not sep else False
467
- ),
468
  inputs=[separate_vocals_flag],
469
  outputs=[mix_accompaniment_flag],
470
  )
471
 
472
- # ---- 绑定事件 / Wire up ----
473
  run_btn.click(
474
  fn=synthesize,
475
- inputs=[
476
- ref_audio,
477
- melody_audio,
478
- ref_text,
479
- target_text,
480
- separate_vocals_flag,
481
- mix_accompaniment_flag,
482
- sil_len_to_end,
483
- t_shift,
484
- nfe_step,
485
- cfg_strength,
486
- seed,
487
- ],
488
  outputs=output_audio,
489
  )
490
 
491
  # ---- 页脚 / Footer ----
492
  gr.Markdown(
493
  """
494
- ---
495
- <center style="color:#8b949e; font-size:0.85rem;">
496
- YingMusic Singer &nbsp;·&nbsp; 基于 Flow Matching + VAE / Powered by Flow Matching + VAE &nbsp;·&nbsp;
497
- 用 <code>|</code> 分隔歌词中的乐句 / Use <code>|</code> to separate phrases in lyrics
498
  </center>
499
  """,
500
  )
@@ -508,4 +633,4 @@ def build_ui():
508
  if __name__ == "__main__":
509
  demo = build_ui()
510
  demo.queue()
511
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
25
  LOCAL_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
26
 
27
 
 
 
 
 
 
 
28
  def local_move2gpu(x):
29
  """Move models to GPU on local environment. No-op on HuggingFace Spaces (ZeroGPU handles it)."""
30
  if IS_HF_SPACE:
 
39
  _separator = None
40
 
41
 
42
def _load_model_impl():
    """Return the process-wide YingMusicSinger instance, loading it on first use.

    Carries no GPU decorator of its own; it is meant to be called from code
    that is already running inside a GPU context.

    The module-level ``_model`` cache is assigned only after the device move
    and ``eval()`` have both succeeded. If either step raises, the cache stays
    empty and the next call retries the load instead of returning a model that
    was never moved or switched to eval mode.

    Returns:
        The cached model, as returned by ``local_move2gpu``.
    """
    global _model

    # Still invoked on every call, whether or not a model is already cached.
    download_files(task="infer")

    if _model is None:
        # Import is deferred until the first load actually happens.
        from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger

        model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
        model = local_move2gpu(model)
        model.eval()
        # Publish only once every preparation step has completed.
        _model = model
    return _model
52
 
53
 
54
def _load_separator_impl():
    """Return the shared MelBandRoformer ``Separator``, building it on first use.

    Has no GPU decorator of its own; callers are expected to already be inside
    a GPU context. The instance is kept in the module-level ``_separator``.
    """
    global _separator

    download_files(task="infer")
    if _separator is not None:
        return _separator

    from src.third_party.MusicSourceSeparationTraining.inference_api import Separator

    separator_kwargs = {
        "config_path": "ckpts/config_vocals_mel_band_roformer_kj.yaml",
        "checkpoint_path": "ckpts/MelBandRoformer.ckpt",
    }
    _separator = Separator(**separator_kwargs)
    return _separator
65
 
 
67
  # ---------------------------------------------------------------------------
68
  # Vocal separation utilities / 人声分离工具
69
  # ---------------------------------------------------------------------------
70
+ def _separate_vocals_impl(audio_path: str) -> tuple:
 
 
 
 
71
  """
 
72
  Separate audio into vocals and accompaniment using MelBandRoformer.
73
+ Must be called within an active GPU context.
 
 
74
  """
75
+ separator = _load_separator_impl()
76
 
77
  wav, sr = torchaudio.load(audio_path)
78
  vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
 
98
  vocal_wav, vocal_sr = torchaudio.load(vocal_path)
99
  accomp_wav, accomp_sr = torchaudio.load(accomp_path)
100
 
 
101
  if accomp_sr != vocal_sr:
102
  accomp_wav = torchaudio.functional.resample(accomp_wav, accomp_sr, vocal_sr)
103
 
 
104
  if vocal_wav.shape[0] != accomp_wav.shape[0]:
105
  if vocal_wav.shape[0] == 1:
106
  vocal_wav = vocal_wav.expand(accomp_wav.shape[0], -1)
107
  else:
108
  accomp_wav = accomp_wav.expand(vocal_wav.shape[0], -1)
109
 
 
110
  min_len = min(vocal_wav.shape[1], accomp_wav.shape[1])
111
  vocal_wav = vocal_wav[:, :min_len]
112
  accomp_wav = accomp_wav[:, :min_len]
113
 
114
  mixed = vocal_wav * vocal_gain + accomp_wav
 
115
  peak = mixed.abs().max()
116
  if peak > 1.0:
117
  mixed = mixed / peak
 
123
 
124
  # ---------------------------------------------------------------------------
125
  # Inference wrapper / 推理入口
126
+ # Single @spaces.GPU scope covers ALL heavy work (separation + synthesis)
127
+ # so models stay resident in GPU memory across steps within one call.
128
  # ---------------------------------------------------------------------------
129
  @spaces.GPU
130
  def synthesize(
 
144
  主合成流程 / Main synthesis pipeline.
145
 
146
  1. (可选) 用 MelBandRoformer 分离参考音频和旋律音频的人声与伴奏
 
147
  2. 送入 YingMusicSinger 合成
 
148
  3. (可选) 将合成人声与旋律音频的伴奏混合
 
149
  """
150
  import random
151
 
152
+ # ---- 输入校验 / Input validation ----------------------------------------
153
  if ref_audio is None:
154
  raise gr.Error("请上传参考音频 / Please upload Reference Audio")
155
  if melody_audio is None:
 
164
  melody_audio if isinstance(melody_audio, str) else melody_audio[0]
165
  )
166
 
 
167
  actual_seed = int(seed)
168
  if actual_seed < 0:
169
  actual_seed = random.randint(0, 2**31 - 1)
170
 
171
+ # ---- Step 1: 人声分离(合并在同一 GPU 上下文中)/ Vocal separation (same GPU context) ----
 
 
172
  melody_accomp_path = None
173
  actual_ref_path = ref_audio_path
174
  actual_melody_path = melody_audio_path
175
 
176
  if separate_vocals_flag:
177
+ ref_vocals_path, _ = _separate_vocals_impl(ref_audio_path)
 
178
  actual_ref_path = ref_vocals_path
179
 
180
+ melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
 
 
 
181
  actual_melody_path = melody_vocals_path
182
 
183
+ # ---- Step 2: 模型推理 / Model inference (same GPU context) ---------------
184
+ model = _load_model_impl()
185
 
186
  audio_tensor, sr = model(
187
  ref_audio_path=actual_ref_path,
 
196
  seed=actual_seed,
197
  )
198
 
 
199
  vocal_out_path = os.path.join(tempfile.mkdtemp(), "vocal_output.wav")
200
  torchaudio.save(vocal_out_path, audio_tensor.to("cpu"), sample_rate=sr)
201
 
 
211
  return vocal_out_path
212
 
213
 
214
+ # ---------------------------------------------------------------------------
215
+ # Example presets / 预设示例
216
+ # ---------------------------------------------------------------------------
217
# Each row lists, in order:
# [ref_audio, melody_audio, ref_text, target_text, sep, mix, sil, t_shift, nfe, cfg, seed]
# Only the first four columns vary between presets; the rest are shared.
EXAMPLES_MELODY_CONTROL = [
    [ref_wav, melody_wav, ref_lyrics, target_lyrics, True, False, 0.5, 0.5, 32, 3.0, -1]
    for ref_wav, melody_wav, ref_lyrics, target_lyrics in (
        (
            "examples/melody_control/ref_01.wav",
            "examples/melody_control/melody_01.wav",
            "该体谅的不执着|如果那天我",
            "好多天|看不完你",
        ),
        (
            "examples/melody_control/ref_02.wav",
            "examples/melody_control/melody_02.wav",
            "月光下的身影|渐渐模糊",
            "星光照亮前路|指引方向",
        ),
    )
]
234
+
235
# Each row lists, in order:
# [ref_audio, melody_audio, ref_text, target_text, sep, mix, sil, t_shift, nfe, cfg, seed]
# Only the first four columns vary between presets; the rest are shared.
EXAMPLES_LYRIC_EDIT = [
    [ref_wav, melody_wav, ref_lyrics, target_lyrics, True, False, 0.5, 0.5, 32, 3.0, -1]
    for ref_wav, melody_wav, ref_lyrics, target_lyrics in (
        (
            "examples/lyric_edit/ref_01.wav",
            "examples/lyric_edit/melody_01.wav",
            "该体谅的不执着|如果那天我",
            "忘不掉的笑容|留在心里面",
        ),
        (
            "examples/lyric_edit/ref_02.wav",
            "examples/lyric_edit/melody_02.wav",
            "夜深了还不睡|想着你的脸",
            "春风又吹过来|带走我思念",
        ),
    )
]
251
+
252
+
253
  # ---------------------------------------------------------------------------
254
  # Custom CSS / 自定义样式
255
  # ---------------------------------------------------------------------------
 
277
  /* ---------- Header / 头部 ---------- */
278
  #app-header {
279
  text-align: center;
280
+ padding: 1.8rem 1rem 0.5rem;
281
  }
282
  #app-header h1 {
283
+ font-size: 1.45rem !important;
284
+ font-weight: 700 !important;
285
+ line-height: 1.4;
286
+ margin-bottom: 0.6rem !important;
287
+ }
288
+ #app-header .badges img {
289
+ display: inline-block;
290
+ margin: 3px 2px;
291
+ vertical-align: middle;
292
  }
293
+ #app-header .authors {
294
  color: var(--text-muted);
295
+ font-size: 0.92rem;
296
+ margin: 0.5rem 0 0.2rem;
297
+ line-height: 1.7;
298
  }
299
+ #app-header .affiliations {
300
+ color: var(--text-muted);
301
+ font-size: 0.85rem;
302
+ margin-bottom: 0.5rem;
303
+ }
304
+ #app-header .lang-links a {
305
+ color: var(--primary-light);
306
+ text-decoration: none;
307
+ margin: 0 4px;
308
+ font-size: 0.9rem;
309
+ }
310
+ #app-header .lang-links a:hover { text-decoration: underline; }
311
+
312
+ /* ---------- Disclaimer ---------- */
313
+ #disclaimer {
314
+ border-left: 4px solid var(--primary);
315
+ background: var(--accent-glow);
316
+ border-radius: 8px;
317
+ padding: 12px 16px;
318
+ margin: 8px 0 16px;
319
+ font-size: 0.88rem;
320
+ color: var(--text-muted);
321
+ line-height: 1.6;
322
+ }
323
+ #disclaimer strong { color: var(--primary-light); }
324
 
325
  /* ---------- Section labels / 分区标题 ---------- */
326
  .section-title {
 
335
  margin-bottom: 12px !important;
336
  }
337
 
338
+ /* ---------- Example tabs ---------- */
339
+ .example-tab-label {
340
+ font-weight: 600 !important;
341
+ font-size: 0.95rem !important;
342
+ }
343
+
344
  /* ---------- Run button / 合成按钮 ---------- */
345
  #run-btn {
346
  background: linear-gradient(135deg, #e85d04, #dc2f02) !important;
 
367
  }
368
  """
369
 
370
+ # ---------------------------------------------------------------------------
371
+ # Header HTML / 头部 HTML
372
+ # ---------------------------------------------------------------------------
373
# Static HTML for the page header: title, language links, badge row, author
# list and affiliations. build_ui() renders it with gr.HTML(HEADER_HTML); the
# root element id "app-header" is what the "#app-header ..." rules in
# CUSTOM_CSS select.
# NOTE(review): the "English" link has an empty href and the "中文" link is a
# relative "README_ZH.md" path — confirm both resolve when served from the app.
# NOTE(review): the arXiv link and badge use the id "0.0", which looks like a
# placeholder — confirm and replace with the real identifier.
HEADER_HTML = """
<div id="app-header" align="center">
<h1>
🏆 LyricEditBench: The first benchmark for melody-preserving lyric modification evaluation<br>
🚀 <a href="https://github.com/ASLP-lab/YingMusic-Singer" target="_blank">YingMusic-Singer</a>
</h1>

<div class="lang-links">
<a href="">English</a> | <a href="README_ZH.md">中文</a>
</div>

<div class="badges" style="margin: 10px 0;">
<img src="https://img.shields.io/badge/Python-3.10-3776AB?logo=python&logoColor=white" alt="Python">
<img src="https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey" alt="License">
<a href="https://arxiv.org/abs/0.0" target="_blank">
<img src="https://img.shields.io/badge/arXiv-0.0-b31b1b?logo=arxiv&logoColor=white" alt="arXiv">
</a>
<a href="https://github.com/ASLP-lab/YingMusic-Singer" target="_blank">
<img src="https://img.shields.io/badge/GitHub-YingMusic--Singer-181717?logo=github&logoColor=white" alt="GitHub">
</a>
<a href="https://huggingface.co/spaces/ASLP-lab/YingMusic-Singer" target="_blank">
<img src="https://img.shields.io/badge/🤗%20HuggingFace-Space-FFD21E" alt="HuggingFace Space">
</a>
<a href="https://huggingface.co/ASLP-lab/YingMusic-Singer" target="_blank">
<img src="https://img.shields.io/badge/🤗%20HuggingFace-Model-FF9D00" alt="HuggingFace Model">
</a>
<a href="https://huggingface.co/datasets/ASLP-lab/LyricEditBench" target="_blank">
<img src="https://img.shields.io/badge/🤗%20HuggingFace-LyricEditBench-FF6F00" alt="LyricEditBench">
</a>
<a href="https://discord.gg/RXghgWyvrn" target="_blank">
<img src="https://img.shields.io/badge/Discord-Join%20Us-5865F2?logo=discord&logoColor=white" alt="Discord">
</a>
<a href="https://github.com/ASLP-lab/YingMusic-Singer/blob/main/assets/wechat_qr.png" target="_blank">
<img src="https://img.shields.io/badge/WeChat-Group-07C160?logo=wechat&logoColor=white" alt="WeChat">
</a>
<a href="http://www.npu-aslp.org/" target="_blank">
<img src="https://img.shields.io/badge/🏫%20ASLP-Lab-4A90D9" alt="ASLP Lab">
</a>
</div>

<p class="authors">
<a href="https://orcid.org/0009-0005-5957-8936" target="_blank"><b>Chunbo Hao</b></a>¹² &nbsp;·&nbsp;
<a href="https://orcid.org/0009-0003-2602-2910" target="_blank"><b>Junjie Zheng</b></a>² &nbsp;·&nbsp;
<a href="https://orcid.org/0009-0001-6706-0572" target="_blank"><b>Guobin Ma</b></a>¹ &nbsp;·&nbsp;
<b>Yuepeng Jiang</b>¹ &nbsp;·&nbsp;
<b>Huakang Chen</b>¹ &nbsp;·&nbsp;
<b>Wenjie Tian</b>¹ &nbsp;·&nbsp;
<a href="https://orcid.org/0009-0003-9258-4006" target="_blank"><b>Gongyu Chen</b></a>² &nbsp;·&nbsp;
<a href="https://orcid.org/0009-0005-5413-6725" target="_blank"><b>Zihao Chen</b></a>² &nbsp;·&nbsp;
<b>Lei Xie</b>¹
</p>
<p class="affiliations">
<sup>1</sup> Northwestern Polytechnical University &nbsp;·&nbsp; <sup>2</sup> Giant Network
</p>
</div>
"""
429
+
430
# Static HTML for the usage disclaimer shown under the header. build_ui()
# renders it with gr.HTML(DISCLAIMER_HTML); the element id "disclaimer" is the
# target of the "#disclaimer" rules in CUSTOM_CSS.
DISCLAIMER_HTML = """
<div id="disclaimer">
<strong>⚠️ Disclaimer / 免责声明:</strong>
YingMusic-Singer enables the creation of singing voices with modified lyrics, supporting applications
in artistic creation and entertainment. Potential risks include unauthorized voice cloning and copyright
infringement. To ensure responsible deployment, users should obtain consent for voice usage, disclose
AI involvement, and verify musical originality.
</div>
"""
439
+
440
 
441
  # ---------------------------------------------------------------------------
442
  # Build the Gradio UI / 构建界面
443
  # ---------------------------------------------------------------------------
444
  def build_ui():
445
+ # Shared input components referenced by both Examples tabs and main form
446
+ ALL_INPUTS_ORDER = None # defined after components are created
447
+
448
  with gr.Blocks(
449
  css=CUSTOM_CSS, title="YingMusic Singer", theme=gr.themes.Base()
450
  ) as demo:
451
+
452
  # ---- Header / 头部 ----
453
+ gr.HTML(HEADER_HTML)
454
+ gr.HTML(DISCLAIMER_HTML)
455
+
456
+ gr.HTML("<hr style='border-color:#30363d; margin: 8px 0 18px;'>")
 
 
 
 
457
 
458
  # ================================================================
459
  # ROW 1 – 音频输入 / Audio Inputs + 歌词 / Lyrics (side by side)
 
565
  elem_id="output-audio",
566
  )
567
 
568
+ # ================================================================
569
+ # ROW 5 示例 / Examples (two tabs)
570
+ # ================================================================
571
+ gr.HTML("<hr style='border-color:#30363d; margin: 20px 0 12px;'>")
572
+ gr.Markdown("#### 🎵 预设示例 / Example Presets", elem_classes="section-title")
573
+ gr.Markdown(
574
+ "<small style='color:#8b949e;'>点击任意行自动填入输入区域 / Click any row to auto-fill the inputs above</small>"
575
+ )
576
+
577
+ # All inputs in order expected by synthesize()
578
+ _all_inputs = [
579
+ ref_audio, melody_audio, ref_text, target_text,
580
+ separate_vocals_flag, mix_accompaniment_flag,
581
+ sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
582
+ ]
583
+
584
+ with gr.Tabs():
585
+ with gr.Tab("🎼 Melody Control"):
586
+ gr.Examples(
587
+ examples=EXAMPLES_MELODY_CONTROL,
588
+ inputs=_all_inputs,
589
+ label="Melody Control Examples",
590
+ examples_per_page=5,
591
+ )
592
+ with gr.Tab("✏️ Lyric Edit"):
593
+ gr.Examples(
594
+ examples=EXAMPLES_LYRIC_EDIT,
595
+ inputs=_all_inputs,
596
+ label="Lyric Edit Examples",
597
+ examples_per_page=5,
598
+ )
599
+
600
+ # ================================================================
601
+ # Event wiring / 事件绑定
602
+ # ================================================================
603
+
604
+ # 联动:未开启分离时,禁用伴奏混合
605
  separate_vocals_flag.change(
606
+ fn=lambda sep: gr.update(interactive=sep, value=False if not sep else False),
 
 
607
  inputs=[separate_vocals_flag],
608
  outputs=[mix_accompaniment_flag],
609
  )
610
 
 
611
  run_btn.click(
612
  fn=synthesize,
613
+ inputs=_all_inputs,
 
 
 
 
 
 
 
 
 
 
 
 
614
  outputs=output_audio,
615
  )
616
 
617
  # ---- 页脚 / Footer ----
618
  gr.Markdown(
619
  """
620
+ <center style="color:#8b949e; font-size:0.85rem; margin-top: 16px;">
621
+ Use <code>|</code> to separate lyric phrases &nbsp;·&nbsp;
622
+ <code>|</code> 分隔歌词乐句
 
623
  </center>
624
  """,
625
  )
 
633
if __name__ == "__main__":
    # Script entry point: build the UI, enable its request queue, then serve it
    # on port 7860 without creating a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = build_ui()
    demo.queue()
    demo.launch(**launch_options)