Spaces:
Running on Zero
Running on Zero
111
Browse files
app.py
CHANGED
|
@@ -33,35 +33,26 @@ def local_move2gpu(x):
|
|
| 33 |
|
| 34 |
|
| 35 |
# ---------------------------------------------------------------------------
|
| 36 |
-
# Model loading (
|
| 37 |
# ---------------------------------------------------------------------------
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
download_files(task="infer")
|
| 57 |
-
global _separator
|
| 58 |
-
if _separator is None:
|
| 59 |
-
from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
|
| 60 |
-
_separator = Separator(
|
| 61 |
-
config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
|
| 62 |
-
checkpoint_path="ckpts/MelBandRoformer.ckpt",
|
| 63 |
-
)
|
| 64 |
-
return _separator
|
| 65 |
|
| 66 |
|
| 67 |
# ---------------------------------------------------------------------------
|
|
@@ -72,7 +63,7 @@ def _separate_vocals_impl(audio_path: str) -> tuple:
|
|
| 72 |
Separate audio into vocals and accompaniment using MelBandRoformer.
|
| 73 |
Must be called within an active GPU context.
|
| 74 |
"""
|
| 75 |
-
separator =
|
| 76 |
|
| 77 |
wav, sr = torchaudio.load(audio_path)
|
| 78 |
vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
|
|
@@ -169,9 +160,7 @@ def synthesize(
|
|
| 169 |
melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
|
| 170 |
actual_melody_path = melody_vocals_path
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
audio_tensor, sr = model(
|
| 175 |
ref_audio_path=actual_ref_path,
|
| 176 |
melody_audio_path=actual_melody_path,
|
| 177 |
ref_text=ref_text.strip(),
|
|
@@ -266,11 +255,6 @@ EXAMPLES_LYRIC_EDIT = [
|
|
| 266 |
|
| 267 |
# ---------------------------------------------------------------------------
|
| 268 |
# Custom CSS / 自定义样式
|
| 269 |
-
# CHANGES:
|
| 270 |
-
# 1. Top gradient bar using palette colors
|
| 271 |
-
# 2. Section titles: left-bar accent instead of bottom border
|
| 272 |
-
# 3. Color palette updated to #FE9EC7 / #F9F6C4 / #89D4FF / #44ACFF
|
| 273 |
-
# 4. Vocal-sep info box: reference/melody text use palette colors
|
| 274 |
# ---------------------------------------------------------------------------
|
| 275 |
CUSTOM_CSS = """
|
| 276 |
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
|
|
@@ -363,39 +347,24 @@ CUSTOM_CSS = """
|
|
| 363 |
}
|
| 364 |
|
| 365 |
/* ========== 2. Section labels: left accent bar ========== */
|
| 366 |
-
|
| 367 |
-
/*
|
| 368 |
-
用更高特异性的选择器替代大量 !important
|
| 369 |
-
假设父容器有 .resume-container 或类似的包裹类
|
| 370 |
-
*/
|
| 371 |
.resume-container .section-title {
|
| 372 |
-
/* Reset */
|
| 373 |
border: none;
|
| 374 |
outline: none;
|
| 375 |
box-shadow: none;
|
| 376 |
-
|
| 377 |
-
/* Typography */
|
| 378 |
font-family: 'DM Sans', sans-serif;
|
| 379 |
font-weight: 700;
|
| 380 |
font-size: 1rem;
|
| 381 |
letter-spacing: 0.06em;
|
| 382 |
text-transform: uppercase;
|
| 383 |
color: var(--primary);
|
| 384 |
-
|
| 385 |
-
/* Layout */
|
| 386 |
display: block;
|
| 387 |
padding: 3px 0 3px 10px;
|
| 388 |
margin-bottom: 14px;
|
| 389 |
-
|
| 390 |
-
/* Left accent bar — 用 border-left 单独声明,避免被 border shorthand 覆盖 */
|
| 391 |
border-left: 4px solid var(--primary-warm);
|
| 392 |
-
|
| 393 |
-
/* Background & shape */
|
| 394 |
background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
|
| 395 |
border-radius: 0 4px 4px 0;
|
| 396 |
}
|
| 397 |
|
| 398 |
-
/* 子元素继承重置,避免外部样式污染 */
|
| 399 |
.resume-container .section-title * {
|
| 400 |
border: inherit;
|
| 401 |
outline: none;
|
|
@@ -434,13 +403,10 @@ CUSTOM_CSS = """
|
|
| 434 |
}
|
| 435 |
|
| 436 |
/* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
|
| 437 |
-
/* Target the <b> tags inside the vocal separation info box */
|
| 438 |
.vocal-sep-info b {
|
| 439 |
-
/* default fallback */
|
| 440 |
color: #c9d1d9;
|
| 441 |
font-weight: 700;
|
| 442 |
}
|
| 443 |
-
/* Specifically target ref audio and melody audio highlights via data-color */
|
| 444 |
b[data-ref] {
|
| 445 |
color: #FE9EC7 !important;
|
| 446 |
}
|
|
@@ -553,53 +519,11 @@ def build_ui():
|
|
| 553 |
)
|
| 554 |
|
| 555 |
# ================================================================
|
| 556 |
-
# ROW 2 –
|
| 557 |
-
# ================================================================
|
| 558 |
-
gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
|
| 559 |
-
gr.Markdown("#### 🎵 预设示例 / Example Presets")
|
| 560 |
-
gr.Markdown(
|
| 561 |
-
"<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
|
| 562 |
-
)
|
| 563 |
-
|
| 564 |
-
with gr.Row(visible=False):
|
| 565 |
-
_sep_flag_ex = gr.Checkbox(value=True, label="分离人声 / Separate Vocals")
|
| 566 |
-
_mix_flag_ex = gr.Checkbox(value=False, label="混入伴奏 / Mix Accomp.")
|
| 567 |
-
_sil_ex = gr.Number(value=0.5, label="静音 / Silence (s)")
|
| 568 |
-
_tshift_ex = gr.Number(value=0.5, label="t-shift")
|
| 569 |
-
_nfe_ex = gr.Number(value=32, label="NFE Steps")
|
| 570 |
-
_cfg_ex = gr.Number(value=3.0, label="CFG")
|
| 571 |
-
_seed_ex = gr.Number(value=-1, precision=0, label="Seed")
|
| 572 |
-
|
| 573 |
-
_example_inputs = [
|
| 574 |
-
ref_audio, melody_audio, ref_text, target_text,
|
| 575 |
-
_sep_flag_ex, _mix_flag_ex,
|
| 576 |
-
_sil_ex, _tshift_ex, _nfe_ex, _cfg_ex, _seed_ex,
|
| 577 |
-
]
|
| 578 |
-
|
| 579 |
-
with gr.Tabs():
|
| 580 |
-
with gr.Tab("🎼 Melody Control"):
|
| 581 |
-
gr.Examples(
|
| 582 |
-
examples=EXAMPLES_MELODY_CONTROL,
|
| 583 |
-
inputs=_example_inputs,
|
| 584 |
-
label="Melody Control Examples",
|
| 585 |
-
examples_per_page=5,
|
| 586 |
-
)
|
| 587 |
-
with gr.Tab("✏️ Lyric Edit"):
|
| 588 |
-
gr.Examples(
|
| 589 |
-
examples=EXAMPLES_LYRIC_EDIT,
|
| 590 |
-
inputs=_example_inputs,
|
| 591 |
-
label="Lyric Edit Examples",
|
| 592 |
-
examples_per_page=5,
|
| 593 |
-
)
|
| 594 |
-
|
| 595 |
-
# ================================================================
|
| 596 |
-
# ROW 3 – 伴奏分离
|
| 597 |
# ================================================================
|
| 598 |
gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
|
| 599 |
gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
|
| 600 |
|
| 601 |
-
# CHANGE 4: Use inline style colors for 参考音频 and 旋律音频
|
| 602 |
-
# so they render correctly regardless of CSS specificity issues
|
| 603 |
gr.HTML("""
|
| 604 |
<div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
|
| 605 |
background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
|
|
@@ -632,6 +556,9 @@ def build_ui():
|
|
| 632 |
info="将合成人声与分离出的伴奏混合作为最终输出(需先开启人声分离)/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
|
| 633 |
)
|
| 634 |
|
|
|
|
|
|
|
|
|
|
| 635 |
with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
|
| 636 |
with gr.Row():
|
| 637 |
nfe_step = gr.Slider(
|
|
@@ -660,6 +587,38 @@ def build_ui():
|
|
| 660 |
info="-1 表示随机生成 / -1 means random",
|
| 661 |
)
|
| 662 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
# ================================================================
|
| 664 |
# ROW 5 – 合成按钮与输出
|
| 665 |
# ================================================================
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
# ---------------------------------------------------------------------------
|
| 36 |
+
# Model loading (eager, at startup) / 启动时立即加载,常驻内存
|
| 37 |
# ---------------------------------------------------------------------------
|
| 38 |
+
print("🔄 Downloading required files...")
|
| 39 |
+
download_files(task="infer")
|
| 40 |
+
|
| 41 |
+
print("🔄 Loading YingMusicSinger model...")
|
| 42 |
+
from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
|
| 43 |
+
_model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
|
| 44 |
+
_model = local_move2gpu(_model)
|
| 45 |
+
_model.eval()
|
| 46 |
+
print("✅ YingMusicSinger model loaded.")
|
| 47 |
+
|
| 48 |
+
print("🔄 Loading MelBandRoformer separator...")
|
| 49 |
+
from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
|
| 50 |
+
_separator = Separator(
|
| 51 |
+
config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
|
| 52 |
+
checkpoint_path="ckpts/MelBandRoformer.ckpt",
|
| 53 |
+
)
|
| 54 |
+
print("✅ MelBandRoformer separator loaded.")
|
| 55 |
+
print("🎤 All models ready. Starting UI...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
# ---------------------------------------------------------------------------
|
|
|
|
| 63 |
Separate audio into vocals and accompaniment using MelBandRoformer.
|
| 64 |
Must be called within an active GPU context.
|
| 65 |
"""
|
| 66 |
+
separator = _separator
|
| 67 |
|
| 68 |
wav, sr = torchaudio.load(audio_path)
|
| 69 |
vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
|
|
|
|
| 160 |
melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
|
| 161 |
actual_melody_path = melody_vocals_path
|
| 162 |
|
| 163 |
+
audio_tensor, sr = _model(
|
|
|
|
|
|
|
| 164 |
ref_audio_path=actual_ref_path,
|
| 165 |
melody_audio_path=actual_melody_path,
|
| 166 |
ref_text=ref_text.strip(),
|
|
|
|
| 255 |
|
| 256 |
# ---------------------------------------------------------------------------
|
| 257 |
# Custom CSS / 自定义样式
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
# ---------------------------------------------------------------------------
|
| 259 |
CUSTOM_CSS = """
|
| 260 |
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
|
|
|
|
| 347 |
}
|
| 348 |
|
| 349 |
/* ========== 2. Section labels: left accent bar ========== */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
.resume-container .section-title {
|
|
|
|
| 351 |
border: none;
|
| 352 |
outline: none;
|
| 353 |
box-shadow: none;
|
|
|
|
|
|
|
| 354 |
font-family: 'DM Sans', sans-serif;
|
| 355 |
font-weight: 700;
|
| 356 |
font-size: 1rem;
|
| 357 |
letter-spacing: 0.06em;
|
| 358 |
text-transform: uppercase;
|
| 359 |
color: var(--primary);
|
|
|
|
|
|
|
| 360 |
display: block;
|
| 361 |
padding: 3px 0 3px 10px;
|
| 362 |
margin-bottom: 14px;
|
|
|
|
|
|
|
| 363 |
border-left: 4px solid var(--primary-warm);
|
|
|
|
|
|
|
| 364 |
background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
|
| 365 |
border-radius: 0 4px 4px 0;
|
| 366 |
}
|
| 367 |
|
|
|
|
| 368 |
.resume-container .section-title * {
|
| 369 |
border: inherit;
|
| 370 |
outline: none;
|
|
|
|
| 403 |
}
|
| 404 |
|
| 405 |
/* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
|
|
|
|
| 406 |
.vocal-sep-info b {
|
|
|
|
| 407 |
color: #c9d1d9;
|
| 408 |
font-weight: 700;
|
| 409 |
}
|
|
|
|
| 410 |
b[data-ref] {
|
| 411 |
color: #FE9EC7 !important;
|
| 412 |
}
|
|
|
|
| 519 |
)
|
| 520 |
|
| 521 |
# ================================================================
|
| 522 |
+
# ROW 2 – 伴奏分离
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
# ================================================================
|
| 524 |
gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
|
| 525 |
gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
|
| 526 |
|
|
|
|
|
|
|
| 527 |
gr.HTML("""
|
| 528 |
<div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
|
| 529 |
background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
|
|
|
|
| 556 |
info="将合成人声与分离出的伴奏混合作为最终输出(需先开启人声分离)/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
|
| 557 |
)
|
| 558 |
|
| 559 |
+
# ================================================================
|
| 560 |
+
# ROW 3 – 高级参数
|
| 561 |
+
# ================================================================
|
| 562 |
with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
|
| 563 |
with gr.Row():
|
| 564 |
nfe_step = gr.Slider(
|
|
|
|
| 587 |
info="-1 表示随机生成 / -1 means random",
|
| 588 |
)
|
| 589 |
|
| 590 |
+
# ================================================================
|
| 591 |
+
# ROW 4 – 预设示例(放在所有真实控件定义之后)
|
| 592 |
+
# ================================================================
|
| 593 |
+
gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
|
| 594 |
+
gr.Markdown("#### 🎵 预设示例 / Example Presets")
|
| 595 |
+
gr.Markdown(
|
| 596 |
+
"<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
|
| 597 |
+
)
|
| 598 |
+
|
| 599 |
+
# 所有真实控件均已定义,直接绑定
|
| 600 |
+
_example_inputs = [
|
| 601 |
+
ref_audio, melody_audio, ref_text, target_text,
|
| 602 |
+
separate_vocals_flag, mix_accompaniment_flag,
|
| 603 |
+
sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
|
| 604 |
+
]
|
| 605 |
+
|
| 606 |
+
with gr.Tabs():
|
| 607 |
+
with gr.Tab("🎼 Melody Control"):
|
| 608 |
+
gr.Examples(
|
| 609 |
+
examples=EXAMPLES_MELODY_CONTROL,
|
| 610 |
+
inputs=_example_inputs,
|
| 611 |
+
label="Melody Control Examples",
|
| 612 |
+
examples_per_page=5,
|
| 613 |
+
)
|
| 614 |
+
with gr.Tab("✏️ Lyric Edit"):
|
| 615 |
+
gr.Examples(
|
| 616 |
+
examples=EXAMPLES_LYRIC_EDIT,
|
| 617 |
+
inputs=_example_inputs,
|
| 618 |
+
label="Lyric Edit Examples",
|
| 619 |
+
examples_per_page=5,
|
| 620 |
+
)
|
| 621 |
+
|
| 622 |
# ================================================================
|
| 623 |
# ROW 5 – 合成按钮与输出
|
| 624 |
# ================================================================
|
examples/hf_space/melody_control/melody_control_ZH_02_melody.wav
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d022b45bd36f0e3618a85422f21a9fd244a18c998c60f0aa59fdfe3c1beb9c59
|
| 3 |
+
size 2039084
|
examples/hf_space/melody_control/melody_control_ZH_02_timbre.wav
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:795289e027cf06fd8e3898453fc353265dc114ca111446999900aa4dba40c126
|
| 3 |
+
size 1221884
|