Spaces:

ASLP-lab
/

YingMusic-Singer-Plus

Configuration error

App Files Community

xjsc0 committed on Mar 24

Commit

1edd14f

1 Parent(s): 877f49e

111

Browse files

Files changed (3) hide show

app.py +57 -98
examples/hf_space/melody_control/melody_control_ZH_02_melody.wav +2 -2
examples/hf_space/melody_control/melody_control_ZH_02_timbre.wav +2 -2

app.py CHANGED Viewed

@@ -33,35 +33,26 @@ def local_move2gpu(x):
 # ---------------------------------------------------------------------------
-# Model loading (lazy, singleton) / 模型懒加载（单例）
 # ---------------------------------------------------------------------------
-_model = None
-_separator = None
-def _load_model_impl():
-    """Internal: load YingMusicSinger (no GPU decorator, called inside GPU context)."""
-    download_files(task="infer")
-    global _model
-    if _model is None:
-        from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
-        _model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
-        _model = local_move2gpu(_model)
-        _model.eval()
-    return _model
-def _load_separator_impl():
-    """Internal: load MelBandRoformer separator (no GPU decorator, called inside GPU context)."""
-    download_files(task="infer")
-    global _separator
-    if _separator is None:
-        from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
-        _separator = Separator(
-            config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
-            checkpoint_path="ckpts/MelBandRoformer.ckpt",
-        )
-    return _separator
 # ---------------------------------------------------------------------------
@@ -72,7 +63,7 @@ def _separate_vocals_impl(audio_path: str) -> tuple:
     Separate audio into vocals and accompaniment using MelBandRoformer.
     Must be called within an active GPU context.
     """
-    separator = _load_separator_impl()
     wav, sr = torchaudio.load(audio_path)
     vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
@@ -169,9 +160,7 @@ def synthesize(
         melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
         actual_melody_path = melody_vocals_path
-    model = _load_model_impl()
-    audio_tensor, sr = model(
         ref_audio_path=actual_ref_path,
         melody_audio_path=actual_melody_path,
         ref_text=ref_text.strip(),
@@ -266,11 +255,6 @@ EXAMPLES_LYRIC_EDIT = [
 # ---------------------------------------------------------------------------
 # Custom CSS / 自定义样式
-# CHANGES:
-#   1. Top gradient bar using palette colors
-#   2. Section titles: left-bar accent instead of bottom border
-#   3. Color palette updated to #FE9EC7 / #F9F6C4 / #89D4FF / #44ACFF
-#   4. Vocal-sep info box: reference/melody text use palette colors
 # ---------------------------------------------------------------------------
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
@@ -363,39 +347,24 @@ CUSTOM_CSS = """
 }
 /* ========== 2. Section labels: left accent bar ========== */
-/*
-  用更高特异性的选择器替代大量 !important
-  假设父容器有 .resume-container 或类似的包裹类
-*/
 .resume-container .section-title {
-    /* Reset */
     border: none;
     outline: none;
     box-shadow: none;
-    /* Typography */
     font-family: 'DM Sans', sans-serif;
     font-weight: 700;
     font-size: 1rem;
     letter-spacing: 0.06em;
     text-transform: uppercase;
     color: var(--primary);
-    /* Layout */
     display: block;
     padding: 3px 0 3px 10px;
     margin-bottom: 14px;
-    /* Left accent bar — 用 border-left 单独声明，避免被 border shorthand 覆盖 */
     border-left: 4px solid var(--primary-warm);
-    /* Background & shape */
     background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
     border-radius: 0 4px 4px 0;
 }
-/* 子元素继承重置，避免外部样式污染 */
 .resume-container .section-title * {
     border: inherit;
     outline: none;
@@ -434,13 +403,10 @@ CUSTOM_CSS = """
 }
 /* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
-/* Target the <b> tags inside the vocal separation info box */
 .vocal-sep-info b {
-    /* default fallback */
     color: #c9d1d9;
     font-weight: 700;
 }
-/* Specifically target ref audio and melody audio highlights via data-color */
 b[data-ref] {
     color: #FE9EC7 !important;
 }
@@ -553,53 +519,11 @@ def build_ui():
                 )
         # ================================================================
-        # ROW 2 – 预设示例
-        # ================================================================
-        gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
-        gr.Markdown("#### 🎵 预设示例 / Example Presets")
-        gr.Markdown(
-            "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
-        )
-        with gr.Row(visible=False):
-            _sep_flag_ex   = gr.Checkbox(value=True,  label="分离人声 / Separate Vocals")
-            _mix_flag_ex   = gr.Checkbox(value=False, label="混入伴奏 / Mix Accomp.")
-            _sil_ex        = gr.Number(value=0.5,  label="静音 / Silence (s)")
-            _tshift_ex     = gr.Number(value=0.5,  label="t-shift")
-            _nfe_ex        = gr.Number(value=32,   label="NFE Steps")
-            _cfg_ex        = gr.Number(value=3.0,  label="CFG")
-            _seed_ex       = gr.Number(value=-1, precision=0, label="Seed")
-        _example_inputs = [
-            ref_audio, melody_audio, ref_text, target_text,
-            _sep_flag_ex, _mix_flag_ex,
-            _sil_ex, _tshift_ex, _nfe_ex, _cfg_ex, _seed_ex,
-        ]
-        with gr.Tabs():
-            with gr.Tab("🎼 Melody Control"):
-                gr.Examples(
-                    examples=EXAMPLES_MELODY_CONTROL,
-                    inputs=_example_inputs,
-                    label="Melody Control Examples",
-                    examples_per_page=5,
-                )
-            with gr.Tab("✏️ Lyric Edit"):
-                gr.Examples(
-                    examples=EXAMPLES_LYRIC_EDIT,
-                    inputs=_example_inputs,
-                    label="Lyric Edit Examples",
-                    examples_per_page=5,
-                )
-        # ================================================================
-        # ROW 3 – 伴奏分离
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
         gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
-        # CHANGE 4: Use inline style colors for 参考音频 and 旋律音频
-        # so they render correctly regardless of CSS specificity issues
         gr.HTML("""
 <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
             background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
@@ -632,6 +556,9 @@ def build_ui():
                 info="将合成人声与分离出的伴奏混合作为最终输出（需先开启人声分离）/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
             )
         with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
             with gr.Row():
                 nfe_step = gr.Slider(
@@ -660,6 +587,38 @@ def build_ui():
                     info="-1 表示随机生成 / -1 means random",
                 )
         # ================================================================
         # ROW 5 – 合成按钮与输出
         # ================================================================

 # ---------------------------------------------------------------------------
+# Model loading (eager, at startup) / 启动时立即加载，常驻内存
 # ---------------------------------------------------------------------------
+print("🔄 Downloading required files...")
+download_files(task="infer")
+print("🔄 Loading YingMusicSinger model...")
+from src.YingMusicSinger.infer.YingMusicSinger import YingMusicSinger
+_model = YingMusicSinger.from_pretrained("ASLP-lab/YingMusic-Singer")
+_model = local_move2gpu(_model)
+_model.eval()
+print("✅ YingMusicSinger model loaded.")
+print("🔄 Loading MelBandRoformer separator...")
+from src.third_party.MusicSourceSeparationTraining.inference_api import Separator
+_separator = Separator(
+    config_path="ckpts/config_vocals_mel_band_roformer_kj.yaml",
+    checkpoint_path="ckpts/MelBandRoformer.ckpt",
+)
+print("✅ MelBandRoformer separator loaded.")
+print("🎤 All models ready. Starting UI...")
 # ---------------------------------------------------------------------------
     Separate audio into vocals and accompaniment using MelBandRoformer.
     Must be called within an active GPU context.
     """
+    separator = _separator
     wav, sr = torchaudio.load(audio_path)
     vocal_wav, inst_wav, out_sr = separator.separate(wav, sr)
         melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
         actual_melody_path = melody_vocals_path
+    audio_tensor, sr = _model(
         ref_audio_path=actual_ref_path,
         melody_audio_path=actual_melody_path,
         ref_text=ref_text.strip(),
 # ---------------------------------------------------------------------------
 # Custom CSS / 自定义样式
 # ---------------------------------------------------------------------------
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
 }
 /* ========== 2. Section labels: left accent bar ========== */
 .resume-container .section-title {
     border: none;
     outline: none;
     box-shadow: none;
     font-family: 'DM Sans', sans-serif;
     font-weight: 700;
     font-size: 1rem;
     letter-spacing: 0.06em;
     text-transform: uppercase;
     color: var(--primary);
     display: block;
     padding: 3px 0 3px 10px;
     margin-bottom: 14px;
     border-left: 4px solid var(--primary-warm);
     background: linear-gradient(90deg, rgb(254 158 199 / 8%) 0%, transparent 70%);
     border-radius: 0 4px 4px 0;
 }
 .resume-container .section-title * {
     border: inherit;
     outline: none;
 }
 /* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
 .vocal-sep-info b {
     color: #c9d1d9;
     font-weight: 700;
 }
 b[data-ref] {
     color: #FE9EC7 !important;
 }
                 )
         # ================================================================
+        # ROW 2 – 伴奏分离
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
         gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation")
         gr.HTML("""
 <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
             background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
                 info="将合成人声与分离出的伴奏混合作为最终输出（需先开启人声分离）/ Mix synthesised vocals with the separated accompaniment (requires separation enabled)",
             )
+        # ================================================================
+        # ROW 3 – 高级参数
+        # ================================================================
         with gr.Accordion("⚙️ 高级参数 / Advanced Parameters", open=False):
             with gr.Row():
                 nfe_step = gr.Slider(
                     info="-1 表示随机生成 / -1 means random",
                 )
+        # ================================================================
+        # ROW 4 – 预设示例（放在所有真实控件定义之后）
+        # ================================================================
+        gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
+        gr.Markdown("#### 🎵 预设示例 / Example Presets")
+        gr.Markdown(
+            "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
+        )
+        # 所有真实控件均已定义，直接绑定
+        _example_inputs = [
+            ref_audio, melody_audio, ref_text, target_text,
+            separate_vocals_flag, mix_accompaniment_flag,
+            sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
+        ]
+        with gr.Tabs():
+            with gr.Tab("🎼 Melody Control"):
+                gr.Examples(
+                    examples=EXAMPLES_MELODY_CONTROL,
+                    inputs=_example_inputs,
+                    label="Melody Control Examples",
+                    examples_per_page=5,
+                )
+            with gr.Tab("✏️ Lyric Edit"):
+                gr.Examples(
+                    examples=EXAMPLES_LYRIC_EDIT,
+                    inputs=_example_inputs,
+                    label="Lyric Edit Examples",
+                    examples_per_page=5,
+                )
         # ================================================================
         # ROW 5 – 合成按钮与输出
         # ================================================================

examples/hf_space/melody_control/melody_control_ZH_02_melody.wav CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:795289e027cf06fd8e3898453fc353265dc114ca111446999900aa4dba40c126
-size 1221884

 version https://git-lfs.github.com/spec/v1
+oid sha256:d022b45bd36f0e3618a85422f21a9fd244a18c998c60f0aa59fdfe3c1beb9c59
+size 2039084

examples/hf_space/melody_control/melody_control_ZH_02_timbre.wav CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d022b45bd36f0e3618a85422f21a9fd244a18c998c60f0aa59fdfe3c1beb9c59
-size 2039084

 version https://git-lfs.github.com/spec/v1
+oid sha256:795289e027cf06fd8e3898453fc353265dc114ca111446999900aa4dba40c126
+size 1221884