Spaces:

ASLP-lab
/

YingMusic-Singer-Plus

Configuration error

App Files Community

xjsc0 committed on Mar 24

Commit

7389d4c

1 Parent(s): ffbb4ab

修复了一些已知问题

Browse files

Files changed (1) hide show

app.py +141 -65

app.py CHANGED Viewed

@@ -123,8 +123,6 @@ def mix_vocal_and_accompaniment(
 # ---------------------------------------------------------------------------
 # Inference wrapper / 推理入口
-# Single @spaces.GPU scope covers ALL heavy work (separation + synthesis)
-# so models stay resident in GPU memory across steps within one call.
 # ---------------------------------------------------------------------------
 @spaces.GPU
 def synthesize(
@@ -140,16 +138,8 @@ def synthesize(
     cfg_strength,
     seed,
 ):
-    """
-    主合成流程 / Main synthesis pipeline.
-    1. (可选) 用 MelBandRoformer 分离参考音频和旋律音频的人声与伴奏
-    2. 送入 YingMusicSinger 合成
-    3. (可选) 将合成人声与旋律音频的伴奏混合
-    """
     import random
-    # ---- 输入校验 / Input validation ----------------------------------------
     if ref_audio is None:
         raise gr.Error("请上传参考音频 / Please upload Reference Audio")
     if melody_audio is None:
@@ -168,7 +158,6 @@ def synthesize(
     if actual_seed < 0:
         actual_seed = random.randint(0, 2**31 - 1)
-    # ---- Step 1: 人声分离（合并在同一 GPU 上下文中）/ Vocal separation (same GPU context) ----
     melody_accomp_path = None
     actual_ref_path = ref_audio_path
     actual_melody_path = melody_audio_path
@@ -180,7 +169,6 @@ def synthesize(
         melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
         actual_melody_path = melody_vocals_path
-    # ---- Step 2: 模型推理 / Model inference (same GPU context) ---------------
     model = _load_model_impl()
     audio_tensor, sr = model(
@@ -199,7 +187,6 @@ def synthesize(
     vocal_out_path = os.path.join(tempfile.mkdtemp(), "vocal_output.wav")
     torchaudio.save(vocal_out_path, audio_tensor.to("cpu"), sample_rate=sr)
-    # ---- Step 3: 混合伴奏 / Mix accompaniment (optional) ---------------------
     if (
         separate_vocals_flag
         and mix_accompaniment_flag
@@ -215,66 +202,138 @@ def synthesize(
 # Example presets / 预设示例
 # ---------------------------------------------------------------------------
 EXAMPLES_MELODY_CONTROL = [
-    # [ref_audio, melody_audio, ref_text, target_text, sep, mix, sil, t_shift, nfe, cfg, seed]
     [
-        "examples/melody_control/ref_01.wav",
-        "examples/melody_control/melody_01.wav",
-        "该体谅的不执着|如果那天我",
-        "好多天|看不完你",
-        True, False, 0.5, 0.5, 32, 3.0, -1,
     ],
     [
-        "examples/melody_control/ref_02.wav",
-        "examples/melody_control/melody_02.wav",
-        "月光下的身影|渐渐模糊",
-        "星光照亮前路|指引方向",
-        True, False, 0.5, 0.5, 32, 3.0, -1,
     ],
 ]
 EXAMPLES_LYRIC_EDIT = [
     [
-        "examples/lyric_edit/ref_01.wav",
-        "examples/lyric_edit/melody_01.wav",
-        "该体谅的不执着|如果那天我",
-        "忘不掉的笑容|留在心里面",
-        True, False, 0.5, 0.5, 32, 3.0, -1,
     ],
     [
-        "examples/lyric_edit/ref_02.wav",
-        "examples/lyric_edit/melody_02.wav",
-        "夜深了还不睡|想着你的脸",
-        "春风又吹过来|带走我思念",
-        True, False, 0.5, 0.5, 32, 3.0, -1,
     ],
 ]
 # ---------------------------------------------------------------------------
 # Custom CSS / 自定义样式
 # ---------------------------------------------------------------------------
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
 :root {
-    --primary: #e85d04;
-    --primary-light: #f48c06;
     --bg-dark: #0d1117;
     --surface: #161b22;
     --surface-light: #21262d;
     --text: #f0f6fc;
     --text-muted: #8b949e;
-    --accent-glow: rgba(232, 93, 4, 0.15);
     --border: #30363d;
 }
 .gradio-container {
     font-family: 'DM Sans', sans-serif !important;
     max-width: 1100px !important;
     margin: auto !important;
 }
-/* ---------- Badge links: no underline, no gap artifacts ---------- */
 #app-header .badges a {
     text-decoration: none !important;
     display: inline-block;
@@ -291,7 +350,7 @@ CUSTOM_CSS = """
     line-height: 1.8;
 }
-/* ---------- Header / 头部 ---------- */
 #app-header {
     text-align: center;
     padding: 1.8rem 1rem 0.5rem;
@@ -341,17 +400,23 @@ CUSTOM_CSS = """
     font-weight: 600;
 }
-/* ---------- Section labels / 分区标题 ---------- */
 .section-title {
     font-family: 'DM Sans', sans-serif !important;
     font-weight: 700 !important;
     font-size: 1rem !important;
     letter-spacing: 0.06em;
     text-transform: uppercase;
-    color: var(--primary-light) !important;
-    border-bottom: 2px solid var(--primary);
-    padding-bottom: 6px;
-    margin-bottom: 12px !important;
 }
 /* ---------- Example tabs ---------- */
@@ -360,29 +425,44 @@ CUSTOM_CSS = """
     font-size: 0.95rem !important;
 }
-/* ---------- Run button / 合成按钮 ---------- */
 #run-btn {
-    background: linear-gradient(135deg, #e85d04, #dc2f02) !important;
     border: none !important;
-    color: #fff !important;
     font-weight: 700 !important;
     font-size: 1.1rem !important;
     letter-spacing: 0.04em;
     padding: 12px 0 !important;
     border-radius: 10px !important;
     transition: transform 0.15s, box-shadow 0.25s !important;
-    box-shadow: 0 4px 20px rgba(232, 93, 4, 0.35) !important;
 }
 #run-btn:hover {
     transform: translateY(-1px) !important;
-    box-shadow: 0 6px 28px rgba(232, 93, 4, 0.5) !important;
 }
-/* ---------- Output audio / 输出音频 ---------- */
 #output-audio {
-    border: 2px solid var(--primary) !important;
     border-radius: 12px !important;
-    background: var(--accent-glow) !important;
 }
 """
@@ -490,7 +570,7 @@ def build_ui():
                 )
         # ================================================================
-        # ROW 2 – 预设示例 / Example Presets  ← before vocal separation
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
         gr.Markdown("#### 🎵 预设示例 / Example Presets", elem_classes="section-title")
@@ -498,8 +578,6 @@ def build_ui():
             "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
         )
-        # Hidden advanced-param components so gr.Examples can reference them
-        # (real sliders rendered inside the accordion below override these values)
         with gr.Row(visible=False):
             _sep_flag_ex   = gr.Checkbox(value=True)
             _mix_flag_ex   = gr.Checkbox(value=False)
@@ -532,16 +610,19 @@ def build_ui():
                 )
         # ================================================================
-        # ROW 3 – 伴奏分离 / Vocal Separation
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
         gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation", elem_classes="section-title")
         gr.HTML("""
 <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
             background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
   <ul style="margin:0; padding-left:1.2em; list-style: none;">
     <li style="margin-bottom:7px;">
-      💡 若输入的<b style="color:#c9d1d9;">参考音频</b>或<b style="color:#c9d1d9;">旋律音频</b>中含有伴奏或背景噪音，请开启「分离人声后过模型」—— 模型基于纯人声训练，混合音频会影响合成质量。<br>
       <span style="color:#6e7681; font-size:0.82rem;">If either input contains accompaniment or background noise, enable <i>Separate vocals before synthesis</i> — the model is trained on clean vocals only and mixed audio degrades quality.</span>
     </li>
     <li style="margin-bottom:7px;">
@@ -549,7 +630,7 @@ def build_ui():
       <span style="color:#6e7681; font-size:0.82rem;">If both inputs are already clean vocals, skip separation — enabling it unnecessarily may introduce artifacts from the separation model.</span>
     </li>
     <li>
-      💡 若旋律音频含有伴奏，开启「分离人声后过模型」后，最终输出是否保留伴奏由「输出时混入伴奏」控制。<br>
       <span style="color:#6e7681; font-size:0.82rem;">If the melody audio contains accompaniment and separation is enabled, use <i>Mix accompaniment into output</i> to decide whether to include it in the final result.</span>
     </li>
   </ul>
@@ -597,7 +678,7 @@ def build_ui():
                 )
         # ================================================================
-        # ROW 5 – 合成按钮与输出 / Run & Output
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 12px 0;'>")
         run_btn = gr.Button("🎤  开始合成 / Start Synthesizing", elem_id="run-btn", size="lg")
@@ -608,16 +689,12 @@ def build_ui():
             elem_id="output-audio",
         )
-        # All inputs for the synthesize() call (uses real sliders, not example placeholders)
         _all_inputs = [
             ref_audio, melody_audio, ref_text, target_text,
             separate_vocals_flag, mix_accompaniment_flag,
             sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
         ]
-        # ================================================================
-        # Event wiring / 事件绑定
-        # ================================================================
         separate_vocals_flag.change(
             fn=lambda sep: gr.update(interactive=sep, value=False),
             inputs=[separate_vocals_flag],
@@ -630,14 +707,13 @@ def build_ui():
             outputs=output_audio,
         )
-        # ---- 页脚：免责声明 / Footer: disclaimer ----
         gr.HTML(DISCLAIMER_HTML)
     return demo
 # ---------------------------------------------------------------------------
-# Entry point / 启动入口
 # ---------------------------------------------------------------------------
 if __name__ == "__main__":
     demo = build_ui()

 # ---------------------------------------------------------------------------
 # Inference wrapper / 推理入口
 # ---------------------------------------------------------------------------
 @spaces.GPU
 def synthesize(
     cfg_strength,
     seed,
 ):
     import random
     if ref_audio is None:
         raise gr.Error("请上传参考音频 / Please upload Reference Audio")
     if melody_audio is None:
     if actual_seed < 0:
         actual_seed = random.randint(0, 2**31 - 1)
     melody_accomp_path = None
     actual_ref_path = ref_audio_path
     actual_melody_path = melody_audio_path
         melody_vocals_path, melody_accomp_path = _separate_vocals_impl(melody_audio_path)
         actual_melody_path = melody_vocals_path
     model = _load_model_impl()
     audio_tensor, sr = model(
     vocal_out_path = os.path.join(tempfile.mkdtemp(), "vocal_output.wav")
     torchaudio.save(vocal_out_path, audio_tensor.to("cpu"), sample_rate=sr)
     if (
         separate_vocals_flag
         and mix_accompaniment_flag
 # Example presets / 预设示例
 # ---------------------------------------------------------------------------
 EXAMPLES_MELODY_CONTROL = [
     [
+        "examples/hf_space/melody_control/melody_control_ZH_01_timbre.wav",
+        "examples/hf_space/melody_control/melody_control_ZH_01_melody.wav",
+        "人和人的沟通|有时候没有用",
+        "此刻记忆中的点滴啊|能否再次被珍藏",
+        True, True, 0.5, 0.5, 32, 3.0, -1,
     ],
     [
+        "examples/hf_space/melody_control/melody_control_EN_01_timbre.wav",
+        "examples/hf_space/melody_control/melody_control_EN_01_melody.wav",
+        "i don't know feel|but i wanna try",
+        "won't open the door|and say tomorrow",
+        True, True, 0.5, 0.5, 32, 3.0, -1,
     ],
+    [
+        "examples/hf_space/melody_control/melody_control_EN_02_timbre.wav",
+        "examples/hf_space/melody_control/melody_control_EN_02_melody.wav",
+        "and she'll never know your story like|i do",
+        "你将安然无恙|无人能再伤你",
+        False, False, 0.5, 0.5, 32, 3.0, -1,
+    ],
+    [
+        "examples/hf_space/melody_control/melody_control_ZH_02_timbre.wav",
+        "examples/hf_space/melody_control/melody_control_ZH_02_melody.wav",
+        "就让你|在别人怀里|快乐",
+        "Missing you in my mind|missing you in my heart",
+        False, False, 0.5, 0.5, 32, 3.0, -1,
+    ]
 ]
 EXAMPLES_LYRIC_EDIT = [
     [
+        "examples/hf_space/lyric_edit/SingEdit_ZH_01.wav",
+        "examples/hf_space/lyric_edit/SingEdit_ZH_01.wav",
+        "天青色等烟雨|而我在等你|炊烟袅袅升起",
+        "阳光中赏花香|花瓣在飘落|山间幽静致远",
+        True, True, 0.5, 0.5, 32, 3.0, -1,
     ],
     [
+        "examples/hf_space/lyric_edit/SingEdit_EN_01.wav",
+        "examples/hf_space/lyric_edit/SingEdit_EN_01.wav",
+        "can you tell my heart is speaking|my eyes will give you clues",
+        "can you spot the moon is grinning|my lips will show you hints",
+        True, True, 0.5, 0.5, 32, 3.0, -1,
+    ],
+    [
+        "examples/hf_space/lyric_edit/SingEdit_ZH_02.wav",
+        "examples/hf_space/lyric_edit/SingEdit_ZH_02.wav",
+        "歌声是翅膀|唱出了希望|所有的付出只因爱的力量|和你一样",
+        "火锅是梦想|煮出了欢畅|全部的辛劳全因肉的力量|与汤一样",
+        False, False, 0.5, 0.5, 32, 3.0, -1,
+    ],
+    [
+        "examples/hf_space/lyric_edit/SingEdit_EN_02.wav",
+        "examples/hf_space/lyric_edit/SingEdit_EN_02.wav",
+        "i can hear what you say|now i know|why know we can|make it",
+        "i can see where you go|but i say|why not we will|break it",
+        False, False, 0.5, 0.5, 32, 3.0, -1,
     ],
 ]
 # ---------------------------------------------------------------------------
 # Custom CSS / 自定义样式
+# CHANGES:
+#   1. Top gradient bar using palette colors
+#   2. Section titles: left-bar accent instead of bottom border
+#   3. Color palette updated to #FE9EC7 / #F9F6C4 / #89D4FF / #44ACFF
+#   4. Vocal-sep info box: reference/melody text use palette colors
 # ---------------------------------------------------------------------------
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,500;0,9..40,700;1,9..40,400&family=Playfair+Display:wght@600;800&display=swap');
 :root {
+    --primary: #44ACFF;
+    --primary-light: #89D4FF;
+    --primary-warm: #FE9EC7;
+    --palette-yellow: #F9F6C4;
     --bg-dark: #0d1117;
     --surface: #161b22;
     --surface-light: #21262d;
     --text: #f0f6fc;
     --text-muted: #8b949e;
+    --accent-glow: rgba(68, 172, 255, 0.10);
     --border: #30363d;
 }
+/* ========== 1. TOP GRADIENT BAR ========== */
+/* Inject a full-width gradient strip at the very top of the page */
+body > gradio-app::before,
+.gradio-container > *:first-child::before {
+    content: '';
+    display: block;
+    height: 0;
+}
+/* Use a pseudo-element on the outermost wrapper Gradio renders */
+.app.svelte-182fdeq::before,
+footer ~ *::before {
+    display: none;
+}
+/* Reliable approach: prepend a fixed bar via the container's outline */
 .gradio-container {
     font-family: 'DM Sans', sans-serif !important;
     max-width: 1100px !important;
     margin: auto !important;
+    /* top gradient bar using outline trick */
+    outline: none;
+    position: relative;
+}
+.gradio-container::before {
+    content: '';
+    display: block;
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    height: 5px;
+    background: linear-gradient(90deg,
+        #FE9EC7 0%,
+        #F9F6C4 33%,
+        #89D4FF 66%,
+        #44ACFF 100%
+    );
+    border-radius: 3px 3px 0 0;
+    z-index: 100;
+    pointer-events: none;
 }
+/* ---------- Badge links ---------- */
 #app-header .badges a {
     text-decoration: none !important;
     display: inline-block;
     line-height: 1.8;
 }
+/* ---------- Header ---------- */
 #app-header {
     text-align: center;
     padding: 1.8rem 1rem 0.5rem;
     font-weight: 600;
 }
+/* ========== 2. Section labels: left accent bar instead of bottom border ========== */
 .section-title {
     font-family: 'DM Sans', sans-serif !important;
     font-weight: 700 !important;
     font-size: 1rem !important;
     letter-spacing: 0.06em;
     text-transform: uppercase;
+    color: var(--primary) !important;
+    /* Remove the old bottom-border horizontal line */
+    border-bottom: none !important;
+    /* Replace with a left accent bar */
+    border-left: 4px solid var(--primary-warm) !important;
+    padding: 3px 0 3px 10px !important;
+    margin-bottom: 14px !important;
+    background: linear-gradient(90deg, rgba(254,158,199,0.08) 0%, transparent 70%) !important;
+    border-radius: 0 4px 4px 0 !important;
+    display: block;
 }
 /* ---------- Example tabs ---------- */
     font-size: 0.95rem !important;
 }
+/* ========== 3. Run button — palette blue ========== */
 #run-btn {
+    background: linear-gradient(135deg, #44ACFF, #89D4FF) !important;
     border: none !important;
+    color: #0a1628 !important;
     font-weight: 700 !important;
     font-size: 1.1rem !important;
     letter-spacing: 0.04em;
     padding: 12px 0 !important;
     border-radius: 10px !important;
     transition: transform 0.15s, box-shadow 0.25s !important;
+    box-shadow: 0 4px 20px rgba(68, 172, 255, 0.35) !important;
 }
 #run-btn:hover {
     transform: translateY(-1px) !important;
+    box-shadow: 0 6px 28px rgba(68, 172, 255, 0.5) !important;
 }
+/* ---------- Output audio ---------- */
 #output-audio {
+    border: 2px solid #44ACFF !important;
     border-radius: 12px !important;
+    background: rgba(68, 172, 255, 0.07) !important;
+}
+/* ========== 4. Vocal-sep info box: fix highlighted text colors ========== */
+/* Target the <b> tags inside the vocal separation info box */
+.vocal-sep-info b {
+    /* default fallback */
+    color: #c9d1d9;
+    font-weight: 700;
+}
+/* Specifically target ref audio and melody audio highlights via data-color */
+b[data-ref] {
+    color: #FE9EC7 !important;
+}
+b[data-melody] {
+    color: #89D4FF !important;
 }
 """
                 )
         # ================================================================
+        # ROW 2 – 预设示例
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
         gr.Markdown("#### 🎵 预设示例 / Example Presets", elem_classes="section-title")
             "<small style='color:#8b949e;'>点击任意行自动填入上方输入区域 / Click any row to auto-fill the inputs above</small>"
         )
         with gr.Row(visible=False):
             _sep_flag_ex   = gr.Checkbox(value=True)
             _mix_flag_ex   = gr.Checkbox(value=False)
                 )
         # ================================================================
+        # ROW 3 – 伴奏分离
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 16px 0 12px;'>")
         gr.Markdown("#### 🎚️ 伴奏分离 / Vocal Separation", elem_classes="section-title")
+        # CHANGE 4: Use inline style colors for 参考音频 and 旋律音频
+        # so they render correctly regardless of CSS specificity issues
         gr.HTML("""
 <div style="font-size:0.85rem; color:#8b949e; line-height:1.75; margin: 0 0 12px; padding: 10px 16px;
             background: rgba(255,255,255,0.03); border-radius: 8px; border: 1px solid #21262d;">
   <ul style="margin:0; padding-left:1.2em; list-style: none;">
     <li style="margin-bottom:7px;">
+      💡 若输入的<b style="color:#FE9EC7; font-weight:700;">参考音频</b>或<b style="color:#89D4FF; font-weight:700;">旋律音频</b>中含有伴奏或背景噪音，请开启「分离人声后过模型」—— 模型基于纯人声训练，混合音频会影响合成质量。<br>
       <span style="color:#6e7681; font-size:0.82rem;">If either input contains accompaniment or background noise, enable <i>Separate vocals before synthesis</i> — the model is trained on clean vocals only and mixed audio degrades quality.</span>
     </li>
     <li style="margin-bottom:7px;">
       <span style="color:#6e7681; font-size:0.82rem;">If both inputs are already clean vocals, skip separation — enabling it unnecessarily may introduce artifacts from the separation model.</span>
     </li>
     <li>
+      💡 若<b style="color:#89D4FF; font-weight:700;">旋律音频</b>含有伴奏，开启「分离人声后过模型」后，最终输出是否保留伴奏由「输出时混入伴奏」控制。<br>
       <span style="color:#6e7681; font-size:0.82rem;">If the melody audio contains accompaniment and separation is enabled, use <i>Mix accompaniment into output</i> to decide whether to include it in the final result.</span>
     </li>
   </ul>
                 )
         # ================================================================
+        # ROW 5 – 合成按钮与输出
         # ================================================================
         gr.HTML("<hr style='border-color:#30363d; margin: 12px 0;'>")
         run_btn = gr.Button("🎤  开始合成 / Start Synthesizing", elem_id="run-btn", size="lg")
             elem_id="output-audio",
         )
         _all_inputs = [
             ref_audio, melody_audio, ref_text, target_text,
             separate_vocals_flag, mix_accompaniment_flag,
             sil_len_to_end, t_shift, nfe_step, cfg_strength, seed,
         ]
         separate_vocals_flag.change(
             fn=lambda sep: gr.update(interactive=sep, value=False),
             inputs=[separate_vocals_flag],
             outputs=output_audio,
         )
         gr.HTML(DISCLAIMER_HTML)
     return demo
 # ---------------------------------------------------------------------------
+# Entry point
 # ---------------------------------------------------------------------------
 if __name__ == "__main__":
     demo = build_ui()