Spaces:

Tachyeon
/

Swara-Split

Running

App Files Files Community

Tachyeon committed on Jan 4

Commit

5f6cebb

verified ·

1 Parent(s): b9f4091

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -89

app.py CHANGED Viewed

@@ -6,9 +6,9 @@ import soundfile as sf
 import numpy as np
 from huggingface_hub import hf_hub_download
-# ==========================================
-# 1. ENGINE SETUP (UNCHANGED)
-# ==========================================
 try:
     from bs_roformer import BSRoformer
     from attend import Attend
@@ -18,7 +18,9 @@ except ImportError:
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 def safe_attend_forward(self, q, k, v, mask=None):
-    return F.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0., is_causal=False)
 try:
     Attend.forward = safe_attend_forward
@@ -27,7 +29,7 @@ except Exception:
 def load_model():
     print("Connecting to model...")
-    checkpoint_path = hf_hub_download(
         repo_id="Tachyeon/IAM-RoFormer-Model-Weights",
         filename="v11_consensus_epoch_30.pt"
     )
@@ -42,169 +44,215 @@ def load_model():
         flash_attn=True
     ).to(DEVICE)
-    ck = torch.load(checkpoint_path, map_location=DEVICE)
-    model.load_state_dict(ck["model"] if "model" in ck else ck)
     model.eval()
     return model
 model = load_model()
 def separate_audio(audio_path):
-    if model is None or not audio_path:
         return [None] * 4
     mix, sr = librosa.load(audio_path, sr=44100, mono=False)
     if mix.ndim == 1:
-        mix = np.stack([mix, mix], axis=0)
-    chunk_size = 44100 * 10
     overlap = 44100
-    mix_tensor = torch.tensor(mix).float().to(DEVICE).unsqueeze(0)
-    length = mix_tensor.shape[-1]
-    output = torch.zeros(1, 4, 2, length, device=DEVICE)
-    count = torch.zeros_like(output)
     with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
-        for start in range(0, length, chunk_size - overlap):
-            end = min(start + chunk_size, length)
-            chunk = mix_tensor[:, :, start:end]
-            if chunk.shape[-1] < chunk_size:
-                chunk = F.pad(chunk, (0, chunk_size - chunk.shape[-1]))
-            pred = model(chunk)
-            valid = end - start
-            output[:, :, :, start:end] += pred[:, :, :, :valid]
-            count[:, :, :, start:end] += 1
-    stems = (output / count.clamp(min=1)).cpu().numpy()[0]
     files = []
     for i in range(4):
-        fname = f"stem_{i}.wav"
-        sf.write(fname, stems[i].T, sr)
-        files.append(fname)
     return files
-# ==========================================
-# 2. UI (Gradio 6 SAFE)
-# ==========================================
 css = """
 @import url('https://fonts.googleapis.com/css2?family=Anton&family=Playfair+Display:ital@1&family=Poppins:wght@400;600;700&display=swap');
-:root{
-    --bg:#2b1620;
-    --panel:#3a2430;
     --ink:#f6efe8;
     --muted:#c7bfbf;
     --accent:#ff73a6;
 }
 html, body, .gradio-container {
     height:100%;
-    background:linear-gradient(180deg,#2b1620,#1b0d14)!important;
-    color:var(--ink)!important;
     font-family:Poppins,sans-serif;
 }
-.contain{
-    height:100vh;
-    max-width:1200px;
-    margin:auto;
-    padding:20px;
     display:grid;
     grid-template-rows:auto 1fr;
-    gap:20px;
 }
-.header{
     display:flex;
     justify-content:space-between;
     align-items:center;
-    border:1px solid rgba(255,255,255,.05);
-    padding:16px;
 }
-.logo{
     font-family:Anton,sans-serif;
-    font-size:42px;
 }
-.subtitle{
     font-family:'Playfair Display',serif;
     font-style:italic;
     color:var(--accent);
 }
-.grid{
     display:grid;
     grid-template-columns:1fr 1fr;
-    gap:20px;
     height:100%;
 }
-.card{
-    border:1px solid rgba(255,255,255,.05);
-    padding:20px;
     display:flex;
     flex-direction:column;
-    gap:16px;
 }
-.input-box{
-    border:1px dashed rgba(255,255,255,.08);
-    padding:30px;
     text-align:center;
 }
-.run-btn{
-    background:linear-gradient(90deg,#ff73a6,#ffd58a)!important;
-    color:#12090b!important;
-    font-weight:800!important;
 }
-.stems{
     display:grid;
     grid-template-columns:1fr 1fr;
-    gap:14px;
 }
-.label{
     font-family:'Playfair Display',serif;
     font-style:italic;
     color:var(--accent);
 }
 """
 with gr.Blocks() as demo:
-    with gr.Column(elem_classes="contain"):
         with gr.Row(elem_classes="header"):
-            gr.HTML('<div class="logo">SWARA STUDIO</div>')
-            gr.HTML('<div class="subtitle">audio source separation</div>')
-        with gr.Row(elem_classes="grid"):
-            with gr.Column(elem_classes="card"):
-                gr.HTML('<div class="input-box"><b>MASTER AUDIO</b><br>Drop or upload WAV / MP3</div>')
-                input_audio = gr.Audio(type="filepath")
-                run_btn = gr.Button("RUN SEPARATION", elem_classes="run-btn")
-            with gr.Column(elem_classes="card"):
                 gr.HTML('<div class="label">STEMS</div>')
                 with gr.Row(elem_classes="stems"):
-                    out_vocals = gr.Audio(label="Vocals", interactive=False)
-                    out_drums = gr.Audio(label="Drums", interactive=False)
-                    out_bass = gr.Audio(label="Bass", interactive=False)
-                    out_other = gr.Audio(label="Other", interactive=False)
-    run_btn.click(
-        separate_audio,
-        input_audio,
-        [out_vocals, out_drums, out_bass, out_other]
-    )
 if __name__ == "__main__":
     demo.launch(css=css, theme=gr.themes.Base())

 import numpy as np
 from huggingface_hub import hf_hub_download
+# =====================================================
+# 1. MODEL LOGIC (UNCHANGED)
+# =====================================================
 try:
     from bs_roformer import BSRoformer
     from attend import Attend
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 def safe_attend_forward(self, q, k, v, mask=None):
+    return F.scaled_dot_product_attention(
+        q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False
+    )
 try:
     Attend.forward = safe_attend_forward
 def load_model():
     print("Connecting to model...")
+    ckpt = hf_hub_download(
         repo_id="Tachyeon/IAM-RoFormer-Model-Weights",
         filename="v11_consensus_epoch_30.pt"
     )
         flash_attn=True
     ).to(DEVICE)
+    state = torch.load(ckpt, map_location=DEVICE)
+    model.load_state_dict(state["model"] if "model" in state else state)
     model.eval()
     return model
 model = load_model()
 def separate_audio(audio_path):
+    if not audio_path:
         return [None] * 4
     mix, sr = librosa.load(audio_path, sr=44100, mono=False)
     if mix.ndim == 1:
+        mix = np.stack([mix, mix])
+    chunk = 44100 * 10
     overlap = 44100
+    x = torch.tensor(mix).float().to(DEVICE)[None]
+    length = x.shape[-1]
+    out = torch.zeros(1, 4, 2, length, device=DEVICE)
+    cnt = torch.zeros_like(out)
     with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+        for s in range(0, length, chunk - overlap):
+            e = min(s + chunk, length)
+            part = x[:, :, s:e]
+            if part.shape[-1] < chunk:
+                part = F.pad(part, (0, chunk - part.shape[-1]))
+            pred = model(part)
+            out[:, :, :, s:e] += pred[:, :, :, : e - s]
+            cnt[:, :, :, s:e] += 1
+    stems = (out / cnt.clamp(min=1)).cpu().numpy()[0]
     files = []
     for i in range(4):
+        f = f"stem_{i}.wav"
+        sf.write(f, stems[i].T, sr)
+        files.append(f)
     return files
+# =====================================================
+# 2. POLISHED UI (FIXED LAYOUT, NO SCROLL)
+# =====================================================
 css = """
 @import url('https://fonts.googleapis.com/css2?family=Anton&family=Playfair+Display:ital@1&family=Poppins:wght@400;600;700&display=swap');
+:root {
+    --bg1:#2b1620;
+    --bg2:#1c0d14;
+    --panel:rgba(255,255,255,0.04);
+    --border:rgba(255,255,255,0.08);
     --ink:#f6efe8;
     --muted:#c7bfbf;
     --accent:#ff73a6;
 }
+/* HARD RESET */
 html, body, .gradio-container {
     height:100%;
+    width:100%;
+    margin:0;
+    padding:0;
+    overflow:hidden !important;
+    background:linear-gradient(180deg,var(--bg1),var(--bg2)) !important;
+    color:var(--ink);
     font-family:Poppins,sans-serif;
 }
+/* CENTERED APP */
+.app {
+    max-width:1100px;
+    height:100%;
+    margin:0 auto;
+    padding:32px;
     display:grid;
     grid-template-rows:auto 1fr;
+    gap:28px;
+    box-sizing:border-box;
 }
+/* HEADER */
+.header {
     display:flex;
     justify-content:space-between;
     align-items:center;
 }
+.title {
     font-family:Anton,sans-serif;
+    font-size:44px;
+    letter-spacing:1px;
 }
+.subtitle {
     font-family:'Playfair Display',serif;
     font-style:italic;
     color:var(--accent);
+    margin-left:14px;
 }
+/* MAIN GRID */
+.main {
     display:grid;
     grid-template-columns:1fr 1fr;
+    gap:32px;
     height:100%;
 }
+/* PANELS */
+.panel {
+    background:var(--panel);
+    border:1px solid var(--border);
+    border-radius:16px;
+    padding:28px;
     display:flex;
     flex-direction:column;
+    gap:22px;
+    box-sizing:border-box;
 }
+/* INPUT */
+.drop {
+    border:1px dashed var(--border);
+    border-radius:12px;
+    padding:32px;
     text-align:center;
 }
+.drop h3 {
+    margin:0;
+    font-size:18px;
+    letter-spacing:1px;
+}
+/* BUTTON */
+.run {
+    background:linear-gradient(90deg,#ff73a6,#ffd58a) !important;
+    color:#160c10 !important;
+    font-weight:800 !important;
+    border-radius:10px !important;
+    border:none !important;
 }
+/* STEMS */
+.stems {
     display:grid;
     grid-template-columns:1fr 1fr;
+    gap:18px;
 }
+.stem {
+    background:rgba(255,255,255,0.03);
+    border:1px solid var(--border);
+    border-radius:12px;
+    padding:16px;
+}
+.label {
     font-family:'Playfair Display',serif;
     font-style:italic;
     color:var(--accent);
+    margin-bottom:6px;
+}
+/* AUDIO FIX */
+audio {
+    width:100%;
+    max-height:36px;
 }
 """
 with gr.Blocks() as demo:
+    with gr.Column(elem_classes="app"):
         with gr.Row(elem_classes="header"):
+            gr.HTML('<div class="title">SWARA STUDIO</div>')
+            gr.HTML('<div class="subtitle">Audio Source Separation</div>')
+        with gr.Row(elem_classes="main"):
+            with gr.Column(elem_classes="panel"):
+                gr.HTML("""
+                    <div class="drop">
+                        <h3>MASTER AUDIO</h3>
+                        <p>Drop or upload WAV / MP3</p>
+                    </div>
+                """)
+                inp = gr.Audio(type="filepath")
+                btn = gr.Button("RUN SEPARATION", elem_classes="run")
+            with gr.Column(elem_classes="panel"):
                 gr.HTML('<div class="label">STEMS</div>')
                 with gr.Row(elem_classes="stems"):
+                    with gr.Column(elem_classes="stem"):
+                        gr.HTML('<div class="label">Vocals</div>')
+                        o1 = gr.Audio(interactive=False)
+                    with gr.Column(elem_classes="stem"):
+                        gr.HTML('<div class="label">Drums</div>')
+                        o2 = gr.Audio(interactive=False)
+                    with gr.Column(elem_classes="stem"):
+                        gr.HTML('<div class="label">Bass</div>')
+                        o3 = gr.Audio(interactive=False)
+                    with gr.Column(elem_classes="stem"):
+                        gr.HTML('<div class="label">Other</div>')
+                        o4 = gr.Audio(interactive=False)
+    btn.click(separate_audio, inp, [o1, o2, o3, o4])
 if __name__ == "__main__":
     demo.launch(css=css, theme=gr.themes.Base())