Tachyeon committed on
Commit
281909f
·
verified ·
1 Parent(s): eb3243f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -115
app.py CHANGED
@@ -1,154 +1,291 @@
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
- # -----------------------------
4
- # CSS (Gradio 6 compatible)
5
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  css = """
7
- :root {
8
- --bg1: #1a0c12;
9
- --bg2: #3a1523;
10
- --panel: rgba(255,255,255,0.04);
11
- --panel-soft: rgba(255,255,255,0.03);
12
- --border: rgba(255,255,255,0.10);
13
- --text: #f4f4f4;
14
- --muted: rgba(255,255,255,0.65);
15
- --accent: #ff5c8a;
 
 
16
  }
17
 
18
- body {
19
- background: linear-gradient(135deg, var(--bg1), var(--bg2));
20
- font-family: Inter, system-ui, sans-serif;
 
 
 
 
 
 
21
  }
22
 
23
- .gradio-container {
24
- max-width: 1200px !important;
 
25
  margin: 0 auto;
 
 
 
 
 
26
  }
27
 
28
- /* Header */
29
- .header {
30
- margin-bottom: 26px;
 
 
31
  }
32
  .logo {
33
- font-size: 42px;
34
- font-weight: 800;
35
- letter-spacing: 1px;
36
  }
37
  .tagline {
38
- margin-top: 6px;
39
- font-size: 14px;
40
- color: var(--accent);
 
41
  }
42
 
43
- /* Section */
44
- .section-title {
45
- font-size: 20px;
46
- font-weight: 600;
 
 
47
  }
48
- .section-sub {
49
- font-size: 14px;
50
- color: var(--muted);
51
- margin-bottom: 14px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
 
54
- /* GRID */
55
- .main-grid {
56
  display: grid;
57
- grid-template-columns: 2fr 1fr 1fr;
58
- grid-template-rows: auto auto;
59
  gap: 22px;
 
60
  }
61
 
62
- /* Upload */
63
- .upload-box {
64
- grid-row: span 2;
65
- background: var(--panel);
66
- border: 1px solid var(--border);
67
  border-radius: 14px;
68
- padding: 18px;
 
 
 
 
 
 
 
69
  }
70
 
71
- /* Stems */
72
- .stem-box {
73
- background: var(--panel-soft);
74
- border: 1px solid var(--border);
75
- border-radius: 14px;
76
- padding: 14px;
77
  }
78
 
79
- .stem-title {
80
- font-size: 14px;
81
- font-weight: 500;
82
- color: var(--accent);
83
- margin-bottom: 6px;
84
  }
85
 
86
- /* Audio cleanup */
87
- .gradio-audio {
88
  background: transparent !important;
89
- border: none !important;
 
 
 
 
 
90
  }
91
 
92
- /* Button */
93
- .primary-btn {
94
- height: 48px;
95
- border-radius: 12px;
96
- background: linear-gradient(135deg, #ff7aa2, #ffb36b);
97
- color: #111;
98
- font-weight: 700;
99
- letter-spacing: 0.3px;
100
  }
101
 
102
- footer { display: none !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  """
104
 
105
- # -----------------------------
106
- # APP
107
- # -----------------------------
108
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- # Header
111
- gr.HTML("""
112
- <div class="header">
113
- <div class="logo">SWARA STUDIO</div>
114
- <div class="tagline">Separating Music Into Its elements</div>
115
- </div>
116
- """)
117
-
118
- # Intro
119
- gr.HTML("""
120
- <div class="section-title">Select a track</div>
121
- <div class="section-sub">We’ll break it down into individual parts</div>
122
- """)
123
-
124
- # Main grid
125
- with gr.Row(elem_classes="main-grid"):
126
-
127
- # Upload (aligned by grid, not hacks)
128
- with gr.Column(elem_classes="upload-box"):
129
- audio_in = gr.Audio(label="Audio", type="filepath")
130
-
131
- # Vocals
132
- with gr.Column(elem_classes="stem-box"):
133
- gr.HTML('<div class="stem-title">Vocals</div>')
134
- vocals = gr.Audio(label=None)
135
-
136
- # Drums
137
- with gr.Column(elem_classes="stem-box"):
138
- gr.HTML('<div class="stem-title">Drums</div>')
139
- drums = gr.Audio(label=None)
140
-
141
- # Bass
142
- with gr.Column(elem_classes="stem-box"):
143
- gr.HTML('<div class="stem-title">Bass</div>')
144
- bass = gr.Audio(label=None)
145
-
146
- # Other
147
- with gr.Column(elem_classes="stem-box"):
148
- gr.HTML('<div class="stem-title">Other</div>')
149
- other = gr.Audio(label=None)
150
-
151
- # Action
152
- separate = gr.Button("Separate", elem_classes="primary-btn")
153
-
154
- demo.launch(css=css)
 
1
  import gradio as gr
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import librosa
5
+ import soundfile as sf
6
+ import numpy as np
7
+ from huggingface_hub import hf_hub_download
8
 
9
+ # ================= MODEL (unchanged) =================
10
+ try:
11
+ from bs_roformer import BSRoformer
12
+ from attend import Attend
13
+ except ImportError:
14
+ pass
15
+
16
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
17
+
18
def safe_attend_forward(self, q, k, v, mask=None):
    """Replacement ``forward`` that always uses PyTorch's fused attention.

    Installed over ``Attend.forward`` below, so ``self`` is accepted for
    signature compatibility but never read.

    Args:
        self: The ``Attend`` instance the method is bound to (unused).
        q: Query tensor.
        k: Key tensor.
        v: Value tensor.
        mask: Optional attention mask, forwarded unchanged as ``attn_mask``.

    Returns:
        The result of ``F.scaled_dot_product_attention`` with dropout
        disabled.
    """
    return F.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0)


try:
    Attend.forward = safe_attend_forward
except NameError:
    # `Attend` is unbound only when the optional `attend` import earlier in
    # the file failed. Report that instead of hiding every possible error.
    import warnings

    warnings.warn(
        "attend.Attend is not available; the attention patch was not applied",
        RuntimeWarning,
    )
25
+
26
def load_model():
    """Download the checkpoint and build the separation model in eval mode.

    Fetches ``v11_consensus_epoch_30.pt`` from the
    ``Tachyeon/IAM-RoFormer-Model-Weights`` Hub repository, constructs a
    stereo, 4-stem ``BSRoformer`` on ``DEVICE`` and loads the weights.

    Returns:
        The ``BSRoformer`` instance, moved to ``DEVICE`` and set to eval mode.

    Raises:
        ImportError: If ``BSRoformer`` is not bound because the optional
            ``bs_roformer`` import at the top of the file failed.
    """
    try:
        model_cls = BSRoformer
    except NameError as exc:
        # The module-level import swallows ImportError, so fail here with a
        # message that names the real cause instead of a bare NameError.
        raise ImportError(
            "bs_roformer could not be imported; it is required to build the model"
        ) from exc

    ckpt = hf_hub_download(
        repo_id="Tachyeon/IAM-RoFormer-Model-Weights",
        filename="v11_consensus_epoch_30.pt",
    )
    model = model_cls(
        dim=512,
        depth=12,
        stereo=True,
        num_stems=4,
        time_transformer_depth=1,
        freq_transformer_depth=1,
        flash_attn=True,
    ).to(DEVICE)

    # NOTE(security): torch.load unpickles the file. Only load checkpoints
    # from a trusted repository; consider weights_only=True once the
    # checkpoint is confirmed to contain plain tensors only.
    state = torch.load(ckpt, map_location=DEVICE)
    # The weights may sit under a "model" key or be the bare state dict.
    model.load_state_dict(state["model"] if "model" in state else state)
    model.eval()
    return model
41
+
42
+ model = load_model()
43
+
44
def separate_audio(path):
    """Split an audio file into four stems and return the written WAV paths.

    The track is loaded at 44.1 kHz (a mono file is duplicated onto two
    channels) and passed through the module-level ``model`` in 10-second
    windows that overlap by one second. Samples covered by two windows are
    averaged. Each stem is written to its own WAV file in a fresh temporary
    directory, so separate requests never overwrite each other's output.

    Args:
        path: Path of the input audio file, or a falsy value when nothing
            was provided.

    Returns:
        A list of four file paths (``stem_0.wav`` to ``stem_3.wav``, in the
        order the model emits its stems), or ``[None] * 4`` when ``path`` is
        falsy.
    """
    if not path:
        return [None] * 4

    # Local imports keep this block self-contained; both are standard library.
    import os
    import tempfile

    mix, sr = librosa.load(path, sr=44100, mono=False)
    if mix.ndim == 1:
        mix = np.stack([mix, mix])

    x = torch.tensor(mix).float().to(DEVICE)[None]
    total = x.shape[-1]

    out = torch.zeros(1, 4, 2, total, device=DEVICE)
    # Per-sample window count; broadcasts over the batch/stem/channel axes,
    # so it does not need to be as large as `out`.
    cnt = torch.zeros(1, 1, 1, total, device=DEVICE)

    chunk = sr * 10  # 10-second analysis window
    hop = chunk - sr  # consecutive windows share one second

    with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
        for s in range(0, total, hop):
            e = min(s + chunk, total)
            part = x[:, :, s:e]
            if part.shape[-1] < chunk:
                # The model is always fed a full-length window; pad the tail.
                part = F.pad(part, (0, chunk - part.shape[-1]))
            pred = model(part)
            out[:, :, :, s:e] += pred[:, :, :, : e - s]
            cnt[:, :, :, s:e] += 1
            if e == total:
                # This window reached the end of the track. A further window
                # would only re-cover the overlap with mostly padding.
                break

    stems = (out / cnt.clamp(min=1)).cpu().numpy()[0]

    # NOTE: the directory is not removed here because the caller still needs
    # the files after this function returns.
    out_dir = tempfile.mkdtemp(prefix="stems_")
    files = []
    for i, stem in enumerate(stems):
        f = os.path.join(out_dir, f"stem_{i}.wav")
        sf.write(f, stem.T, sr)
        files.append(f)
    return files
78
+
79
+ # ================= UI CSS + layout (polished) =================
80
  css = """
81
+ @import url('https://fonts.googleapis.com/css2?family=Anton&family=Poppins:wght@400;500;600&display=swap');
82
+
83
+ :root{
84
+ --bg1:#2a141d;
85
+ --bg2:#14080d;
86
+ --ink:#f3ece6;
87
+ --muted:#b6aeb0;
88
+ --accent:#ff6f9f;
89
+ --panel: rgba(255,255,255,0.03);
90
+ --panel-2: rgba(255,255,255,0.02);
91
+ --radius: 12px;
92
  }
93
 
94
+ /* page reset */
95
+ html, body, .gradio-container {
96
+ height: 100%;
97
+ margin: 0;
98
+ padding: 0;
99
+ background: linear-gradient(180deg, var(--bg1), var(--bg2)) !important;
100
+ color: var(--ink);
101
+ font-family: Poppins, sans-serif;
102
+ overflow: auto;
103
  }
104
 
105
+ /* wrapper */
106
+ .app {
107
+ max-width: 1160px;
108
  margin: 0 auto;
109
+ padding: 48px 40px;
110
+ box-sizing: border-box;
111
+ display: grid;
112
+ grid-template-rows: auto 1fr;
113
+ gap: 36px;
114
  }
115
 
116
+ /* logo + tagline (left-anchored brand lockup) */
117
+ .brand {
118
+ display:flex;
119
+ flex-direction:column;
120
+ gap:6px;
121
  }
122
  .logo {
123
+ font-family: Anton, sans-serif;
124
+ font-size:46px;
125
+ letter-spacing:1px;
126
  }
127
  .tagline {
128
+ font-size:14px;
129
+ color:var(--accent);
130
+ opacity:0.9;
131
+ text-transform: none; /* keep exact caps as user wanted */
132
  }
133
 
134
+ /* main grid: left upload (dominant) and right stems column */
135
+ .main {
136
+ display:grid;
137
+ grid-template-columns: 1fr 420px; /* fixed right column to keep alignment predictable */
138
+ gap: 48px;
139
+ align-items: start; /* ensure top baseline alignment */
140
  }
141
+
142
+ /* left column content */
143
+ .left h3 { margin: 0; font-size:18px; font-weight:600; }
144
+ .left p { margin:6px 0 18px; color:var(--muted); font-size:13px; }
145
+
146
+ /* style the Gradio audio drop area so it's visually lighter and slightly shorter */
147
+ .left .gradio-audio {
148
+ background: var(--panel) !important;
149
+ border-radius: var(--radius) !important;
150
+ border: 1px solid rgba(255,255,255,0.04) !important;
151
+ padding: 12px !important;
152
+ box-sizing: border-box;
153
+ min-height: 260px; /* reduced height so stems line up better */
154
+ display:flex;
155
+ align-items:center;
156
+ justify-content:center;
157
+ }
158
+
159
+ /* tone-down the upload inner texts to make them less dominant */
160
+ .left .gradio-audio .dropzone, .left .gradio-audio .component, .left .gradio-audio .file {
161
+ color: var(--muted) !important;
162
+ }
163
+
164
+ /* Make the CTA authoritative but balanced */
165
+ .button-primary {
166
+ margin-top: 18px;
167
+ height:46px;
168
+ width:100%;
169
+ font-size:15px !important;
170
+ font-weight:600 !important;
171
+ background: linear-gradient(90deg,#ff6f9f,#ffbf7a) !important;
172
+ color: #14080d !important;
173
+ border-radius: 10px !important;
174
+ border: none !important;
175
+ box-shadow: 0 8px 30px rgba(0,0,0,0.25);
176
  }
177
 
178
+ /* RIGHT COLUMN (stems) - keep fixed width, grid with 2 columns */
179
+ .stems {
180
  display: grid;
181
+ grid-template-columns: 1fr 1fr;
 
182
  gap: 22px;
183
+ align-items: start;
184
  }
185
 
186
+ /* each stem slot now reads like a subdued surface */
187
+ .stem-surface {
188
+ background: var(--panel-2);
 
 
189
  border-radius: 14px;
190
+ padding: 12px;
191
+ border: 1px solid rgba(255,255,255,0.03);
192
+ box-sizing: border-box;
193
+ min-height: 140px; /* consistent stem surface height */
194
+ display: flex;
195
+ flex-direction: column;
196
+ gap: 10px;
197
+ justify-content: center;
198
  }
199
 
200
+ /* stem label: same system rhythm as body (no competing font) */
201
+ .stem-label {
202
+ font-size:13px;
203
+ font-weight:500;
204
+ color: var(--accent);
205
+ margin-left: 4px;
206
  }
207
 
208
+ /* neutralize Gradio's internal "Audio" label visually so it doesn't fight the stem label */
209
+ .stem-surface .gradio-audio label {
210
+ display:none !important;
 
 
211
  }
212
 
213
+ /* smaller inner audio player to avoid odd spacing */
214
+ .stem-surface .gradio-audio {
215
  background: transparent !important;
216
+ border: 0 !important;
217
+ padding: 0 !important;
218
+ min-height: 88px !important;
219
+ display:flex;
220
+ align-items:center;
221
+ justify-content:center;
222
  }
223
 
224
+ /* ensure the internal audio element doesn't expand */
225
+ .stem-surface audio {
226
+ width: 92%;
227
+ max-height: 36px;
228
+ opacity: 0.95;
 
 
 
229
  }
230
 
231
+ /* remove any extra left margin coming from gradio wrapper */
232
+ .gradio-container .wrap { margin: 0; padding: 0; }
233
+
234
+ /* footer info (small) */
235
+ .footer {
236
+ margin-top: 28px;
237
+ text-align: center;
238
+ color: var(--muted);
239
+ font-size: 12px;
240
+ opacity: 0.95;
241
+ }
242
+
243
+ /* small responsive fallback */
244
+ @media (max-width: 980px) {
245
+ .main { grid-template-columns: 1fr; }
246
+ .stems { grid-template-columns: 1fr 1fr; gap:16px; }
247
+ .app { padding: 28px 20px; }
248
+ }
249
  """
250
 
 
 
 
251
  with gr.Blocks() as demo:
252
+ with gr.Column(elem_classes="app"):
253
+
254
+ # Brand lockup (left-anchored)
255
+ with gr.Row(elem_classes="brand"):
256
+ gr.HTML('<div class="logo">SWARA STUDIO</div>')
257
+ # exact user-caps as requested
258
+ gr.HTML('<div class="tagline">Separating Music Into Its elements</div>')
259
+
260
+ # Main content (left upload, right stems). Top baselines match now.
261
+ with gr.Row(elem_classes="main"):
262
+ # LEFT: upload + CTA
263
+ with gr.Column(elem_classes="left"):
264
+ gr.HTML("""
265
+ <h3>Select a track</h3>
266
+ <p>We’ll break it down into individual parts</p>
267
+ """)
268
+ input_audio = gr.Audio(type="filepath")
269
+ run_btn = gr.Button("Separate", elem_classes="button-primary")
270
+
271
+ # RIGHT: stems grid (fixed width column)
272
+ with gr.Column():
273
+ with gr.Row(elem_classes="stems"):
274
+ with gr.Column(elem_classes="stem-surface"):
275
+ gr.HTML('<div class="stem-label">Vocals</div>')
276
+ out_vocals = gr.Audio(interactive=False)
277
+ with gr.Column(elem_classes="stem-surface"):
278
+ gr.HTML('<div class="stem-label">Drums</div>')
279
+ out_drums = gr.Audio(interactive=False)
280
+ with gr.Column(elem_classes="stem-surface"):
281
+ gr.HTML('<div class="stem-label">Bass</div>')
282
+ out_bass = gr.Audio(interactive=False)
283
+ with gr.Column(elem_classes="stem-surface"):
284
+ gr.HTML('<div class="stem-label">Other</div>')
285
+ out_other = gr.Audio(interactive=False)
286
+
287
+ # wiring — unchanged logic
288
+ run_btn.click(separate_audio, input_audio, [out_vocals, out_drums, out_bass, out_other])
289
 
290
+ if __name__ == "__main__":
291
+ demo.launch(css=css, theme=gr.themes.Base())