Tachyeon committed on
Commit
5e1d6d0
·
verified ·
1 Parent(s): 70b2d07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -247
app.py CHANGED
@@ -1,291 +1,187 @@
1
  import gradio as gr
2
- import torch
3
- import torch.nn.functional as F
4
- import librosa
5
- import soundfile as sf
6
- import numpy as np
7
- from huggingface_hub import hf_hub_download
8
 
9
- # ================= MODEL (unchanged) =================
10
- try:
11
- from bs_roformer import BSRoformer
12
- from attend import Attend
13
- except ImportError:
14
- pass
15
-
16
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
17
-
18
- def safe_attend_forward(self, q, k, v, mask=None):
19
- return F.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0)
20
-
21
- try:
22
- Attend.forward = safe_attend_forward
23
- except Exception:
24
- pass
25
-
26
- def load_model():
27
- ckpt = hf_hub_download(
28
- repo_id="Tachyeon/IAM-RoFormer-Model-Weights",
29
- filename="v11_consensus_epoch_30.pt"
30
- )
31
- model = BSRoformer(
32
- dim=512, depth=12, stereo=True, num_stems=4,
33
- time_transformer_depth=1, freq_transformer_depth=1,
34
- flash_attn=True
35
- ).to(DEVICE)
36
-
37
- state = torch.load(ckpt, map_location=DEVICE)
38
- model.load_state_dict(state["model"] if "model" in state else state)
39
- model.eval()
40
- return model
41
-
42
- model = load_model()
43
-
44
- def separate_audio(path):
45
- if not path:
46
- return [None]*4
47
-
48
- mix, sr = librosa.load(path, sr=44100, mono=False)
49
- if mix.ndim == 1:
50
- mix = np.stack([mix, mix])
51
-
52
- x = torch.tensor(mix).float().to(DEVICE)[None]
53
- L = x.shape[-1]
54
-
55
- out = torch.zeros(1,4,2,L, device=DEVICE)
56
- cnt = torch.zeros_like(out)
57
-
58
- chunk = 44100*10
59
- hop = chunk - 44100
60
-
61
- with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
62
- for s in range(0, L, hop):
63
- e = min(s+chunk, L)
64
- part = x[:,:,s:e]
65
- if part.shape[-1] < chunk:
66
- part = F.pad(part,(0,chunk-part.shape[-1]))
67
- pred = model(part)
68
- out[:,:,:,s:e] += pred[:,:,:,:e-s]
69
- cnt[:,:,:,s:e] += 1
70
-
71
- stems = (out / cnt.clamp(min=1)).cpu().numpy()[0]
72
- files=[]
73
- for i in range(4):
74
- f=f"stem_{i}.wav"
75
- sf.write(f, stems[i].T, sr)
76
- files.append(f)
77
- return files
78
-
79
- # ================= UI CSS + layout (polished) =================
80
  css = """
81
- @import url('https://fonts.googleapis.com/css2?family=Anton&family=Poppins:wght@400;500;600&display=swap');
82
-
83
- :root{
84
- --bg1:#2a141d;
85
- --bg2:#14080d;
86
- --ink:#f3ece6;
87
- --muted:#b6aeb0;
88
- --accent:#ff6f9f;
89
- --panel: rgba(255,255,255,0.03);
90
- --panel-2: rgba(255,255,255,0.02);
91
- --radius: 12px;
92
  }
93
 
94
- /* page reset */
95
- html, body, .gradio-container {
96
- height: 100%;
97
- margin: 0;
98
- padding: 0;
99
- background: linear-gradient(180deg, var(--bg1), var(--bg2)) !important;
100
- color: var(--ink);
101
- font-family: Poppins, sans-serif;
102
- overflow: auto;
103
  }
104
 
105
- /* wrapper */
106
- .app {
107
- max-width: 1160px;
108
  margin: 0 auto;
109
- padding: 48px 40px;
110
- box-sizing: border-box;
111
- display: grid;
112
- grid-template-rows: auto 1fr;
113
- gap: 36px;
114
  }
115
 
116
- /* logo + tagline (left-anchored brand lockup) */
117
- .brand {
118
- display:flex;
119
- flex-direction:column;
120
- gap:6px;
121
  }
 
122
  .logo {
123
- font-family: Anton, sans-serif;
124
- font-size:46px;
125
- letter-spacing:1px;
 
126
  }
 
127
  .tagline {
128
- font-size:14px;
129
- color:var(--accent);
130
- opacity:0.9;
131
- text-transform: none; /* keep exact caps as user wanted */
132
  }
133
 
134
- /* main grid: left upload (dominant) and right stems column */
135
- .main {
136
- display:grid;
137
- grid-template-columns: 1fr 420px; /* fixed right column to keep alignment predictable */
138
- gap: 48px;
139
- align-items: start; /* ensure top baseline alignment */
140
  }
141
 
142
- /* left column content */
143
- .left h3 { margin: 0; font-size:18px; font-weight:600; }
144
- .left p { margin:6px 0 18px; color:var(--muted); font-size:13px; }
145
-
146
- /* style the Gradio audio drop area so it's visually lighter and slightly shorter */
147
- .left .gradio-audio {
148
- background: var(--panel) !important;
149
- border-radius: var(--radius) !important;
150
- border: 1px solid rgba(255,255,255,0.04) !important;
151
- padding: 12px !important;
152
- box-sizing: border-box;
153
- min-height: 260px; /* reduced height so stems line up better */
154
- display:flex;
155
- align-items:center;
156
- justify-content:center;
157
  }
158
 
159
- /* tone-down the upload inner texts to make them less dominant */
160
- .left .gradio-audio .dropzone, .left .gradio-audio .component, .left .gradio-audio .file {
161
- color: var(--muted) !important;
 
 
 
 
162
  }
163
 
164
- /* Make the CTA authoritative but balanced */
165
- .button-primary {
166
- margin-top: 18px;
167
- height:46px;
168
- width:100%;
169
- font-size:15px !important;
170
- font-weight:600 !important;
171
- background: linear-gradient(90deg,#ff6f9f,#ffbf7a) !important;
172
- color: #14080d !important;
173
- border-radius: 10px !important;
174
- border: none !important;
175
- box-shadow: 0 8px 30px rgba(0,0,0,0.25);
176
  }
177
 
178
- /* RIGHT COLUMN (stems) - keep fixed width, grid with 2 columns */
179
- .stems {
180
- display: grid;
181
- grid-template-columns: 1fr 1fr;
182
- gap: 22px;
183
- align-items: start;
184
  }
185
 
186
- /* each stem slot now reads like a subdued surface */
187
- .stem-surface {
188
- background: var(--panel-2);
 
189
  border-radius: 14px;
190
- padding: 12px;
191
- border: 1px solid rgba(255,255,255,0.03);
192
- box-sizing: border-box;
193
- min-height: 140px; /* consistent stem surface height */
194
  display: flex;
195
  flex-direction: column;
196
- gap: 10px;
197
- justify-content: center;
198
  }
199
 
200
- /* stem label: same system rhythm as body (no competing font) */
201
- .stem-label {
202
- font-size:13px;
203
- font-weight:500;
204
  color: var(--accent);
205
- margin-left: 4px;
206
  }
207
 
208
- /* neutralize Gradio's internal "Audio" label visually so it doesn't fight the stem label */
209
- .stem-surface .gradio-audio label {
210
- display:none !important;
211
- }
212
-
213
- /* smaller inner audio player to avoid odd spacing */
214
- .stem-surface .gradio-audio {
215
  background: transparent !important;
216
- border: 0 !important;
217
- padding: 0 !important;
218
- min-height: 88px !important;
219
- display:flex;
220
- align-items:center;
221
- justify-content:center;
222
  }
223
 
224
- /* ensure the internal audio element doesn't expand */
225
- .stem-surface audio {
226
- width: 92%;
227
- max-height: 36px;
228
- opacity: 0.95;
229
  }
230
 
231
- /* remove any extra left margin coming from gradio wrapper */
232
- .gradio-container .wrap { margin: 0; padding: 0; }
233
-
234
- /* footer info (small) */
235
- .footer {
236
- margin-top: 28px;
237
- text-align: center;
238
- color: var(--muted);
239
- font-size: 12px;
240
- opacity: 0.95;
241
  }
242
 
243
- /* small responsive fallback */
244
- @media (max-width: 980px) {
245
- .main { grid-template-columns: 1fr; }
246
- .stems { grid-template-columns: 1fr 1fr; gap:16px; }
247
- .app { padding: 28px 20px; }
248
  }
249
  """
250
 
251
- with gr.Blocks() as demo:
252
- with gr.Column(elem_classes="app"):
253
-
254
- # Brand lockup (left-anchored)
255
- with gr.Row(elem_classes="brand"):
256
- gr.HTML('<div class="logo">SWARA STUDIO</div>')
257
- # exact user-caps as requested
258
- gr.HTML('<div class="tagline">Separating Music Into Its elements</div>')
259
-
260
- # Main content (left upload, right stems). Top baselines match now.
261
- with gr.Row(elem_classes="main"):
262
- # LEFT: upload + CTA
263
- with gr.Column(elem_classes="left"):
264
- gr.HTML("""
265
- <h3>Select a track</h3>
266
- <p>We’ll break it down into individual parts</p>
267
- """)
268
- input_audio = gr.Audio(type="filepath")
269
- run_btn = gr.Button("Separate", elem_classes="button-primary")
270
-
271
- # RIGHT: stems grid (fixed width column)
272
- with gr.Column():
273
- with gr.Row(elem_classes="stems"):
274
- with gr.Column(elem_classes="stem-surface"):
275
- gr.HTML('<div class="stem-label">Vocals</div>')
276
- out_vocals = gr.Audio(interactive=False)
277
- with gr.Column(elem_classes="stem-surface"):
278
- gr.HTML('<div class="stem-label">Drums</div>')
279
- out_drums = gr.Audio(interactive=False)
280
- with gr.Column(elem_classes="stem-surface"):
281
- gr.HTML('<div class="stem-label">Bass</div>')
282
- out_bass = gr.Audio(interactive=False)
283
- with gr.Column(elem_classes="stem-surface"):
284
- gr.HTML('<div class="stem-label">Other</div>')
285
- out_other = gr.Audio(interactive=False)
286
-
287
- # wiring — unchanged logic
288
- run_btn.click(separate_audio, input_audio, [out_vocals, out_drums, out_bass, out_other])
289
-
290
- if __name__ == "__main__":
291
- demo.launch(css=css, theme=gr.themes.Base())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
+ # -----------------------------
4
+ # CSS (FINAL, POLISHED)
5
+ # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  css = """
7
+ :root {
8
+ --bg-1: #1a0c12;
9
+ --bg-2: #3a1523;
10
+ --surface: rgba(255,255,255,0.035);
11
+ --surface-soft: rgba(255,255,255,0.025);
12
+ --border: rgba(255,255,255,0.08);
13
+ --text: #f5f5f5;
14
+ --muted: rgba(255,255,255,0.65);
15
+ --accent: #ff5c8a;
 
 
16
  }
17
 
18
+ body {
19
+ background: linear-gradient(135deg, var(--bg-1), var(--bg-2));
20
+ font-family: Poppins, system-ui, sans-serif;
 
 
 
 
 
 
21
  }
22
 
23
+ .gradio-container {
24
+ max-width: 1200px !important;
 
25
  margin: 0 auto;
 
 
 
 
 
26
  }
27
 
28
+ /* Header */
29
+ .header {
30
+ margin-bottom: 28px;
 
 
31
  }
32
+
33
  .logo {
34
+ font-size: 42px;
35
+ font-weight: 800;
36
+ letter-spacing: 1px;
37
+ color: var(--text);
38
  }
39
+
40
  .tagline {
41
+ margin-top: 6px;
42
+ font-size: 14px;
43
+ color: var(--accent);
44
+ letter-spacing: 0.3px;
45
  }
46
 
47
+ /* Section text */
48
+ .section-title {
49
+ font-size: 20px;
50
+ font-weight: 600;
51
+ margin-bottom: 4px;
 
52
  }
53
 
54
+ .section-sub {
55
+ font-size: 14px;
56
+ color: var(--muted);
57
+ margin-bottom: 14px;
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
 
60
+ /* Main grid THIS FIXES ALIGNMENT */
61
+ .main-grid {
62
+ display: grid;
63
+ grid-template-columns: 2fr 1fr 1fr;
64
+ grid-template-rows: auto auto;
65
+ gap: 22px;
66
+ align-items: stretch;
67
  }
68
 
69
+ /* Upload area */
70
+ .upload-box {
71
+ grid-row: 1 / span 2;
72
+ background: var(--surface);
73
+ border: 1px solid var(--border);
74
+ border-radius: 14px;
75
+ padding: 20px;
76
+ display: flex;
77
+ flex-direction: column;
78
+ justify-content: space-between;
 
 
79
  }
80
 
81
+ .upload-box .gradio-audio {
82
+ background: transparent !important;
83
+ border: none !important;
 
 
 
84
  }
85
 
86
+ /* Stem boxes */
87
+ .stem {
88
+ background: var(--surface-soft);
89
+ border: 1px solid var(--border);
90
  border-radius: 14px;
91
+ padding: 14px;
 
 
 
92
  display: flex;
93
  flex-direction: column;
94
+ justify-content: space-between;
 
95
  }
96
 
97
+ .stem-title {
98
+ font-size: 14px;
99
+ font-weight: 500;
 
100
  color: var(--accent);
101
+ margin-bottom: 6px;
102
  }
103
 
104
+ .stem .gradio-audio {
 
 
 
 
 
 
105
  background: transparent !important;
106
+ border: none !important;
 
 
 
 
 
107
  }
108
 
109
+ /* Button */
110
+ .action-row {
111
+ margin-top: 18px;
 
 
112
  }
113
 
114
+ .primary-btn {
115
+ width: 100%;
116
+ height: 48px;
117
+ border-radius: 12px;
118
+ background: linear-gradient(135deg, #ff7aa2, #ffb36b);
119
+ color: #111;
120
+ font-weight: 700;
121
+ letter-spacing: 0.3px;
 
 
122
  }
123
 
124
+ /* Footer cleanup */
125
+ footer {
126
+ display: none !important;
 
 
127
  }
128
  """
129
 
130
+ # -----------------------------
131
+ # APP
132
+ # -----------------------------
133
+ with gr.Blocks(css=css) as demo:
134
+
135
+ # Header
136
+ gr.HTML("""
137
+ <div class="header">
138
+ <div class="logo">SWARA STUDIO</div>
139
+ <div class="tagline">Separating Music Into Its elements</div>
140
+ </div>
141
+ """)
142
+
143
+ # Section intro
144
+ gr.HTML("""
145
+ <div class="section-title">Select a track</div>
146
+ <div class="section-sub">We’ll break it down into individual parts</div>
147
+ """)
148
+
149
+ # Main aligned grid
150
+ with gr.HTML('<div class="main-grid">'):
151
+
152
+ # Upload box (left, spans 2 rows)
153
+ with gr.HTML('<div class="upload-box">'):
154
+ audio_in = gr.Audio(label="Audio", type="filepath")
155
+ gr.HTML('</div>')
156
+
157
+ # Vocals
158
+ with gr.HTML('<div class="stem">'):
159
+ gr.HTML('<div class="stem-title">Vocals</div>')
160
+ vocals = gr.Audio(label=None)
161
+ gr.HTML('</div>')
162
+
163
+ # Drums
164
+ with gr.HTML('<div class="stem">'):
165
+ gr.HTML('<div class="stem-title">Drums</div>')
166
+ drums = gr.Audio(label=None)
167
+ gr.HTML('</div>')
168
+
169
+ # Bass
170
+ with gr.HTML('<div class="stem">'):
171
+ gr.HTML('<div class="stem-title">Bass</div>')
172
+ bass = gr.Audio(label=None)
173
+ gr.HTML('</div>')
174
+
175
+ # Other
176
+ with gr.HTML('<div class="stem">'):
177
+ gr.HTML('<div class="stem-title">Other</div>')
178
+ other = gr.Audio(label=None)
179
+ gr.HTML('</div>')
180
+
181
+ gr.HTML('</div>') # end grid
182
+
183
+ # Action button
184
+ with gr.Row(elem_classes="action-row"):
185
+ separate = gr.Button("Separate", elem_classes="primary-btn")
186
+
187
+ demo.launch()