Spaces:

Tachyeon
/

Swara-Split

Running

App Files Files Community

Tachyeon committed on Jan 3

Commit

dc8229b

verified ·

1 Parent(s): 5a98f35

Create app.py

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import gradio as gr
+import torch
+import librosa
+import soundfile as sf
+import numpy as np
+import os
+import sys
+from huggingface_hub import hf_hub_download
+# 1. SETUP
+# Import the architecture directly since we installed the repo via requirements.txt
+from models.bs_roformer.bs_roformer import BSRoformer
+DEVICE = "cpu" # Free Tier uses CPU
+# 2. DOWNLOAD & LOAD MODEL
+# 👇 REPLACE THIS with your actual Model Repo ID (e.g. "Rahul/IAM-RoFormer-Weights")
+REPO_ID = "Tachyeon/IAM-RoFormer-Model-Weights"
+FILENAME = "v11_consensus_epoch_30.pt"
+print(f">>> ⏳ Downloading Model from {REPO_ID}...")
+try:
+    # This downloads the 4.5GB file from your storage repo to the Space
+    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+    print(f">>> ✅ Download Complete: {model_path}")
+    # Initialize Architecture
+    model = BSRoformer(
+        dim=512, depth=12, stereo=True, num_stems=4,
+        time_transformer_depth=1, freq_transformer_depth=1,
+        flash_attn=False
+    ).to(DEVICE)
+    # Load Weights
+    state = torch.load(model_path, map_location=DEVICE)
+    if 'model' in state: state = state['model']
+    model.load_state_dict(state, strict=False)
+    model.eval()
+    print(">>> ✅ Model Loaded Successfully!")
+except Exception as e:
+    print(f"❌ Error: {e}")
+    raise e
+# 3. INFERENCE LOGIC (V15 PURE FIDELITY)
+def separate_audio(audio_file, progress=gr.Progress()):
+    if audio_file is None: return None, None, None, None
+    progress(0, desc="Loading Audio...")
+    print(f">>> 🪄 Processing: {audio_file}")
+    mix, sr = librosa.load(audio_file, sr=44100, mono=False)
+    if len(mix.shape) == 1: mix = np.stack([mix, mix], axis=0)
+    # Chunking (Safe for CPU)
+    chunk_size = 44100 * 10
+    overlap = 44100 * 1
+    mix_tensor = torch.tensor(mix, dtype=torch.float32).to(DEVICE)
+    if mix_tensor.dim() == 2: mix_tensor = mix_tensor.unsqueeze(0)
+    length = mix_tensor.shape[-1]
+    final_output = torch.zeros(1, 4, 2, length).to(DEVICE)
+    counts = torch.zeros(1, 4, 2, length).to(DEVICE)
+    progress(0.1, desc="Separating Stems...")
+    with torch.no_grad():
+        for start in range(0, length, int(chunk_size - overlap)):
+            end = min(start + int(chunk_size), length)
+            chunk = mix_tensor[:, :, start:end]
+            if chunk.shape[-1] < chunk_size:
+                pad_len = int(chunk_size - chunk.shape[-1])
+                chunk = torch.nn.functional.pad(chunk, (0, pad_len))
+            pred = model(chunk)
+            valid_len = end - start
+            final_output[:, :, :, start:end] += pred[:, :, :, :valid_len]
+            counts[:, :, :, start:end] += 1.0
+            current_progress = 0.1 + (0.8 * (end / length))
+            progress(current_progress, desc="Processing...")
+    stems = (final_output / torch.clamp(counts, min=1.0)).cpu().numpy()[0]
+    # V15 Safety Normalization
+    peak = np.max(np.abs(stems))
+    if peak > 0.99: stems = stems / peak
+    outputs = []
+    for i in range(4):
+        outfile = f"stem_{i}.wav"
+        sf.write(outfile, stems[i].T, sr)
+        outputs.append(outfile)
+    return outputs[0], outputs[1], outputs[2], outputs[3]
+# 4. UI
+custom_css = "#title {text-align: center} #desc {text-align: center}"
+with gr.Blocks(css=custom_css, title="IAM Source Separation") as demo:
+    gr.Markdown("# 🎻 Indian Art Music Source Separator", elem_id="title")
+    gr.Markdown("### Powered by RoFormer | Epoch 30 Consensus Model", elem_id="desc")
+    with gr.Row():
+        with gr.Column():
+            input_audio = gr.Audio(label="Input Mixture", type="filepath")
+            submit_btn = gr.Button("✨ Separate Audio", variant="primary", size="lg")
+        with gr.Column():
+            out_vocals = gr.Audio(label="Vocals", interactive=False)
+            out_drums = gr.Audio(label="Mridangam", interactive=False)
+            out_bass = gr.Audio(label="Tanpura", interactive=False)
+            out_other = gr.Audio(label="Violin", interactive=False)
+    submit_btn.click(separate_audio, inputs=input_audio, outputs=[out_vocals, out_drums, out_bass, out_other])
+demo.launch()