Spaces:
Sleeping
Sleeping
Update spleeter.py
Browse files — spleeter.py (+17 −15)
spleeter.py
CHANGED
|
@@ -5,7 +5,7 @@ import torch
|
|
| 5 |
from torch import nn, Tensor
|
| 6 |
from torch.nn import functional as F
|
| 7 |
from tqdm import tqdm
|
| 8 |
-
|
| 9 |
def batchify(tensor: Tensor, T: int) -> Tensor:
|
| 10 |
orig_size = tensor.size(-1)
|
| 11 |
new_size = math.ceil(orig_size / T) * T
|
|
@@ -122,7 +122,7 @@ class Splitter(nn.Module):
|
|
| 122 |
def __init__(self, stem_num=2):
|
| 123 |
super(Splitter, self).__init__()
|
| 124 |
if stem_num == 2:
|
| 125 |
-
stem_names = ["vocals","
|
| 126 |
if stem_num == 4:
|
| 127 |
stem_names = ["vocals", "drums", "bass", "other"]
|
| 128 |
if stem_num == 5:
|
|
@@ -134,7 +134,7 @@ class Splitter(nn.Module):
|
|
| 134 |
self.hop_length = 1024
|
| 135 |
self.win = nn.Parameter(torch.hann_window(self.win_length), requires_grad=False)
|
| 136 |
self.stems = nn.ModuleDict({name: UNet(in_channels=2) for name in stem_names})
|
| 137 |
-
self.load_state_dict(
|
| 138 |
self.eval()
|
| 139 |
|
| 140 |
def compute_stft(self, wav: Tensor) -> Tuple[Tensor, Tensor]:
|
|
@@ -189,7 +189,8 @@ class Splitter(nn.Module):
|
|
| 189 |
|
| 190 |
return wav.detach()
|
| 191 |
|
| 192 |
-
|
|
|
|
| 193 |
# stft - 2 X F x L x 2
|
| 194 |
# stft_mag - 2 X F x L
|
| 195 |
stft, stft_mag = self.compute_stft(wav.squeeze())
|
|
@@ -199,7 +200,7 @@ class Splitter(nn.Module):
|
|
| 199 |
stft_mag = batchify(stft_mag, self.T) # B x 2 x F x T
|
| 200 |
stft_mag = stft_mag.transpose(2, 3) # B x 2 x T x F
|
| 201 |
# compute stems' mask
|
| 202 |
-
masks = self.infer_with_batches(stft_mag,batch_size)
|
| 203 |
# compute denominator
|
| 204 |
mask_sum = sum([m**2 for m in masks.values()])
|
| 205 |
mask_sum += 1e-10
|
|
@@ -212,14 +213,15 @@ class Splitter(nn.Module):
|
|
| 212 |
return stft_masked
|
| 213 |
return {name: self.inverse_stft(apply_mask(m)) for name, m in masks.items()}
|
| 214 |
|
| 215 |
-
def infer_with_batches(self, stft_mag, batch_size):
|
| 216 |
masks = {name: [] for name in self.stems.keys()}
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
| 5 |
from torch import nn, Tensor
|
| 6 |
from torch.nn import functional as F
|
| 7 |
from tqdm import tqdm
|
| 8 |
+
from safetensors.torch import load_file
|
| 9 |
def batchify(tensor: Tensor, T: int) -> Tensor:
|
| 10 |
orig_size = tensor.size(-1)
|
| 11 |
new_size = math.ceil(orig_size / T) * T
|
|
|
|
| 122 |
def __init__(self, stem_num=2):
|
| 123 |
super(Splitter, self).__init__()
|
| 124 |
if stem_num == 2:
|
| 125 |
+
stem_names = ["vocals","other"]
|
| 126 |
if stem_num == 4:
|
| 127 |
stem_names = ["vocals", "drums", "bass", "other"]
|
| 128 |
if stem_num == 5:
|
|
|
|
| 134 |
self.hop_length = 1024
|
| 135 |
self.win = nn.Parameter(torch.hann_window(self.win_length), requires_grad=False)
|
| 136 |
self.stems = nn.ModuleDict({name: UNet(in_channels=2) for name in stem_names})
|
| 137 |
+
self.load_state_dict(load_file(hf_hub_download("shethjenil/spleeter",f"{stem_num}.safetensors")))
|
| 138 |
self.eval()
|
| 139 |
|
| 140 |
def compute_stft(self, wav: Tensor) -> Tuple[Tensor, Tensor]:
|
|
|
|
| 189 |
|
| 190 |
return wav.detach()
|
| 191 |
|
| 192 |
+
@torch.inference_mode()
|
| 193 |
+
def forward(self, wav: Tensor,batch_size=16,allow=['vocals']) -> Dict[str, Tensor]:
|
| 194 |
# stft - 2 X F x L x 2
|
| 195 |
# stft_mag - 2 X F x L
|
| 196 |
stft, stft_mag = self.compute_stft(wav.squeeze())
|
|
|
|
| 200 |
stft_mag = batchify(stft_mag, self.T) # B x 2 x F x T
|
| 201 |
stft_mag = stft_mag.transpose(2, 3) # B x 2 x T x F
|
| 202 |
# compute stems' mask
|
| 203 |
+
masks = self.infer_with_batches(stft_mag,batch_size,allow)
|
| 204 |
# compute denominator
|
| 205 |
mask_sum = sum([m**2 for m in masks.values()])
|
| 206 |
mask_sum += 1e-10
|
|
|
|
| 213 |
return stft_masked
|
| 214 |
return {name: self.inverse_stft(apply_mask(m)) for name, m in masks.items()}
|
| 215 |
|
| 216 |
+
def infer_with_batches(self, stft_mag, batch_size, allow):
    """Run each allowed stem network over ``stft_mag`` in mini-batches.

    Args:
        stft_mag: batched magnitude spectrogram, B x 2 x T x F per the
            comments in ``forward`` (after ``batchify`` and transpose) —
            assumed, confirm against caller.
        batch_size: number of spectrogram chunks fed to each stem UNet
            per step.
        allow: collection of stem names to compute; stems in
            ``self.stems`` but not in ``allow`` are skipped entirely.

    Returns:
        Dict mapping each computed stem name to its mask tensor,
        re-concatenated along the batch dimension. Stems not in
        ``allow`` (or that received no batches, e.g. empty input)
        are omitted, matching the original's filtering of empty
        mask lists.
    """
    # Resolve the allowed stem networks ONCE: the `name in allow` test
    # is loop-invariant, so re-filtering self.stems on every mini-batch
    # (as the original did) is wasted work. Also avoids allocating mask
    # lists for stems that can never be populated.
    allowed_nets = {name: net for name, net in self.stems.items() if name in allow}
    masks = {name: [] for name in allowed_nets}
    for start in tqdm(range(0, stft_mag.shape[0], batch_size)):
        batch = stft_mag[start:start + batch_size]
        for name, net in allowed_nets.items():
            masks[name].append(net(batch))
    # `if chunks` preserves the original behavior of dropping stems
    # whose mask list stayed empty (torch.cat on [] would raise).
    return {name: torch.cat(chunks, dim=0) for name, chunks in masks.items() if chunks}
|