dodo12

Runtime error

App Files Community

pengdaqian committed on May 13, 2023

Commit

d7659a0

1 Parent(s): d853526

fix

Browse files

Files changed (2) hide show

app.py +1 -1
torchspleeter/estimator.py +16 -13

app.py CHANGED Viewed

@@ -196,7 +196,7 @@ def svc_main(sid, input_audio):
     if not os.path.exists(tmpfile_path):
         os.makedirs(tmpfile_path)
-    split_to_parts(input_audio_tmp_file, tmpfile_path, models='torchspleeter/checkpoints/2stems/testcheckpoint1.ckpt')
     curr_tmp_path = os.path.join(tmpfile_path, os.path.splitext(input_audio_tmp_file)[0])
     vocals_filepath = os.path.join(curr_tmp_path, 'vocals.wav')

     if not os.path.exists(tmpfile_path):
         os.makedirs(tmpfile_path)
+    split_to_parts(input_audio_tmp_file, tmpfile_path)
     curr_tmp_path = os.path.join(tmpfile_path, os.path.splitext(input_audio_tmp_file)[0])
     vocals_filepath = os.path.join(curr_tmp_path, 'vocals.wav')

torchspleeter/estimator.py CHANGED Viewed

@@ -7,12 +7,15 @@ import tqdm
 # from torchaudio.functional import istft
 from torchspleeter.unet import UNet
-#from .util import tf2pytorch
 import os
 dirname = os.path.dirname(__file__)
 defaultmodel0 = os.path.join(dirname, 'checkpoints/2stems/testcheckpoint0.ckpt')
 defaultmodel1 = os.path.join(dirname, 'checkpoints/2stems/testcheckpoint1.ckpt')
 def load_ckpt(model, ckpt):
     state_dict = model.state_dict()
@@ -39,7 +42,7 @@ def pad_and_partition(tensor, T):
         tensor of size (B*[L/T] x C x F x T)
     """
     old_size = tensor.size(3)
-    new_size = math.ceil(old_size/T) * T
     tensor = F.pad(tensor, [0, new_size - old_size])
     [b, c, t, f] = tensor.shape
     split = new_size // T
@@ -50,29 +53,29 @@ class Estimator(nn.Module):
     def __init__(self, num_instrumments=2, checkpoint_path=None):
         super(Estimator, self).__init__()
         if checkpoint_path is None:
-            checkpoint_path=[defaultmodel0,defaultmodel1]
         else:
-            if len(checkpoint_path)<1:
-                checkpoint_path=[defaultmodel0,defaultmodel1]
         # stft config
         self.F = 1024
         self.T = 512
         self.win_length = 4096
         self.hop_length = 1024
         self.win = nn.Parameter(
-            torch.hann_window(self.win_length),
             requires_grad=False
         )
-        ckpts=[]
         if len(checkpoint_path) != num_instrumments:
             raise ValueError("You must submit as many models as there are instruments!")
         for ckpt_path in checkpoint_path:
             ckpts.append(torch.load(ckpt_path))
-        #self.ckpts = ckpt #torch.load(checkpoint_path)#, num_instrumments)
-        #ckpts = #tf2pytorch(checkpoint_path, num_instrumments)
         # filter
         self.instruments = nn.ModuleList()
@@ -109,7 +112,7 @@ class Estimator(nn.Module):
         pad = self.win_length // 2 + 1 - stft.size(1)
         stft = F.pad(stft, (0, 0, 0, 0, 0, pad))
         wav = torch.istft(stft, self.win_length, hop_length=self.hop_length, center=True,
-                    window=self.win)
         return wav.detach()
     def separate(self, wav):
@@ -145,14 +148,14 @@ class Estimator(nn.Module):
         wavs = []
         for mask in tqdm.tqdm(masks):
-            mask = (mask ** 2 + 1e-10/2)/(mask_sum)
             mask = mask.transpose(2, 3)  # B x 2 X F x T
             mask = torch.cat(
                 torch.split(mask, 1, dim=0), dim=3)
-            mask = mask.squeeze(0)[:,:,:L].unsqueeze(-1) # 2 x F x L x 1
-            stft_masked = stft *  mask
             wav_masked = self.inverse_stft(stft_masked)
             wavs.append(wav_masked)

 # from torchaudio.functional import istft
 from torchspleeter.unet import UNet
+# from .util import tf2pytorch
 import os
 dirname = os.path.dirname(__file__)
 defaultmodel0 = os.path.join(dirname, 'checkpoints/2stems/testcheckpoint0.ckpt')
 defaultmodel1 = os.path.join(dirname, 'checkpoints/2stems/testcheckpoint1.ckpt')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 def load_ckpt(model, ckpt):
     state_dict = model.state_dict()
         tensor of size (B*[L/T] x C x F x T)
     """
     old_size = tensor.size(3)
+    new_size = math.ceil(old_size / T) * T
     tensor = F.pad(tensor, [0, new_size - old_size])
     [b, c, t, f] = tensor.shape
     split = new_size // T
     def __init__(self, num_instrumments=2, checkpoint_path=None):
         super(Estimator, self).__init__()
         if checkpoint_path is None:
+            checkpoint_path = [defaultmodel0, defaultmodel1]
         else:
+            if len(checkpoint_path) < 1:
+                checkpoint_path = [defaultmodel0, defaultmodel1]
         # stft config
         self.F = 1024
         self.T = 512
         self.win_length = 4096
         self.hop_length = 1024
         self.win = nn.Parameter(
+            torch.hann_window(self.win_length, device=device),
             requires_grad=False
         )
+        ckpts = []
         if len(checkpoint_path) != num_instrumments:
             raise ValueError("You must submit as many models as there are instruments!")
         for ckpt_path in checkpoint_path:
             ckpts.append(torch.load(ckpt_path))
+        # self.ckpts = ckpt #torch.load(checkpoint_path)#, num_instrumments)
+        # ckpts = #tf2pytorch(checkpoint_path, num_instrumments)
         # filter
         self.instruments = nn.ModuleList()
         pad = self.win_length // 2 + 1 - stft.size(1)
         stft = F.pad(stft, (0, 0, 0, 0, 0, pad))
         wav = torch.istft(stft, self.win_length, hop_length=self.hop_length, center=True,
+                          window=self.win)
         return wav.detach()
     def separate(self, wav):
         wavs = []
         for mask in tqdm.tqdm(masks):
+            mask = (mask ** 2 + 1e-10 / 2) / (mask_sum)
             mask = mask.transpose(2, 3)  # B x 2 X F x T
             mask = torch.cat(
                 torch.split(mask, 1, dim=0), dim=3)
+            mask = mask.squeeze(0)[:, :, :L].unsqueeze(-1)  # 2 x F x L x 1
+            stft_masked = stft * mask
             wav_masked = self.inverse_stft(stft_masked)
             wavs.append(wav_masked)