noblebarkrr committed on
Commit
7816cc8
·
verified ·
1 Parent(s): 8994ed0

Upload folder using huggingface_hub

Browse files
bs_roformer/bs_inst_large2_unwa.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c9c55ba413a5497cf7d0b1275ccd8f5a32a34f4909f33ee111606ce2a3e0649
3
- size 242408675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09251ab8b5bb892414a6ab8aa80a1be30c17852d5e7f4e76943610de049e4bc4
3
+ size 238214371
bs_roformer/bs_inst_large2_unwa_config.yaml CHANGED
@@ -2,13 +2,12 @@ unwa_inst_large_2: true
2
  audio:
3
  chunk_size: 960000
4
  dim_f: 1024
5
- dim_t: 801 # don't work (use in model)
6
- hop_length: 441 # don't work (use in model)
7
  n_fft: 2048
8
  num_channels: 2
9
  sample_rate: 44100
10
  min_mean_abs: 0.0001
11
-
12
  model:
13
  dim: 256
14
  depth: 12
@@ -93,24 +92,24 @@ model:
93
  mask_estimator_depth: 2
94
  multi_stft_resolution_loss_weight: 1.0
95
  multi_stft_resolutions_window_sizes: !!python/tuple
96
- - 4096
97
- - 2048
98
- - 1024
99
- - 512
100
- - 256
101
  multi_stft_hop_size: 147
102
- multi_stft_normalized: False
103
  mlp_expansion_factor: 4
104
- use_torch_checkpoint: True
105
- skip_connection: False
106
-
107
-
108
  training:
109
  batch_size: 1
110
  gradient_accumulation_steps: 1
111
  grad_clip: 0
112
- instruments: ['vocals', 'instrument']
113
- lr: 1.0e-5
 
 
114
  patience: 5
115
  reduce_factor: 0.9
116
  target_instrument: instrument
@@ -120,11 +119,9 @@ training:
120
  coarse_loss_clip: true
121
  ema_momentum: 0.999
122
  optimizer: adam
123
- other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
124
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
125
-
126
-
127
  inference:
128
- batch_size: 2
129
  dim_t: 1876
130
- num_overlap: 4
 
2
  audio:
3
  chunk_size: 960000
4
  dim_f: 1024
5
+ dim_t: 801
6
+ hop_length: 441
7
  n_fft: 2048
8
  num_channels: 2
9
  sample_rate: 44100
10
  min_mean_abs: 0.0001
 
11
  model:
12
  dim: 256
13
  depth: 12
 
92
  mask_estimator_depth: 2
93
  multi_stft_resolution_loss_weight: 1.0
94
  multi_stft_resolutions_window_sizes: !!python/tuple
95
+ - 4096
96
+ - 2048
97
+ - 1024
98
+ - 512
99
+ - 256
100
  multi_stft_hop_size: 147
101
+ multi_stft_normalized: false
102
  mlp_expansion_factor: 4
103
+ use_torch_checkpoint: true
104
+ skip_connection: false
 
 
105
  training:
106
  batch_size: 1
107
  gradient_accumulation_steps: 1
108
  grad_clip: 0
109
+ instruments:
110
+ - vocals
111
+ - instrument
112
+ lr: 1.0e-05
113
  patience: 5
114
  reduce_factor: 0.9
115
  target_instrument: instrument
 
119
  coarse_loss_clip: true
120
  ema_momentum: 0.999
121
  optimizer: adam
122
+ other_fix: false
123
+ use_amp: true
 
 
124
  inference:
125
+ batch_size: 1
126
  dim_t: 1876
127
+ num_overlap: 2