Upload folder using huggingface_hub
Browse files
bs_roformer/bs_inst_large2_unwa.ckpt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09251ab8b5bb892414a6ab8aa80a1be30c17852d5e7f4e76943610de049e4bc4
|
| 3 |
+
size 238214371
|
bs_roformer/bs_inst_large2_unwa_config.yaml
CHANGED
|
@@ -2,13 +2,12 @@ unwa_inst_large_2: true
|
|
| 2 |
audio:
|
| 3 |
chunk_size: 960000
|
| 4 |
dim_f: 1024
|
| 5 |
-
dim_t: 801
|
| 6 |
-
hop_length: 441
|
| 7 |
n_fft: 2048
|
| 8 |
num_channels: 2
|
| 9 |
sample_rate: 44100
|
| 10 |
min_mean_abs: 0.0001
|
| 11 |
-
|
| 12 |
model:
|
| 13 |
dim: 256
|
| 14 |
depth: 12
|
|
@@ -93,24 +92,24 @@ model:
|
|
| 93 |
mask_estimator_depth: 2
|
| 94 |
multi_stft_resolution_loss_weight: 1.0
|
| 95 |
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
multi_stft_hop_size: 147
|
| 102 |
-
multi_stft_normalized:
|
| 103 |
mlp_expansion_factor: 4
|
| 104 |
-
use_torch_checkpoint:
|
| 105 |
-
skip_connection:
|
| 106 |
-
|
| 107 |
-
|
| 108 |
training:
|
| 109 |
batch_size: 1
|
| 110 |
gradient_accumulation_steps: 1
|
| 111 |
grad_clip: 0
|
| 112 |
-
instruments:
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
patience: 5
|
| 115 |
reduce_factor: 0.9
|
| 116 |
target_instrument: instrument
|
|
@@ -120,11 +119,9 @@ training:
|
|
| 120 |
coarse_loss_clip: true
|
| 121 |
ema_momentum: 0.999
|
| 122 |
optimizer: adam
|
| 123 |
-
other_fix: false
|
| 124 |
-
use_amp: true
|
| 125 |
-
|
| 126 |
-
|
| 127 |
inference:
|
| 128 |
-
batch_size:
|
| 129 |
dim_t: 1876
|
| 130 |
-
num_overlap:
|
|
|
|
| 2 |
audio:
|
| 3 |
chunk_size: 960000
|
| 4 |
dim_f: 1024
|
| 5 |
+
dim_t: 801
|
| 6 |
+
hop_length: 441
|
| 7 |
n_fft: 2048
|
| 8 |
num_channels: 2
|
| 9 |
sample_rate: 44100
|
| 10 |
min_mean_abs: 0.0001
|
|
|
|
| 11 |
model:
|
| 12 |
dim: 256
|
| 13 |
depth: 12
|
|
|
|
| 92 |
mask_estimator_depth: 2
|
| 93 |
multi_stft_resolution_loss_weight: 1.0
|
| 94 |
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 95 |
+
- 4096
|
| 96 |
+
- 2048
|
| 97 |
+
- 1024
|
| 98 |
+
- 512
|
| 99 |
+
- 256
|
| 100 |
multi_stft_hop_size: 147
|
| 101 |
+
multi_stft_normalized: false
|
| 102 |
mlp_expansion_factor: 4
|
| 103 |
+
use_torch_checkpoint: true
|
| 104 |
+
skip_connection: false
|
|
|
|
|
|
|
| 105 |
training:
|
| 106 |
batch_size: 1
|
| 107 |
gradient_accumulation_steps: 1
|
| 108 |
grad_clip: 0
|
| 109 |
+
instruments:
|
| 110 |
+
- vocals
|
| 111 |
+
- instrument
|
| 112 |
+
lr: 1.0e-05
|
| 113 |
patience: 5
|
| 114 |
reduce_factor: 0.9
|
| 115 |
target_instrument: instrument
|
|
|
|
| 119 |
coarse_loss_clip: true
|
| 120 |
ema_momentum: 0.999
|
| 121 |
optimizer: adam
|
| 122 |
+
other_fix: false
|
| 123 |
+
use_amp: true
|
|
|
|
|
|
|
| 124 |
inference:
|
| 125 |
+
batch_size: 1
|
| 126 |
dim_t: 1876
|
| 127 |
+
num_overlap: 2
|