noblebarkrr committed on
Commit
7816cc8
·
verified ·
1 Parent(s): 8994ed0

Upload folder using huggingface_hub

Browse files
bs_roformer/bs_inst_large2_unwa.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c9c55ba413a5497cf7d0b1275ccd8f5a32a34f4909f33ee111606ce2a3e0649
3
- size 242408675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09251ab8b5bb892414a6ab8aa80a1be30c17852d5e7f4e76943610de049e4bc4
3
+ size 238214371
bs_roformer/bs_inst_large2_unwa_config.yaml CHANGED
@@ -2,13 +2,12 @@ unwa_inst_large_2: true
2
  audio:
3
  chunk_size: 960000
4
  dim_f: 1024
5
- dim_t: 801 # don't work (use in model)
6
- hop_length: 441 # don't work (use in model)
7
  n_fft: 2048
8
  num_channels: 2
9
  sample_rate: 44100
10
  min_mean_abs: 0.0001
11
-
12
  model:
13
  dim: 256
14
  depth: 12
@@ -93,24 +92,24 @@ model:
93
  mask_estimator_depth: 2
94
  multi_stft_resolution_loss_weight: 1.0
95
  multi_stft_resolutions_window_sizes: !!python/tuple
96
- - 4096
97
- - 2048
98
- - 1024
99
- - 512
100
- - 256
101
  multi_stft_hop_size: 147
102
- multi_stft_normalized: False
103
  mlp_expansion_factor: 4
104
- use_torch_checkpoint: True
105
- skip_connection: False
106
-
107
-
108
  training:
109
  batch_size: 1
110
  gradient_accumulation_steps: 1
111
  grad_clip: 0
112
- instruments: ['vocals', 'instrument']
113
- lr: 1.0e-5
 
 
114
  patience: 5
115
  reduce_factor: 0.9
116
  target_instrument: instrument
@@ -120,11 +119,9 @@ training:
120
  coarse_loss_clip: true
121
  ema_momentum: 0.999
122
  optimizer: adam
123
- other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
124
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
125
-
126
-
127
  inference:
128
- batch_size: 2
129
  dim_t: 1876
130
- num_overlap: 4
 
2
  audio:
3
  chunk_size: 960000
4
  dim_f: 1024
5
+ dim_t: 801
6
+ hop_length: 441
7
  n_fft: 2048
8
  num_channels: 2
9
  sample_rate: 44100
10
  min_mean_abs: 0.0001
 
11
  model:
12
  dim: 256
13
  depth: 12
 
92
  mask_estimator_depth: 2
93
  multi_stft_resolution_loss_weight: 1.0
94
  multi_stft_resolutions_window_sizes: !!python/tuple
95
+ - 4096
96
+ - 2048
97
+ - 1024
98
+ - 512
99
+ - 256
100
  multi_stft_hop_size: 147
101
+ multi_stft_normalized: false
102
  mlp_expansion_factor: 4
103
+ use_torch_checkpoint: true
104
+ skip_connection: false
 
 
105
  training:
106
  batch_size: 1
107
  gradient_accumulation_steps: 1
108
  grad_clip: 0
109
+ instruments:
110
+ - vocals
111
+ - instrument
112
+ lr: 1.0e-05
113
  patience: 5
114
  reduce_factor: 0.9
115
  target_instrument: instrument
 
119
  coarse_loss_clip: true
120
  ema_momentum: 0.999
121
  optimizer: adam
122
+ other_fix: false
123
+ use_amp: true
 
 
124
  inference:
125
+ batch_size: 1
126
  dim_t: 1876
127
+ num_overlap: 2