noblebarkrr committed on
Commit
e52ead1
·
verified ·
1 Parent(s): 9bdf3cc

Upload folder using huggingface_hub

Browse files
bs_instvoc_hyperace2_unwa_merged.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e43ed9016a73ab86cfa04d7a4266f96a9e91c9959462a88bf8eebe8f78c24e
3
+ size 471684533
bs_instvoc_hyperace2_unwa_merged_config.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hyperace2: true
2
+ audio:
3
+ chunk_size: 960000
4
+ dim_f: 1024
5
+ dim_t: 801
6
+ hop_length: 441
7
+ n_fft: 2048
8
+ num_channels: 2
9
+ sample_rate: 44100
10
+ min_mean_abs: 0.0001
11
+ model:
12
+ dim: 256
13
+ depth: 12
14
+ stereo: true
15
+ num_stems: 2
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ linear_transformer_depth: 0
19
+ freqs_per_bands: !!python/tuple
20
+ - 2
21
+ - 2
22
+ - 2
23
+ - 2
24
+ - 2
25
+ - 2
26
+ - 2
27
+ - 2
28
+ - 2
29
+ - 2
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ - 2
36
+ - 2
37
+ - 2
38
+ - 2
39
+ - 2
40
+ - 2
41
+ - 2
42
+ - 2
43
+ - 2
44
+ - 4
45
+ - 4
46
+ - 4
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 4
52
+ - 4
53
+ - 4
54
+ - 4
55
+ - 4
56
+ - 12
57
+ - 12
58
+ - 12
59
+ - 12
60
+ - 12
61
+ - 12
62
+ - 12
63
+ - 12
64
+ - 24
65
+ - 24
66
+ - 24
67
+ - 24
68
+ - 24
69
+ - 24
70
+ - 24
71
+ - 24
72
+ - 48
73
+ - 48
74
+ - 48
75
+ - 48
76
+ - 48
77
+ - 48
78
+ - 48
79
+ - 48
80
+ - 128
81
+ - 129
82
+ dim_head: 64
83
+ heads: 8
84
+ attn_dropout: 0.0
85
+ ff_dropout: 0.0
86
+ flash_attn: true
87
+ dim_freqs_in: 1025
88
+ stft_n_fft: 2048
89
+ stft_hop_length: 512
90
+ stft_win_length: 2048
91
+ stft_normalized: false
92
+ mask_estimator_depth: 2
93
+ multi_stft_resolution_loss_weight: 1.0
94
+ multi_stft_resolutions_window_sizes: !!python/tuple
95
+ - 4096
96
+ - 2048
97
+ - 1024
98
+ - 512
99
+ - 256
100
+ multi_stft_hop_size: 147
101
+ multi_stft_normalized: false
102
+ mlp_expansion_factor: 4
103
+ use_torch_checkpoint: true
104
+ skip_connection: false
105
+ training:
106
+ batch_size: 1
107
+ gradient_accumulation_steps: 1
108
+ grad_clip: 0
109
+ instruments:
110
+ - instrument
111
+ - vocals
112
+ lr: 1.0e-05
113
+ patience: 5
114
+ reduce_factor: 0.9
115
+ target_instrument: null
116
+ num_epochs: 1000
117
+ num_steps: 1000
118
+ q: 0.95
119
+ coarse_loss_clip: true
120
+ ema_momentum: 0.999
121
+ optimizer: adam
122
+ other_fix: false
123
+ use_amp: true
124
+ inference:
125
+ batch_size: 1
126
+ dim_t: 1876
127
+ num_overlap: 2
bs_resurrection_instvoc_unwa_merged.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44945fba879315bf3c967d860127cfe1eb02375bfc6ddec72352f9dd4b45ebfe
3
+ size 303400601
bs_resurrection_instvoc_unwa_merged_config.yaml ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 749259
3
+ dim_f: 1024
4
+ dim_t: 1700
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.0
10
+ model:
11
+ dim: 256
12
+ depth: 12
13
+ stereo: true
14
+ num_stems: 2
15
+ time_transformer_depth: 1
16
+ freq_transformer_depth: 1
17
+ linear_transformer_depth: 0
18
+ freqs_per_bands: !!python/tuple
19
+ - 2
20
+ - 2
21
+ - 2
22
+ - 2
23
+ - 2
24
+ - 2
25
+ - 2
26
+ - 2
27
+ - 2
28
+ - 2
29
+ - 2
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ - 2
36
+ - 2
37
+ - 2
38
+ - 2
39
+ - 2
40
+ - 2
41
+ - 2
42
+ - 2
43
+ - 4
44
+ - 4
45
+ - 4
46
+ - 4
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 4
52
+ - 4
53
+ - 4
54
+ - 4
55
+ - 12
56
+ - 12
57
+ - 12
58
+ - 12
59
+ - 12
60
+ - 12
61
+ - 12
62
+ - 12
63
+ - 24
64
+ - 24
65
+ - 24
66
+ - 24
67
+ - 24
68
+ - 24
69
+ - 24
70
+ - 24
71
+ - 48
72
+ - 48
73
+ - 48
74
+ - 48
75
+ - 48
76
+ - 48
77
+ - 48
78
+ - 48
79
+ - 128
80
+ - 129
81
+ dim_head: 64
82
+ heads: 8
83
+ attn_dropout: 0.0
84
+ ff_dropout: 0.0
85
+ flash_attn: true
86
+ dim_freqs_in: 1025
87
+ stft_n_fft: 2048
88
+ stft_hop_length: 441
89
+ stft_win_length: 2048
90
+ stft_normalized: false
91
+ mask_estimator_depth: 2
92
+ multi_stft_resolution_loss_weight: 1.0
93
+ multi_stft_resolutions_window_sizes: !!python/tuple
94
+ - 4096
95
+ - 2048
96
+ - 1024
97
+ - 512
98
+ - 256
99
+ multi_stft_hop_size: 147
100
+ multi_stft_normalized: false
101
+ mlp_expansion_factor: 4
102
+ use_torch_checkpoint: false
103
+ skip_connection: false
104
+ training:
105
+ batch_size: 2
106
+ gradient_accumulation_steps: 1
107
+ grad_clip: 0
108
+ instruments:
109
+ - other
110
+ - vocals
111
+ patience: 3
112
+ reduce_factor: 0.95
113
+ target_instrument: null
114
+ num_epochs: 1000
115
+ num_steps: 1000
116
+ augmentation: false
117
+ augmentation_type: simple1
118
+ use_mp3_compress: false
119
+ augmentation_mix: true
120
+ augmentation_loudness: true
121
+ augmentation_loudness_type: 1
122
+ augmentation_loudness_min: 0.5
123
+ augmentation_loudness_max: 1.5
124
+ q: 0.95
125
+ coarse_loss_clip: true
126
+ ema_momentum: 0.999
127
+ optimizer: adam
128
+ lr: 1.0e-05
129
+ other_fix: false
130
+ use_amp: true
131
+ inference:
132
+ batch_size: 1
133
+ dim_t: 1700
134
+ num_overlap: 2
135
+ normalize: false
mbr_instvoc_becruily_merged.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d295f23a00926fbfbc4416cb8c54b61f85e8113881840372114d24be2002f7d8
3
+ size 1719017037
mbr_instvoc_becruily_merged_config.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.0
10
+ model:
11
+ dim: 384
12
+ depth: 6
13
+ stereo: true
14
+ num_stems: 2
15
+ time_transformer_depth: 1
16
+ freq_transformer_depth: 1
17
+ num_bands: 60
18
+ dim_head: 64
19
+ heads: 8
20
+ attn_dropout: 0
21
+ ff_dropout: 0
22
+ flash_attn: true
23
+ dim_freqs_in: 1025
24
+ sample_rate: 44100
25
+ stft_n_fft: 2048
26
+ stft_hop_length: 441
27
+ stft_win_length: 2048
28
+ stft_normalized: false
29
+ mask_estimator_depth: 2
30
+ multi_stft_resolution_loss_weight: 1.0
31
+ multi_stft_resolutions_window_sizes: !!python/tuple
32
+ - 4096
33
+ - 2048
34
+ - 1024
35
+ - 512
36
+ - 256
37
+ multi_stft_hop_size: 147
38
+ multi_stft_normalized: false
39
+ training:
40
+ batch_size: 1
41
+ gradient_accumulation_steps: 1
42
+ grad_clip: 0
43
+ instruments:
44
+ - Instrumental
45
+ - Vocals
46
+ lr: 0.0005
47
+ patience: 2
48
+ reduce_factor: 0.95
49
+ target_instrument: null
50
+ num_epochs: 1000
51
+ num_steps: 1000
52
+ augmentation: false
53
+ augmentation_type: null
54
+ use_mp3_compress: false
55
+ augmentation_mix: false
56
+ augmentation_loudness: false
57
+ augmentation_loudness_type: 1
58
+ augmentation_loudness_min: 0
59
+ augmentation_loudness_max: 0
60
+ q: 0.95
61
+ coarse_loss_clip: false
62
+ ema_momentum: 0.999
63
+ optimizer: adamw
64
+ other_fix: false
65
+ use_amp: true
66
+ inference:
67
+ batch_size: 1
68
+ dim_t: 1101
69
+ num_overlap: 2