miercolesv committed on
Commit
22ab82a
·
verified ·
1 Parent(s): e60f27f

Add files using upload-large-folder tool

Browse files
Files changed (49) hide show
  1. bsroformers/bs_karaoke_gabox_IS.ckpt +3 -0
  2. bsroformers/karaoke_bs_roformer.yaml +129 -0
  3. melbandroformers/experimental/BS_ResurrectioN.ckpt +3 -0
  4. melbandroformers/experimental/Fullness.ckpt +3 -0
  5. melbandroformers/experimental/INSTV10.ckpt +3 -0
  6. melbandroformers/experimental/INSTV8.ckpt +3 -0
  7. melbandroformers/experimental/INSTV8N.ckpt +3 -0
  8. melbandroformers/experimental/INSTV9.ckpt +3 -0
  9. melbandroformers/experimental/Inst_FV8b.ckpt +3 -0
  10. melbandroformers/experimental/Inst_Fv8.ckpt +3 -0
  11. melbandroformers/experimental/Inst_Fv9.ckpt +3 -0
  12. melbandroformers/experimental/Karaoke_GaboxV2.ckpt +3 -0
  13. melbandroformers/experimental/Lead_VocalDereverb.ckpt +3 -0
  14. melbandroformers/experimental/inst_fv7b.ckpt +3 -0
  15. melbandroformers/experimental/instv7beta.ckpt +3 -0
  16. melbandroformers/experimental/instv7beta2.ckpt +3 -0
  17. melbandroformers/experimental/instv7beta3.ckpt +3 -0
  18. melbandroformers/experimental/instv7plus.ckpt +3 -0
  19. melbandroformers/experimental/kar_gabox.ckpt +3 -0
  20. melbandroformers/experimental/small_inst.ckpt +3 -0
  21. melbandroformers/experimental/vocfv7beta1.ckpt +3 -0
  22. melbandroformers/experimental/vocfv7beta2.ckpt +3 -0
  23. melbandroformers/experimental/vocfv7beta3.ckpt +3 -0
  24. melbandroformers/instrumental/INSTV5.ckpt +3 -0
  25. melbandroformers/instrumental/INSTV5N.ckpt +3 -0
  26. melbandroformers/instrumental/INSTV6.ckpt +3 -0
  27. melbandroformers/instrumental/INSTV6N.ckpt +3 -0
  28. melbandroformers/instrumental/INSTV7N.ckpt +3 -0
  29. melbandroformers/instrumental/Inst_ExperimentalV1.ckpt +3 -0
  30. melbandroformers/instrumental/Inst_GaboxFVX.ckpt +3 -0
  31. melbandroformers/instrumental/Inst_GaboxFv7z.ckpt +3 -0
  32. melbandroformers/instrumental/Inst_GaboxFv8.ckpt +3 -0
  33. melbandroformers/instrumental/Inst_GaboxFv9.ckpt +3 -0
  34. melbandroformers/instrumental/Inst_GaboxV7.ckpt +3 -0
  35. melbandroformers/instrumental/denoisedebleed.ckpt +3 -0
  36. melbandroformers/instrumental/inst_Fv4.ckpt +3 -0
  37. melbandroformers/instrumental/inst_Fv4Noise.ckpt +3 -0
  38. melbandroformers/instrumental/inst_gabox.ckpt +3 -0
  39. melbandroformers/instrumental/inst_gabox.yaml +51 -0
  40. melbandroformers/instrumental/inst_gaboxBv1.ckpt +3 -0
  41. melbandroformers/instrumental/intrumental_gabox.ckpt +3 -0
  42. melbandroformers/instrumental/v10.yaml +73 -0
  43. melbandroformers/karaoke/karaokegabox_1750911344.yaml +72 -0
  44. melbandroformers/vocals/voc_Fv3.ckpt +3 -0
  45. melbandroformers/vocals/voc_fv4.ckpt +3 -0
  46. melbandroformers/vocals/voc_fv5.ckpt +3 -0
  47. melbandroformers/vocals/voc_gabox.ckpt +3 -0
  48. melbandroformers/vocals/voc_gabox.yaml +51 -0
  49. melbandroformers/vocals/voc_gaboxFv2.ckpt +3 -0
bsroformers/bs_karaoke_gabox_IS.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8357825398d4231031ad1ab4aa12a94bcaad8d67e8ce5e4b3c5b48fdee1d4f
3
+ size 204483448
bsroformers/karaoke_bs_roformer.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801 # doesn't work here (use in model)
5
+ hop_length: 441 # doesn't work here (use in model)
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 12
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ linear_transformer_depth: 0
19
+ freqs_per_bands: !!python/tuple
20
+ - 2
21
+ - 2
22
+ - 2
23
+ - 2
24
+ - 2
25
+ - 2
26
+ - 2
27
+ - 2
28
+ - 2
29
+ - 2
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ - 2
36
+ - 2
37
+ - 2
38
+ - 2
39
+ - 2
40
+ - 2
41
+ - 2
42
+ - 2
43
+ - 2
44
+ - 4
45
+ - 4
46
+ - 4
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 4
52
+ - 4
53
+ - 4
54
+ - 4
55
+ - 4
56
+ - 12
57
+ - 12
58
+ - 12
59
+ - 12
60
+ - 12
61
+ - 12
62
+ - 12
63
+ - 12
64
+ - 24
65
+ - 24
66
+ - 24
67
+ - 24
68
+ - 24
69
+ - 24
70
+ - 24
71
+ - 24
72
+ - 48
73
+ - 48
74
+ - 48
75
+ - 48
76
+ - 48
77
+ - 48
78
+ - 48
79
+ - 48
80
+ - 128
81
+ - 129
82
+ dim_head: 64
83
+ heads: 8
84
+ attn_dropout: 0.0
85
+ ff_dropout: 0.0
86
+ flash_attn: true
87
+ dim_freqs_in: 1025
88
+ stft_n_fft: 2048
89
+ stft_hop_length: 512
90
+ stft_win_length: 2048
91
+ stft_normalized: false
92
+ mask_estimator_depth: 2
93
+ multi_stft_resolution_loss_weight: 1.0
94
+ multi_stft_resolutions_window_sizes: !!python/tuple
95
+ - 4096
96
+ - 2048
97
+ - 1024
98
+ - 512
99
+ - 256
100
+ multi_stft_hop_size: 147
101
+ multi_stft_normalized: False
102
+ mlp_expansion_factor: 4
103
+ use_torch_checkpoint: True
104
+ skip_connection: False
105
+
106
+
107
+ training:
108
+ batch_size: 1
109
+ gradient_accumulation_steps: 999
110
+ grad_clip: 1
111
+ instruments: ['vocals', 'other']
112
+ lr: 1.0e-5
113
+ patience: 1000000
114
+ reduce_factor: 0.75
115
+ target_instrument: vocals
116
+ num_epochs: 1000
117
+ num_steps: 1000
118
+ q: 0.95
119
+ coarse_loss_clip: true
120
+ ema_momentum: 0.999
121
+ optimizer: Fira
122
+ other_fix: True # it's needed for checking on multisong dataset if other is actually instrumental
123
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
124
+ use_torch_checkpoint: True
125
+
126
+ inference:
127
+ batch_size: 6
128
+ dim_t: 1251
129
+ num_overlap: 2
melbandroformers/experimental/BS_ResurrectioN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d28b3a422531e1ca482d29302a5b67614eb45c09d72ce954258011ce24ef0919
3
+ size 204483033
melbandroformers/experimental/Fullness.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67f47fa84146c8182f0061c194a51782439fe732e6ffecb810f0e4af7026f6d
3
+ size 913091027
melbandroformers/experimental/INSTV10.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb987c3fd93c66adfea2e4284d1da668e80a98f75b6436b4be454780a728182
3
+ size 913026650
melbandroformers/experimental/INSTV8.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d51dae045064a5817cd7588c46bfe2f93542d4dbe66c1e1a6e75ca081299a53
3
+ size 913026650
melbandroformers/experimental/INSTV8N.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25920c876e601d4ccbf1684b19ecdac4b9fcfcc7f48d2c095d81040ec3fecbea
3
+ size 913026650
melbandroformers/experimental/INSTV9.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b689ffa942c48353bbad9e8df7b96d61a24b5c50256b0052579b327552565c8
3
+ size 913026650
melbandroformers/experimental/Inst_FV8b.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a28f79ae38b92caaeb37c5ccdf3912316b135414353ec0d5553d507e3de8ef7
3
+ size 913027055
melbandroformers/experimental/Inst_Fv8.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026caebb240226f64b1611e623735bb14c042f772464292aa1a26e896a759cc5
3
+ size 913027055
melbandroformers/experimental/Inst_Fv9.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c02cd154bb49d6d08044a8dc935a94ec26d09f2a0e6a03cc80a65d05a98ff2
3
+ size 913090472
melbandroformers/experimental/Karaoke_GaboxV2.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec34be50327aeaf1a996c27977f5c30d1ac80c0076d69683d3e5184c31ea29d3
3
+ size 913090472
melbandroformers/experimental/Lead_VocalDereverb.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:097c13b298414905651867ef315d881bbf2604a5c04be28820ed5946338b3eb4
3
+ size 913031195
melbandroformers/experimental/inst_fv7b.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf9538616c835575eb07bef1fe4550b2406c852919a5644d22e9ae84a2ec6ba
3
+ size 913030778
melbandroformers/experimental/instv7beta.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4f12d81fd7fb02f38609216d59f0e42b3dca655fd90ca275ab5321b3e4d9aa
3
+ size 913026650
melbandroformers/experimental/instv7beta2.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11dcd18b2bca5ff189924145a52c9e1a145632c7659aac59b239221dca4f1703
3
+ size 913026650
melbandroformers/experimental/instv7beta3.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc33cc26ab805b057e47a753241bc17102564af4bc1c0e76364f3bb6a31c09d2
3
+ size 913026650
melbandroformers/experimental/instv7plus.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7baf1aab873e534a186e2dcf6e64bebad2dbf3e2c49337729487294539118232
3
+ size 913031195
melbandroformers/experimental/kar_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8
3
+ size 913026650
melbandroformers/experimental/small_inst.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c0a91d60aac2bdc6ee4a53bf7ebd0370e6e894f6b22a1e37511e05eecca335
3
+ size 202573672
melbandroformers/experimental/vocfv7beta1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb98488b80e7797e6f447c0dd8f89f2b7040eebc5e839b5a59871b7a3a19265
3
+ size 913091027
melbandroformers/experimental/vocfv7beta2.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bcaa03568f96e56b245302c177658e8cb83aa6e2f4ae260e67202db9d56123
3
+ size 913030778
melbandroformers/experimental/vocfv7beta3.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45463b0051651b7ed15677cc41f21bd384d66796938f46955617bf6f1d661988
3
+ size 913030778
melbandroformers/instrumental/INSTV5.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38e935cf1e97afcc1de84d0bdb87dd8090bad530fa0df28e707d16448e1d38e2
3
+ size 913026650
melbandroformers/instrumental/INSTV5N.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175203923fac3e52ae00e7e37d41e8a7fef5020b6ee4e4144f4786daabc54b34
3
+ size 913026650
melbandroformers/instrumental/INSTV6.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:677951b8556a27abe32e39705640638826e78101fa901a51ad73d20522be6d25
3
+ size 913026650
melbandroformers/instrumental/INSTV6N.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802f3e5d183d7c4b50dea147c320e61634f5be6ff55fa899fdebeaf0f3cf7f42
3
+ size 913026650
melbandroformers/instrumental/INSTV7N.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ca36af5d1314be46b56c8a53b6be02f98511fa5d7e3e196fd895755e65be3c
3
+ size 913026650
melbandroformers/instrumental/Inst_ExperimentalV1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb19b968287d31b6d229816342aa7f08f7e4010b7c61be2427fbdb0c04a8a020
3
+ size 913026650
melbandroformers/instrumental/Inst_GaboxFVX.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:545ef13b0cdbac505818a38db98e09c54e7c03ea17b4e0c895a531bfa352fa59
3
+ size 913026650
melbandroformers/instrumental/Inst_GaboxFv7z.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef229f0dcd370c1767e4292981c59e5248814da45f32bfacebcc0f28adaa30a1
3
+ size 913031195
melbandroformers/instrumental/Inst_GaboxFv8.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50bfa127d21f419e0da89730867d28c7ac4484c9473e6f313d036bc8b429df80
3
+ size 913026650
melbandroformers/instrumental/Inst_GaboxFv9.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:068d29a54e71f69ed871ca410e09b9877e153ad1439e825bd093573ea4de5762
3
+ size 913091027
melbandroformers/instrumental/Inst_GaboxV7.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e725a860176acb475d983a1ddd9c1a99a619c69cc9ceda808dd294d10db746a5
3
+ size 913026650
melbandroformers/instrumental/denoisedebleed.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd
3
+ size 913026650
melbandroformers/instrumental/inst_Fv4.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03d9bf0fc643dabf006dee38fd798140b4884c1b65c573b4488c1a2a876ad84
3
+ size 913091027
melbandroformers/instrumental/inst_Fv4Noise.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50296e913b9af3b5b3b961e92877ef0d4a74f9a433e796e89960c4c2b1abe53
3
+ size 913026650
melbandroformers/instrumental/inst_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
3
+ size 913026650
melbandroformers/instrumental/inst_gabox.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ dim_f: 1024
4
+ dim_t: 1101
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - Instrumental
44
+ - Vocals
45
+ target_instrument: Instrumental
46
+ use_amp: True
47
+
48
+ inference:
49
+ batch_size: 1
50
+ dim_t: 1101
51
+ num_overlap: 2
melbandroformers/instrumental/inst_gaboxBv1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de972fb724601beef237abe94c8b934c73218e9baf3e344ab4c2333276e5cfe7
3
+ size 913026650
melbandroformers/instrumental/intrumental_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
3
+ size 913026650
melbandroformers/instrumental/v10.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.00
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 12
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: true
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: true
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 250
39
+ multi_stft_normalized: false
40
+ use_torch_checkpoint: true
41
+
42
+ training:
43
+ batch_size: 1
44
+ gradient_accumulation_steps: 999999999999999999999999
45
+ grad_clip: 0
46
+ instruments:
47
+ - other
48
+ - vocals
49
+ lr: 0.00001
50
+ patience: 100000000
51
+ reduce_factor: 0.95
52
+ target_instrument: other
53
+ num_epochs: 1000
54
+ num_steps: 1000
55
+ augmentation: false # enable augmentations by audiomentations and pedalboard
56
+ augmentation_type: simple1
57
+ use_mp3_compress: false # Deprecated
58
+ augmentation_mix: false # Mix several stems of the same type with some probability
59
+ augmentation_loudness: true # randomly change loudness of each stem
60
+ augmentation_loudness_type: 1 # Type 1 or 2
61
+ augmentation_loudness_min: 0
62
+ augmentation_loudness_max: 0
63
+ q: 0.95
64
+ coarse_loss_clip: false
65
+ ema_momentum: 0.999
66
+ optimizer: adamw
67
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
68
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
69
+
70
+ inference:
71
+ batch_size: 1
72
+ dim_t: 1101
73
+ num_overlap: 2
melbandroformers/karaoke/karaokegabox_1750911344.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: true
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: false
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: true
40
+
41
+ training:
42
+ batch_size: 1
43
+ gradient_accumulation_steps: 1
44
+ grad_clip: 0
45
+ instruments:
46
+ - Vocals
47
+ - Instrumental
48
+ lr: 0.0005
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: Vocals
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ augmentation: false # enable augmentations by audiomentations and pedalboard
55
+ augmentation_type:
56
+ use_mp3_compress: false # Deprecated
57
+ augmentation_mix: false # Mix several stems of the same type with some probability
58
+ augmentation_loudness: false # randomly change loudness of each stem
59
+ augmentation_loudness_type: 1 # Type 1 or 2
60
+ augmentation_loudness_min: 0
61
+ augmentation_loudness_max: 0
62
+ q: 0.95
63
+ coarse_loss_clip: false
64
+ ema_momentum: 0.999
65
+ optimizer: adamw
66
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
67
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
68
+
69
+ inference:
70
+ batch_size: 1
71
+ dim_t: 1101
72
+ num_overlap: 8
melbandroformers/vocals/voc_Fv3.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d81446b34a7848446efde7898b25bdc32fe872c2393617acb5356649f7ea93
3
+ size 913026650
melbandroformers/vocals/voc_fv4.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a9657de5fd3ed87ad4fd1a9d2069743ecb33424836973ad0f3288e2a64e90bc
3
+ size 913026650
melbandroformers/vocals/voc_fv5.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
3
+ size 913026650
melbandroformers/vocals/voc_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517
3
+ size 913026650
melbandroformers/vocals/voc_gabox.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - Vocals
44
+ - Instrumental
45
+ target_instrument: Vocals
46
+
47
+ inference:
48
+ batch_size: 1
49
+ dim_t: 1101
50
+ num_overlap: 1
51
+ chunk_size: 352800
melbandroformers/vocals/voc_gaboxFv2.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2888813aa5b519941fa8548efc5a4331d63c61909007eb17fe95c367be230196
3
+ size 913026650