Add files using upload-large-folder tool
Browse files- bsroformers/bs_karaoke_gabox_IS.ckpt +3 -0
- bsroformers/karaoke_bs_roformer.yaml +129 -0
- melbandroformers/experimental/BS_ResurrectioN.ckpt +3 -0
- melbandroformers/experimental/Fullness.ckpt +3 -0
- melbandroformers/experimental/INSTV10.ckpt +3 -0
- melbandroformers/experimental/INSTV8.ckpt +3 -0
- melbandroformers/experimental/INSTV8N.ckpt +3 -0
- melbandroformers/experimental/INSTV9.ckpt +3 -0
- melbandroformers/experimental/Inst_FV8b.ckpt +3 -0
- melbandroformers/experimental/Inst_Fv8.ckpt +3 -0
- melbandroformers/experimental/Inst_Fv9.ckpt +3 -0
- melbandroformers/experimental/Karaoke_GaboxV2.ckpt +3 -0
- melbandroformers/experimental/Lead_VocalDereverb.ckpt +3 -0
- melbandroformers/experimental/inst_fv7b.ckpt +3 -0
- melbandroformers/experimental/instv7beta.ckpt +3 -0
- melbandroformers/experimental/instv7beta2.ckpt +3 -0
- melbandroformers/experimental/instv7beta3.ckpt +3 -0
- melbandroformers/experimental/instv7plus.ckpt +3 -0
- melbandroformers/experimental/kar_gabox.ckpt +3 -0
- melbandroformers/experimental/small_inst.ckpt +3 -0
- melbandroformers/experimental/vocfv7beta1.ckpt +3 -0
- melbandroformers/experimental/vocfv7beta2.ckpt +3 -0
- melbandroformers/experimental/vocfv7beta3.ckpt +3 -0
- melbandroformers/instrumental/INSTV5.ckpt +3 -0
- melbandroformers/instrumental/INSTV5N.ckpt +3 -0
- melbandroformers/instrumental/INSTV6.ckpt +3 -0
- melbandroformers/instrumental/INSTV6N.ckpt +3 -0
- melbandroformers/instrumental/INSTV7N.ckpt +3 -0
- melbandroformers/instrumental/Inst_ExperimentalV1.ckpt +3 -0
- melbandroformers/instrumental/Inst_GaboxFVX.ckpt +3 -0
- melbandroformers/instrumental/Inst_GaboxFv7z.ckpt +3 -0
- melbandroformers/instrumental/Inst_GaboxFv8.ckpt +3 -0
- melbandroformers/instrumental/Inst_GaboxFv9.ckpt +3 -0
- melbandroformers/instrumental/Inst_GaboxV7.ckpt +3 -0
- melbandroformers/instrumental/denoisedebleed.ckpt +3 -0
- melbandroformers/instrumental/inst_Fv4.ckpt +3 -0
- melbandroformers/instrumental/inst_Fv4Noise.ckpt +3 -0
- melbandroformers/instrumental/inst_gabox.ckpt +3 -0
- melbandroformers/instrumental/inst_gabox.yaml +51 -0
- melbandroformers/instrumental/inst_gaboxBv1.ckpt +3 -0
- melbandroformers/instrumental/intrumental_gabox.ckpt +3 -0
- melbandroformers/instrumental/v10.yaml +73 -0
- melbandroformers/karaoke/karaokegabox_1750911344.yaml +72 -0
- melbandroformers/vocals/voc_Fv3.ckpt +3 -0
- melbandroformers/vocals/voc_fv4.ckpt +3 -0
- melbandroformers/vocals/voc_fv5.ckpt +3 -0
- melbandroformers/vocals/voc_gabox.ckpt +3 -0
- melbandroformers/vocals/voc_gabox.yaml +51 -0
- melbandroformers/vocals/voc_gaboxFv2.ckpt +3 -0
bsroformers/bs_karaoke_gabox_IS.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db8357825398d4231031ad1ab4aa12a94bcaad8d67e8ce5e4b3c5b48fdee1d4f
|
| 3 |
+
size 204483448
|
bsroformers/karaoke_bs_roformer.yaml
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audio:
|
| 2 |
+
chunk_size: 352800
|
| 3 |
+
dim_f: 1024
|
| 4 |
+
dim_t: 801 # don't work (use in model)
|
| 5 |
+
hop_length: 441 # don't work (use in model)
|
| 6 |
+
n_fft: 2048
|
| 7 |
+
num_channels: 2
|
| 8 |
+
sample_rate: 44100
|
| 9 |
+
min_mean_abs: 0.000
|
| 10 |
+
|
| 11 |
+
model:
|
| 12 |
+
dim: 256
|
| 13 |
+
depth: 12
|
| 14 |
+
stereo: true
|
| 15 |
+
num_stems: 1
|
| 16 |
+
time_transformer_depth: 1
|
| 17 |
+
freq_transformer_depth: 1
|
| 18 |
+
linear_transformer_depth: 0
|
| 19 |
+
freqs_per_bands: !!python/tuple
|
| 20 |
+
- 2
|
| 21 |
+
- 2
|
| 22 |
+
- 2
|
| 23 |
+
- 2
|
| 24 |
+
- 2
|
| 25 |
+
- 2
|
| 26 |
+
- 2
|
| 27 |
+
- 2
|
| 28 |
+
- 2
|
| 29 |
+
- 2
|
| 30 |
+
- 2
|
| 31 |
+
- 2
|
| 32 |
+
- 2
|
| 33 |
+
- 2
|
| 34 |
+
- 2
|
| 35 |
+
- 2
|
| 36 |
+
- 2
|
| 37 |
+
- 2
|
| 38 |
+
- 2
|
| 39 |
+
- 2
|
| 40 |
+
- 2
|
| 41 |
+
- 2
|
| 42 |
+
- 2
|
| 43 |
+
- 2
|
| 44 |
+
- 4
|
| 45 |
+
- 4
|
| 46 |
+
- 4
|
| 47 |
+
- 4
|
| 48 |
+
- 4
|
| 49 |
+
- 4
|
| 50 |
+
- 4
|
| 51 |
+
- 4
|
| 52 |
+
- 4
|
| 53 |
+
- 4
|
| 54 |
+
- 4
|
| 55 |
+
- 4
|
| 56 |
+
- 12
|
| 57 |
+
- 12
|
| 58 |
+
- 12
|
| 59 |
+
- 12
|
| 60 |
+
- 12
|
| 61 |
+
- 12
|
| 62 |
+
- 12
|
| 63 |
+
- 12
|
| 64 |
+
- 24
|
| 65 |
+
- 24
|
| 66 |
+
- 24
|
| 67 |
+
- 24
|
| 68 |
+
- 24
|
| 69 |
+
- 24
|
| 70 |
+
- 24
|
| 71 |
+
- 24
|
| 72 |
+
- 48
|
| 73 |
+
- 48
|
| 74 |
+
- 48
|
| 75 |
+
- 48
|
| 76 |
+
- 48
|
| 77 |
+
- 48
|
| 78 |
+
- 48
|
| 79 |
+
- 48
|
| 80 |
+
- 128
|
| 81 |
+
- 129
|
| 82 |
+
dim_head: 64
|
| 83 |
+
heads: 8
|
| 84 |
+
attn_dropout: 0.0
|
| 85 |
+
ff_dropout: 0.0
|
| 86 |
+
flash_attn: true
|
| 87 |
+
dim_freqs_in: 1025
|
| 88 |
+
stft_n_fft: 2048
|
| 89 |
+
stft_hop_length: 512
|
| 90 |
+
stft_win_length: 2048
|
| 91 |
+
stft_normalized: false
|
| 92 |
+
mask_estimator_depth: 2
|
| 93 |
+
multi_stft_resolution_loss_weight: 1.0
|
| 94 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 95 |
+
- 4096
|
| 96 |
+
- 2048
|
| 97 |
+
- 1024
|
| 98 |
+
- 512
|
| 99 |
+
- 256
|
| 100 |
+
multi_stft_hop_size: 147
|
| 101 |
+
multi_stft_normalized: False
|
| 102 |
+
mlp_expansion_factor: 4
|
| 103 |
+
use_torch_checkpoint: True
|
| 104 |
+
skip_connection: False
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
training:
|
| 108 |
+
batch_size: 1
|
| 109 |
+
gradient_accumulation_steps: 999
|
| 110 |
+
grad_clip: 1
|
| 111 |
+
instruments: ['vocals', 'other']
|
| 112 |
+
lr: 1.0e-5
|
| 113 |
+
patience: 1000000
|
| 114 |
+
reduce_factor: 0.75
|
| 115 |
+
target_instrument: vocals
|
| 116 |
+
num_epochs: 1000
|
| 117 |
+
num_steps: 1000
|
| 118 |
+
q: 0.95
|
| 119 |
+
coarse_loss_clip: true
|
| 120 |
+
ema_momentum: 0.999
|
| 121 |
+
optimizer: Fira
|
| 122 |
+
other_fix: True # it's needed for checking on multisong dataset if other is actually instrumental
|
| 123 |
+
use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
|
| 124 |
+
use_torch_checkpoint: True
|
| 125 |
+
|
| 126 |
+
inference:
|
| 127 |
+
batch_size: 6
|
| 128 |
+
dim_t: 1251
|
| 129 |
+
num_overlap: 2
|
melbandroformers/experimental/BS_ResurrectioN.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d28b3a422531e1ca482d29302a5b67614eb45c09d72ce954258011ce24ef0919
|
| 3 |
+
size 204483033
|
melbandroformers/experimental/Fullness.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c67f47fa84146c8182f0061c194a51782439fe732e6ffecb810f0e4af7026f6d
|
| 3 |
+
size 913091027
|
melbandroformers/experimental/INSTV10.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eb987c3fd93c66adfea2e4284d1da668e80a98f75b6436b4be454780a728182
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/INSTV8.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d51dae045064a5817cd7588c46bfe2f93542d4dbe66c1e1a6e75ca081299a53
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/INSTV8N.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25920c876e601d4ccbf1684b19ecdac4b9fcfcc7f48d2c095d81040ec3fecbea
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/INSTV9.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b689ffa942c48353bbad9e8df7b96d61a24b5c50256b0052579b327552565c8
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/Inst_FV8b.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a28f79ae38b92caaeb37c5ccdf3912316b135414353ec0d5553d507e3de8ef7
|
| 3 |
+
size 913027055
|
melbandroformers/experimental/Inst_Fv8.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:026caebb240226f64b1611e623735bb14c042f772464292aa1a26e896a759cc5
|
| 3 |
+
size 913027055
|
melbandroformers/experimental/Inst_Fv9.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6c02cd154bb49d6d08044a8dc935a94ec26d09f2a0e6a03cc80a65d05a98ff2
|
| 3 |
+
size 913090472
|
melbandroformers/experimental/Karaoke_GaboxV2.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec34be50327aeaf1a996c27977f5c30d1ac80c0076d69683d3e5184c31ea29d3
|
| 3 |
+
size 913090472
|
melbandroformers/experimental/Lead_VocalDereverb.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:097c13b298414905651867ef315d881bbf2604a5c04be28820ed5946338b3eb4
|
| 3 |
+
size 913031195
|
melbandroformers/experimental/inst_fv7b.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cf9538616c835575eb07bef1fe4550b2406c852919a5644d22e9ae84a2ec6ba
|
| 3 |
+
size 913030778
|
melbandroformers/experimental/instv7beta.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b4f12d81fd7fb02f38609216d59f0e42b3dca655fd90ca275ab5321b3e4d9aa
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/instv7beta2.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11dcd18b2bca5ff189924145a52c9e1a145632c7659aac59b239221dca4f1703
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/instv7beta3.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc33cc26ab805b057e47a753241bc17102564af4bc1c0e76364f3bb6a31c09d2
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/instv7plus.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7baf1aab873e534a186e2dcf6e64bebad2dbf3e2c49337729487294539118232
|
| 3 |
+
size 913031195
|
melbandroformers/experimental/kar_gabox.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8
|
| 3 |
+
size 913026650
|
melbandroformers/experimental/small_inst.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31c0a91d60aac2bdc6ee4a53bf7ebd0370e6e894f6b22a1e37511e05eecca335
|
| 3 |
+
size 202573672
|
melbandroformers/experimental/vocfv7beta1.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bb98488b80e7797e6f447c0dd8f89f2b7040eebc5e839b5a59871b7a3a19265
|
| 3 |
+
size 913091027
|
melbandroformers/experimental/vocfv7beta2.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85bcaa03568f96e56b245302c177658e8cb83aa6e2f4ae260e67202db9d56123
|
| 3 |
+
size 913030778
|
melbandroformers/experimental/vocfv7beta3.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45463b0051651b7ed15677cc41f21bd384d66796938f46955617bf6f1d661988
|
| 3 |
+
size 913030778
|
melbandroformers/instrumental/INSTV5.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38e935cf1e97afcc1de84d0bdb87dd8090bad530fa0df28e707d16448e1d38e2
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/INSTV5N.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:175203923fac3e52ae00e7e37d41e8a7fef5020b6ee4e4144f4786daabc54b34
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/INSTV6.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:677951b8556a27abe32e39705640638826e78101fa901a51ad73d20522be6d25
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/INSTV6N.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:802f3e5d183d7c4b50dea147c320e61634f5be6ff55fa899fdebeaf0f3cf7f42
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/INSTV7N.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0ca36af5d1314be46b56c8a53b6be02f98511fa5d7e3e196fd895755e65be3c
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/Inst_ExperimentalV1.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb19b968287d31b6d229816342aa7f08f7e4010b7c61be2427fbdb0c04a8a020
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/Inst_GaboxFVX.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:545ef13b0cdbac505818a38db98e09c54e7c03ea17b4e0c895a531bfa352fa59
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/Inst_GaboxFv7z.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef229f0dcd370c1767e4292981c59e5248814da45f32bfacebcc0f28adaa30a1
|
| 3 |
+
size 913031195
|
melbandroformers/instrumental/Inst_GaboxFv8.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50bfa127d21f419e0da89730867d28c7ac4484c9473e6f313d036bc8b429df80
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/Inst_GaboxFv9.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:068d29a54e71f69ed871ca410e09b9877e153ad1439e825bd093573ea4de5762
|
| 3 |
+
size 913091027
|
melbandroformers/instrumental/Inst_GaboxV7.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e725a860176acb475d983a1ddd9c1a99a619c69cc9ceda808dd294d10db746a5
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/denoisedebleed.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/inst_Fv4.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b03d9bf0fc643dabf006dee38fd798140b4884c1b65c573b4488c1a2a876ad84
|
| 3 |
+
size 913091027
|
melbandroformers/instrumental/inst_Fv4Noise.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f50296e913b9af3b5b3b961e92877ef0d4a74f9a433e796e89960c4c2b1abe53
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/inst_gabox.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/inst_gabox.yaml
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audio:
|
| 2 |
+
chunk_size: 485100
|
| 3 |
+
dim_f: 1024
|
| 4 |
+
dim_t: 1101
|
| 5 |
+
hop_length: 441
|
| 6 |
+
n_fft: 2048
|
| 7 |
+
num_channels: 2
|
| 8 |
+
sample_rate: 44100
|
| 9 |
+
min_mean_abs: 0.000
|
| 10 |
+
|
| 11 |
+
model:
|
| 12 |
+
dim: 384
|
| 13 |
+
depth: 6
|
| 14 |
+
stereo: true
|
| 15 |
+
num_stems: 1
|
| 16 |
+
time_transformer_depth: 1
|
| 17 |
+
freq_transformer_depth: 1
|
| 18 |
+
num_bands: 60
|
| 19 |
+
dim_head: 64
|
| 20 |
+
heads: 8
|
| 21 |
+
attn_dropout: 0
|
| 22 |
+
ff_dropout: 0
|
| 23 |
+
flash_attn: True
|
| 24 |
+
dim_freqs_in: 1025
|
| 25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
| 26 |
+
stft_n_fft: 2048
|
| 27 |
+
stft_hop_length: 441
|
| 28 |
+
stft_win_length: 2048
|
| 29 |
+
stft_normalized: False
|
| 30 |
+
mask_estimator_depth: 2
|
| 31 |
+
multi_stft_resolution_loss_weight: 1.0
|
| 32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 33 |
+
- 4096
|
| 34 |
+
- 2048
|
| 35 |
+
- 1024
|
| 36 |
+
- 512
|
| 37 |
+
- 256
|
| 38 |
+
multi_stft_hop_size: 147
|
| 39 |
+
multi_stft_normalized: False
|
| 40 |
+
|
| 41 |
+
training:
|
| 42 |
+
instruments:
|
| 43 |
+
- Instrumental
|
| 44 |
+
- Vocals
|
| 45 |
+
target_instrument: Instrumental
|
| 46 |
+
use_amp: True
|
| 47 |
+
|
| 48 |
+
inference:
|
| 49 |
+
batch_size: 1
|
| 50 |
+
dim_t: 1101
|
| 51 |
+
num_overlap: 2
|
melbandroformers/instrumental/inst_gaboxBv1.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de972fb724601beef237abe94c8b934c73218e9baf3e344ab4c2333276e5cfe7
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/intrumental_gabox.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
|
| 3 |
+
size 913026650
|
melbandroformers/instrumental/v10.yaml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audio:
|
| 2 |
+
chunk_size: 352800
|
| 3 |
+
dim_f: 1024
|
| 4 |
+
dim_t: 256
|
| 5 |
+
hop_length: 441
|
| 6 |
+
n_fft: 2048
|
| 7 |
+
num_channels: 2
|
| 8 |
+
sample_rate: 44100
|
| 9 |
+
min_mean_abs: 0.00
|
| 10 |
+
|
| 11 |
+
model:
|
| 12 |
+
dim: 256
|
| 13 |
+
depth: 12
|
| 14 |
+
stereo: true
|
| 15 |
+
num_stems: 1
|
| 16 |
+
time_transformer_depth: 1
|
| 17 |
+
freq_transformer_depth: 1
|
| 18 |
+
num_bands: 60
|
| 19 |
+
dim_head: 64
|
| 20 |
+
heads: 8
|
| 21 |
+
attn_dropout: 0
|
| 22 |
+
ff_dropout: 0
|
| 23 |
+
flash_attn: true
|
| 24 |
+
dim_freqs_in: 1025
|
| 25 |
+
sample_rate: 44100
|
| 26 |
+
stft_n_fft: 2048
|
| 27 |
+
stft_hop_length: 441
|
| 28 |
+
stft_win_length: 2048
|
| 29 |
+
stft_normalized: true
|
| 30 |
+
mask_estimator_depth: 2
|
| 31 |
+
multi_stft_resolution_loss_weight: 1.0
|
| 32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 33 |
+
- 4096
|
| 34 |
+
- 2048
|
| 35 |
+
- 1024
|
| 36 |
+
- 512
|
| 37 |
+
- 256
|
| 38 |
+
multi_stft_hop_size: 250
|
| 39 |
+
multi_stft_normalized: false
|
| 40 |
+
use_torch_checkpoint: true
|
| 41 |
+
|
| 42 |
+
training:
|
| 43 |
+
batch_size: 1
|
| 44 |
+
gradient_accumulation_steps: 999999999999999999999999
|
| 45 |
+
grad_clip: 0
|
| 46 |
+
instruments:
|
| 47 |
+
- other
|
| 48 |
+
- vocals
|
| 49 |
+
lr: 0.00001
|
| 50 |
+
patience: 100000000
|
| 51 |
+
reduce_factor: 0.95
|
| 52 |
+
target_instrument: other
|
| 53 |
+
num_epochs: 1000
|
| 54 |
+
num_steps: 1000
|
| 55 |
+
augmentation: false # enable augmentations by audiomentations and pedalboard
|
| 56 |
+
augmentation_type: simple1
|
| 57 |
+
use_mp3_compress: false # Deprecated
|
| 58 |
+
augmentation_mix: false # Mix several stems of the same type with some probability
|
| 59 |
+
augmentation_loudness: true # randomly change loudness of each stem
|
| 60 |
+
augmentation_loudness_type: 1 # Type 1 or 2
|
| 61 |
+
augmentation_loudness_min: 0
|
| 62 |
+
augmentation_loudness_max: 0
|
| 63 |
+
q: 0.95
|
| 64 |
+
coarse_loss_clip: false
|
| 65 |
+
ema_momentum: 0.999
|
| 66 |
+
optimizer: adamw
|
| 67 |
+
other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
|
| 68 |
+
use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
|
| 69 |
+
|
| 70 |
+
inference:
|
| 71 |
+
batch_size: 1
|
| 72 |
+
dim_t: 1101
|
| 73 |
+
num_overlap: 2
|
melbandroformers/karaoke/karaokegabox_1750911344.yaml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audio:
|
| 2 |
+
chunk_size: 485100
|
| 3 |
+
dim_f: 1024
|
| 4 |
+
dim_t: 256
|
| 5 |
+
hop_length: 441
|
| 6 |
+
n_fft: 2048
|
| 7 |
+
num_channels: 2
|
| 8 |
+
sample_rate: 44100
|
| 9 |
+
min_mean_abs: 0.000
|
| 10 |
+
|
| 11 |
+
model:
|
| 12 |
+
dim: 384
|
| 13 |
+
depth: 6
|
| 14 |
+
stereo: true
|
| 15 |
+
num_stems: 1
|
| 16 |
+
time_transformer_depth: 1
|
| 17 |
+
freq_transformer_depth: 1
|
| 18 |
+
num_bands: 60
|
| 19 |
+
dim_head: 64
|
| 20 |
+
heads: 8
|
| 21 |
+
attn_dropout: 0
|
| 22 |
+
ff_dropout: 0
|
| 23 |
+
flash_attn: true
|
| 24 |
+
dim_freqs_in: 1025
|
| 25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
| 26 |
+
stft_n_fft: 2048
|
| 27 |
+
stft_hop_length: 441
|
| 28 |
+
stft_win_length: 2048
|
| 29 |
+
stft_normalized: false
|
| 30 |
+
mask_estimator_depth: 2
|
| 31 |
+
multi_stft_resolution_loss_weight: 1.0
|
| 32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 33 |
+
- 4096
|
| 34 |
+
- 2048
|
| 35 |
+
- 1024
|
| 36 |
+
- 512
|
| 37 |
+
- 256
|
| 38 |
+
multi_stft_hop_size: 147
|
| 39 |
+
multi_stft_normalized: true
|
| 40 |
+
|
| 41 |
+
training:
|
| 42 |
+
batch_size: 1
|
| 43 |
+
gradient_accumulation_steps: 1
|
| 44 |
+
grad_clip: 0
|
| 45 |
+
instruments:
|
| 46 |
+
- Vocals
|
| 47 |
+
- Instrumental
|
| 48 |
+
lr: 0.0005
|
| 49 |
+
patience: 2
|
| 50 |
+
reduce_factor: 0.95
|
| 51 |
+
target_instrument: Vocals
|
| 52 |
+
num_epochs: 1000
|
| 53 |
+
num_steps: 1000
|
| 54 |
+
augmentation: false # enable augmentations by audiomentations and pedalboard
|
| 55 |
+
augmentation_type:
|
| 56 |
+
use_mp3_compress: false # Deprecated
|
| 57 |
+
augmentation_mix: false # Mix several stems of the same type with some probability
|
| 58 |
+
augmentation_loudness: false # randomly change loudness of each stem
|
| 59 |
+
augmentation_loudness_type: 1 # Type 1 or 2
|
| 60 |
+
augmentation_loudness_min: 0
|
| 61 |
+
augmentation_loudness_max: 0
|
| 62 |
+
q: 0.95
|
| 63 |
+
coarse_loss_clip: false
|
| 64 |
+
ema_momentum: 0.999
|
| 65 |
+
optimizer: adamw
|
| 66 |
+
other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
|
| 67 |
+
use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
|
| 68 |
+
|
| 69 |
+
inference:
|
| 70 |
+
batch_size: 1
|
| 71 |
+
dim_t: 1101
|
| 72 |
+
num_overlap: 8
|
melbandroformers/vocals/voc_Fv3.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49d81446b34a7848446efde7898b25bdc32fe872c2393617acb5356649f7ea93
|
| 3 |
+
size 913026650
|
melbandroformers/vocals/voc_fv4.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a9657de5fd3ed87ad4fd1a9d2069743ecb33424836973ad0f3288e2a64e90bc
|
| 3 |
+
size 913026650
|
melbandroformers/vocals/voc_fv5.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
|
| 3 |
+
size 913026650
|
melbandroformers/vocals/voc_gabox.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517
|
| 3 |
+
size 913026650
|
melbandroformers/vocals/voc_gabox.yaml
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audio:
|
| 2 |
+
chunk_size: 352800
|
| 3 |
+
dim_f: 1024
|
| 4 |
+
dim_t: 256
|
| 5 |
+
hop_length: 441
|
| 6 |
+
n_fft: 2048
|
| 7 |
+
num_channels: 2
|
| 8 |
+
sample_rate: 44100
|
| 9 |
+
min_mean_abs: 0.001
|
| 10 |
+
|
| 11 |
+
model:
|
| 12 |
+
dim: 384
|
| 13 |
+
depth: 6
|
| 14 |
+
stereo: true
|
| 15 |
+
num_stems: 1
|
| 16 |
+
time_transformer_depth: 1
|
| 17 |
+
freq_transformer_depth: 1
|
| 18 |
+
num_bands: 60
|
| 19 |
+
dim_head: 64
|
| 20 |
+
heads: 8
|
| 21 |
+
attn_dropout: 0
|
| 22 |
+
ff_dropout: 0
|
| 23 |
+
flash_attn: True
|
| 24 |
+
dim_freqs_in: 1025
|
| 25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
| 26 |
+
stft_n_fft: 2048
|
| 27 |
+
stft_hop_length: 441
|
| 28 |
+
stft_win_length: 2048
|
| 29 |
+
stft_normalized: False
|
| 30 |
+
mask_estimator_depth: 2
|
| 31 |
+
multi_stft_resolution_loss_weight: 1.0
|
| 32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
| 33 |
+
- 4096
|
| 34 |
+
- 2048
|
| 35 |
+
- 1024
|
| 36 |
+
- 512
|
| 37 |
+
- 256
|
| 38 |
+
multi_stft_hop_size: 147
|
| 39 |
+
multi_stft_normalized: False
|
| 40 |
+
|
| 41 |
+
training:
|
| 42 |
+
instruments:
|
| 43 |
+
- Vocals
|
| 44 |
+
- Instrumental
|
| 45 |
+
target_instrument: Vocals
|
| 46 |
+
|
| 47 |
+
inference:
|
| 48 |
+
batch_size: 1
|
| 49 |
+
dim_t: 1101
|
| 50 |
+
num_overlap: 1
|
| 51 |
+
chunk_size: 352800
|
melbandroformers/vocals/voc_gaboxFv2.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2888813aa5b519941fa8548efc5a4331d63c61909007eb17fe95c367be230196
|
| 3 |
+
size 913026650
|