jaman21 committed on
Commit
1dc62f8
·
verified ·
1 Parent(s): ff65860

Upload model

Browse files
roformer/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 4
43
+ gradient_accumulation_steps: 1
44
+ grad_clip: 0
45
+ instruments:
46
+ - Vocals
47
+ - Instrumental
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: Vocals
52
+ num_epochs: 1000
53
+ num_steps: 2000
54
+ augmentation: false # enable augmentations from audiomentations and pedalboard
55
+ augmentation_type: null
56
+ use_mp3_compress: false # Deprecated
57
+ augmentation_mix: false # Mix several stems of the same type with some probability
58
+ augmentation_loudness: false # randomly change loudness of each stem
59
+ augmentation_loudness_type: 1 # Type 1 or 2
60
+ augmentation_loudness_min: 0
61
+ augmentation_loudness_max: 0
62
+ q: 0.95
63
+ coarse_loss_clip: false
64
+ ema_momentum: 0.999
65
+ optimizer: adam
66
+ other_fix: false # needed for checking on the multisong dataset when "other" is actually the instrumental
67
+ use_amp: true # enable or disable mixed precision (float16); should usually be true
68
+ inference:
69
+ batch_size: 1
70
+ dim_t: 801
71
+ num_overlap: 4
roformer/config_melband_roformer_voc_gabox.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - Vocals
44
+ - Instrumental
45
+ target_instrument: Vocals
46
+
47
+ inference:
48
+ batch_size: 1
49
+ dim_t: 1101
50
+ num_overlap: 1
51
+ chunk_size: 352800
roformer/config_melband_roformer_vocals_kim.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - vocals
44
+ - other
45
+ target_instrument: vocals
46
+
47
+ inference:
48
+ dim_t: 1101
49
+ num_overlap: 1
50
+ chunk_size: 352800
roformer/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813
3
+ size 913096801
roformer/mel_band_roformer_karaoke_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8
3
+ size 913026650
roformer/mel_band_roformer_voc_fullness_v4_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
3
+ size 913026650
roformer/mel_band_roformer_voc_fullness_v5_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
3
+ size 913026650
roformer/mel_band_roformer_voc_gabox.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517
3
+ size 913026650
roformer/vocals_mel_band_roformer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87201f4d31afb5bc79993230fc49446918425574db48c01c405e44f365c7559e
3
+ size 913106900
vr/1_HP-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9c654305888fe09668078ade76962b32d3782848f158192da7403a650e935a6
3
+ size 126792647
vr/2_HP-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26d6400d2d2e90811ba2381828c8c3519196097375af524c3d0edd969acf5356
3
+ size 126782699
vr/3_HP-Vocal-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d3f9be265b2f2c3d684f23a9905674627c2552e0f6b5f36325cef8dc1f0ff6b
3
+ size 126792647
vr/4_HP-Vocal-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:295d1e1e3a52c17a040a4a6fe390214ad8ca0797f091b7c6d8db97d247a27271
3
+ size 126792647
vr/5_HP-Karaoke-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe00891defbb61f4261500af22f7624f1a3df8dc75fa3998d1aece02e6be4537
3
+ size 126782699
vr/6_HP-Karaoke-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ce7eaaa9e56f09366b788aebf6d3a72aec8145692c56f1e090e4e7e2d7ce65f
3
+ size 126782699
vr/7_HP2-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:545ee733b704a263bf6f75328c5ed14f0fb439dcda7a6e8f53c8cecb92877afe
3
+ size 550120123
vr/8_HP2-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e072299a87cf3fcdd36da6adc6d2837bed0c07a208775a7678c3d5deeea58f79
3
+ size 550131079
vr/9_HP2-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a99d032ed8c9d59077a8a41f0ceccb279d59ec10c87102167a283151d25ad9a2
3
+ size 550131079
vr/HP2-人声vocals+非人声instrumentals.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39796caa5db18d7f9382d8ac997ac967bfd85f7761014bb807d2543cc844ef05
3
+ size 63454827
vr/HP2_all_vocals.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39796caa5db18d7f9382d8ac997ac967bfd85f7761014bb807d2543cc844ef05
3
+ size 63454827
vr/HP3_all_vocals.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e6b65199e781b4a6542002699be9f19cd3d1cb7d1558bc2bfbcd84674dfe28
3
+ size 63454827
vr/HP5-主旋律人声vocals+其他instrumentals.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5908891829634926119720241e8573d97cbeb8277110a7512bdb0bd7563258ee
3
+ size 63454827
vr/HP5_only_main_vocal.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5908891829634926119720241e8573d97cbeb8277110a7512bdb0bd7563258ee
3
+ size 63454827
vr/UVR-DeEcho-Aggressive.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd1d79d9c5d1b17d20f96f8a9f8aff1b55a83014f70712446bf420c0188e0a0
3
+ size 127139365
vr/UVR-DeEcho-DeReverb.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e644028ec82865dc0fe082bc6fea85a43f7c71cfe375caee2da2d154aa661ee7
3
+ size 223650277
vr/UVR-DeEcho-Normal.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b849dd575643b075c257fb7a96c2ef5a79d7a5e7df74a2b319ad47118f1ee769
3
+ size 127139365
vr/UVR-DeNoise-Lite.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0023492fe98c406817b5253965de19ede65d1c147db015a3a428f07602e99571
3
+ size 17922277
vr/UVR-DeNoise.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5addf43ece5bddd18da9f575a02d7ffdb32342414e6ad7ac8d1dd7a04138a628
3
+ size 127139365
vr/VR-DeEcho-Aggressive.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c8fd1582f9aabc363e47af62ddb88df6cae7e064cae75bbf041a067a5e0aee2
3
+ size 63666335
vr/VR-DeEcho-DeReverb.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01376dd2a571bf3cb9cced680732726d2d732609d09216a610b0d110f133febe
3
+ size 111925279
vr/VR-DeEcho-Normal.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56aba59db3bcdd14a14464e62f3129698ecdea62eee0f003b9360923eb3ac79e
3
+ size 63666335