niobures commited on
Commit
efdb832
·
verified ·
1 Parent(s): e4672d0

RoFormer (code, models, paper)

Browse files
Files changed (43) hide show
  1. .gitattributes +5 -0
  2. Extending Context Window of Large Language Models via Positional Interpolation.pdf +3 -0
  3. Mel-Band RoFormer for Music Source Separation.pdf +3 -0
  4. Music Source Separation with Band-Split RoPE Transformer.pdf +3 -0
  5. RoFormer. Enhanced Transformer with Rotary Position Embedding.pdf +3 -0
  6. Rotational dynamics reduce interference between sensory and memory representations.pdf +3 -0
  7. code/Axora.zip +3 -0
  8. code/BS-RoFormer.zip +3 -0
  9. code/ComfyUI-MelBandRoFormer.zip +3 -0
  10. code/Mel-Band-Roformer-Vocal-Model-modal.zip +3 -0
  11. code/Mel-Band-Roformer-Vocal-Model.zip +3 -0
  12. code/Q-RoFormer.zip +3 -0
  13. code/RoFormer_pytorch.zip +3 -0
  14. code/RoPE-PyTorch.zip +3 -0
  15. code/Roformer_Simlarity.zip +3 -0
  16. code/mel_rof.zip +3 -0
  17. code/roformer-overview.zip +3 -0
  18. code/roformer-separation.zip +3 -0
  19. code/rotary-embedding-torch.zip +3 -0
  20. code/rotary-embedding-triton.zip +3 -0
  21. code/vocal-separation.zip +3 -0
  22. code/vocalback.zip +3 -0
  23. models/MelBandRoFormer_comfy/.gitattributes +35 -0
  24. models/MelBandRoFormer_comfy/MelBandRoformer_fp16.safetensors +3 -0
  25. models/MelBandRoFormer_comfy/MelBandRoformer_fp32.safetensors +3 -0
  26. models/MelBandRoFormer_comfy/README.md +11 -0
  27. models/MelBandRoFormer_comfy/source.txt +1 -0
  28. models/MelBandRoformer/.gitattributes +35 -0
  29. models/MelBandRoformer/MelBandRoformer.ckpt +3 -0
  30. models/MelBandRoformer/README.md +3 -0
  31. models/MelBandRoformer/source.txt +1 -0
  32. models/audio-separation (melmass)/.gitattributes +35 -0
  33. models/audio-separation (melmass)/README.md +3 -0
  34. models/audio-separation (melmass)/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.safetensors +3 -0
  35. models/audio-separation (melmass)/denoise_mel_band_roformer_aufr33_sdr_27.9959.safetensors +3 -0
  36. models/audio-separation (melmass)/model_vocals_mel_band_roformer_sdr_8.42.safetensors +3 -0
  37. models/audio-separation (melmass)/source.txt +1 -0
  38. models/audio-separation-models/Kim-MelRoformer/config_vocals_mel_band_roformer_kj.yaml +72 -0
  39. models/audio-separation-models/mel_band_roformer_karaoke_aufr33_viperx/config_mel_band_roformer_karaoke.yaml +72 -0
  40. models/audio-separation-models/source.txt +1 -0
  41. models/mel_band_roformers/.gitattributes +35 -0
  42. models/mel_band_roformers/mel_band_roformer_karaoke.ckpt +3 -0
  43. models/mel_band_roformers/source.txt +1 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Extending[[:space:]]Context[[:space:]]Window[[:space:]]of[[:space:]]Large[[:space:]]Language[[:space:]]Models[[:space:]]via[[:space:]]Positional[[:space:]]Interpolation.pdf filter=lfs diff=lfs merge=lfs -text
37
+ Mel-Band[[:space:]]RoFormer[[:space:]]for[[:space:]]Music[[:space:]]Source[[:space:]]Separation.pdf filter=lfs diff=lfs merge=lfs -text
38
+ Music[[:space:]]Source[[:space:]]Separation[[:space:]]with[[:space:]]Band-Split[[:space:]]RoPE[[:space:]]Transformer.pdf filter=lfs diff=lfs merge=lfs -text
39
+ RoFormer.[[:space:]]Enhanced[[:space:]]Transformer[[:space:]]with[[:space:]]Rotary[[:space:]]Position[[:space:]]Embedding.pdf filter=lfs diff=lfs merge=lfs -text
40
+ Rotational[[:space:]]dynamics[[:space:]]reduce[[:space:]]interference[[:space:]]between[[:space:]]sensory[[:space:]]and[[:space:]]memory[[:space:]]representations.pdf filter=lfs diff=lfs merge=lfs -text
Extending Context Window of Large Language Models via Positional Interpolation.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:890702e3170e5b8e2fb55552bd86cd99b0aa4ddbb3670ae8c351faa575cfaf09
3
+ size 751217
Mel-Band RoFormer for Music Source Separation.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a1cd47455bf12402c09a94685e084c4c17d98a62383284ae396d092eceada2
3
+ size 120735
Music Source Separation with Band-Split RoPE Transformer.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23671679203069299531436e31cb8a85b699fe6baa3f2e733133e06bd06d9dee
3
+ size 489032
RoFormer. Enhanced Transformer with Rotary Position Embedding.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a481fbe1c8a20b7b1fa566b13102a1896c7829fa9a8b4c80528452a5ddaf79
3
+ size 599289
Rotational dynamics reduce interference between sensory and memory representations.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2e5af126c6f4abd52ed22011c20f0528865b00d4e27802b314bf257cc8dd8c
3
+ size 16210834
code/Axora.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e00c4c3c450cc764d9ca327f313b98d69b37a252f4b2a3fb3e8f6242ea3153ff
3
+ size 38412395
code/BS-RoFormer.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d4a2d651274420c966debdebfc0e9673f21ffb36806440710c46c833c89834
3
+ size 4110983
code/ComfyUI-MelBandRoFormer.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc45e03e6b5054b9adefbf3ba563a6360caf8168734b0f59ae5753eb0034aedf
3
+ size 49015
code/Mel-Band-Roformer-Vocal-Model-modal.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19fe8493c04586bc16592cfcea46d19006a42698931d05a4f2737f411a13018d
3
+ size 71107
code/Mel-Band-Roformer-Vocal-Model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a865fe53fffbad2be24d3bb112b6ad3041f0473549a05d067216d7cdb8d4cd9
3
+ size 63179
code/Q-RoFormer.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e104ceda535d1e546a3edf3835300136479a543076b5f87d4757262ce91e96e
3
+ size 4892621
code/RoFormer_pytorch.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daa24cd942ea2c0fa4ff34e64ba31865a4375315c9d36d0f8336386e09c96214
3
+ size 22421664
code/RoPE-PyTorch.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21e69667c9642babfab5650c865eee99e2792cd91976d8f10e4f12ee7ad40fc5
3
+ size 31925
code/Roformer_Simlarity.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb0c1e995b0e0bef93980f948fa7b81a13d3d36b60ad588d80c3e5f38c57b1e4
3
+ size 4745955
code/mel_rof.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24b2b20a05acbcd0600ab3fbb006d2363c7b48e7c25846ae693ebfdcbea044a1
3
+ size 109589
code/roformer-overview.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d911f9204c73a125226d4ac9ab406eeda987ac9cdfce7c6175d4fa1bf4f127
3
+ size 1250357
code/roformer-separation.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27d9aebfccb5ba3dd10a6dcda623c2764ad9b7c595e8e6fe40fefeb6595009d0
3
+ size 91265
code/rotary-embedding-torch.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff0ce0e3892a6102ccd7c165952f034dea9830b8fb99b64b0ec5a0dbf7cf8d66
3
+ size 205932
code/rotary-embedding-triton.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42a4e55cb9a7e6ba832cd1302fd9bac16c287e496815b0a231705127dc3bdb1
3
+ size 159750
code/vocal-separation.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d8fb40fb58a47d040dc55031e4c10102fb42402c14e2f538caea94b05fcd5f2
3
+ size 46243
code/vocalback.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4556544546ff93d5d1289923e7cd14fae164d66c9743099bae6a2e6ea21c5c
3
+ size 49000
models/MelBandRoFormer_comfy/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/MelBandRoFormer_comfy/MelBandRoformer_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6119aef379a6c7264e0b37db65ae1e6488b8ca4a00baf56d6d244737b8488226
3
+ size 456479072
models/MelBandRoFormer_comfy/MelBandRoformer_fp32.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc1643fc59480722b50b223d6b07021eacdee4cb4d72f55dd600ee3c96d6327
3
+ size 912885656
models/MelBandRoFormer_comfy/README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - diffusion-single-file
4
+ - comfyui
5
+ ---
6
+
7
+ Safetensors version of https://huggingface.co/KimberleyJSN/melbandroformer
8
+
9
+ To use in ComfyUI:
10
+
11
+ https://github.com/kijai/ComfyUI-MelBandRoFormer
models/MelBandRoFormer_comfy/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Kijai/MelBandRoFormer_comfy
models/MelBandRoformer/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/MelBandRoformer/MelBandRoformer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87201f4d31afb5bc79993230fc49446918425574db48c01c405e44f365c7559e
3
+ size 913106900
models/MelBandRoformer/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ ---
models/MelBandRoformer/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/KimberleyJSN/melbandroformer
models/audio-separation (melmass)/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/audio-separation (melmass)/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
models/audio-separation (melmass)/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:018e839fe81699a392110eadc1d071234f5576d8058ee9d09112071614f46c75
3
+ size 912884152
models/audio-separation (melmass)/denoise_mel_band_roformer_aufr33_sdr_27.9959.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ef342228a72f79bb0efdd3da4c6051d474e39e9313da961afb99e3c326d3584
3
+ size 912884152
models/audio-separation (melmass)/model_vocals_mel_band_roformer_sdr_8.42.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a886ee316a0f6cd82e0fd049c44d56793897ae2a10d23bedd62bf360aa2d71e
3
+ size 134731176
models/audio-separation (melmass)/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/melmass/audio-separation
models/audio-separation-models/Kim-MelRoformer/config_vocals_mel_band_roformer_kj.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 4
43
+ gradient_accumulation_steps: 1
44
+ grad_clip: 0
45
+ instruments:
46
+ - vocals
47
+ - other
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: vocals
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ augmentation: false # enable augmentations by audiomentations and pedalboard
55
+ augmentation_type: null
56
+ use_mp3_compress: false # Deprecated
57
+ augmentation_mix: false # Mix several stems of the same type with some probability
58
+ augmentation_loudness: false # randomly change loudness of each stem
59
+ augmentation_loudness_type: 1 # Type 1 or 2
60
+ augmentation_loudness_min: 0
61
+ augmentation_loudness_max: 0
62
+ q: 0.95
63
+ coarse_loss_clip: false
64
+ ema_momentum: 0.999
65
+ optimizer: adam
66
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
67
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
68
+
69
+ inference:
70
+ batch_size: 4
71
+ dim_t: 256
72
+ num_overlap: 2
models/audio-separation-models/mel_band_roformer_karaoke_aufr33_viperx/config_mel_band_roformer_karaoke.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 4
43
+ gradient_accumulation_steps: 1
44
+ grad_clip: 0
45
+ instruments:
46
+ - karaoke
47
+ - other
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: karaoke
52
+ num_epochs: 1000
53
+ num_steps: 2000
54
+ augmentation: false # enable augmentations by audiomentations and pedalboard
55
+ augmentation_type: null
56
+ use_mp3_compress: false # Deprecated
57
+ augmentation_mix: false # Mix several stems of the same type with some probability
58
+ augmentation_loudness: false # randomly change loudness of each stem
59
+ augmentation_loudness_type: 1 # Type 1 or 2
60
+ augmentation_loudness_min: 0
61
+ augmentation_loudness_max: 0
62
+ q: 0.95
63
+ coarse_loss_clip: false
64
+ ema_momentum: 0.999
65
+ optimizer: adam
66
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
67
+ use_amp: true
68
+
69
+ inference:
70
+ batch_size: 1
71
+ dim_t: 256
72
+ num_overlap: 4
models/audio-separation-models/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/shiromiya/audio-separation-models
models/mel_band_roformers/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/mel_band_roformers/mel_band_roformer_karaoke.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813
3
+ size 913096801
models/mel_band_roformers/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/empz/mel_band_roformers