Mirror lj1995/VoiceConversionWebUI @ b2c8cae96e3b
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- pretrained/G32k.pth +3 -0
- pretrained/G40k.pth +3 -0
- pretrained/G48k.pth +3 -0
- pretrained/f0D48k.pth +3 -0
- pretrained/f0G32k.pth +3 -0
- pretrained/f0G40k.pth +3 -0
- pretrained/f0G48k.pth +3 -0
- pretrained_v0/.gitignore +2 -0
- pretrained_v0/D32k.pth +3 -0
- pretrained_v0/D40k.pth +3 -0
- pretrained_v0/D48k.pth +3 -0
- pretrained_v0/G32k.pth +3 -0
- pretrained_v0/G40k.pth +3 -0
- pretrained_v0/G48k.pth +3 -0
- pretrained_v0/f0D32k.pth +3 -0
- pretrained_v0/f0D40k.pth +3 -0
- pretrained_v0/f0D48k.pth +3 -0
- pretrained_v0/f0G32k.pth +3 -0
- pretrained_v0/f0G40k.pth +3 -0
- pretrained_v0/f0G48k.pth +3 -0
- pretrained_v2/D32k.pth +3 -0
- pretrained_v2/D40k.pth +3 -0
- pretrained_v2/D48k.pth +3 -0
- pretrained_v2/G32k.pth +3 -0
- pretrained_v2/G40k.pth +3 -0
- pretrained_v2/G48k.pth +3 -0
- pretrained_v2/f0D32k.pth +3 -0
- pretrained_v2/f0D40k.pth +3 -0
- pretrained_v2/f0D48k.pth +3 -0
- pretrained_v2/f0G32k.pth +3 -0
- pretrained_v2/f0G40k.pth +3 -0
- pretrained_v2/f0G48k.pth +3 -0
- uvr5_pack/__pycache__/utils.cpython-39.pyc +0 -0
- uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc +0 -0
- uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc +0 -0
- uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc +0 -0
- uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc +0 -0
- uvr5_pack/lib_v5/dataset.py +170 -0
- uvr5_pack/lib_v5/layers.py +116 -0
- uvr5_pack/lib_v5/layers_123812KB .py +116 -0
- uvr5_pack/lib_v5/layers_123821KB.py +116 -0
- uvr5_pack/lib_v5/layers_33966KB.py +122 -0
- uvr5_pack/lib_v5/layers_537227KB.py +122 -0
- uvr5_pack/lib_v5/layers_537238KB.py +122 -0
- uvr5_pack/lib_v5/model_param_init.py +60 -0
- uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json +19 -0
- uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json +19 -0
- uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json +19 -0
- uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json +19 -0
- uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json +19 -0
pretrained/G32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81817645cde7ed2e2d83f23ef883f33dda564924b497e84d792743912eca4c23
|
| 3 |
+
size 72653893
|
pretrained/G40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e428573bda1124b0ae0ae843fd8dcded6027d3993444790b3e9b0100938b2113
|
| 3 |
+
size 72763063
|
pretrained/G48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3862a67ea6313e8ffefc05cee6bee656ef3e089442e9ecf4a6618d60721f3e95
|
| 3 |
+
size 72850501
|
pretrained/f0D48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b84c8bf347ad1e539c842e8f2a4c36ecd9e7fb23c16041189e4877e9b07925c
|
| 3 |
+
size 109978943
|
pretrained/f0G32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:285f524bf48bb692c76ad7bd0bc654c12bd9e5edeb784dddf7f61a789a608574
|
| 3 |
+
size 72795627
|
pretrained/f0G40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9115654aeef1995f7dd3c6fc4140bebbef0ca9760bed798105a2380a34299831
|
| 3 |
+
size 72909665
|
pretrained/f0G48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78bc9cab27e34bcfc194f93029374d871d8b3e663ddedea32a9709e894cc8fe8
|
| 3 |
+
size 73008619
|
pretrained_v0/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*
|
| 2 |
+
!.gitignore
|
pretrained_v0/D32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ab20645829460fdad0d3c44254f1ab53c32cae50c22a66c926ae5aa30abda6f
|
| 3 |
+
size 109978943
|
pretrained_v0/D40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:547f66dbbcd9023b9051ed244d12ab043ba8a4e854b154cc28761ac7c002909b
|
| 3 |
+
size 109978943
|
pretrained_v0/D48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cc013fa60ed9c3f902f5bd99f48c7e3b9352d763d4d3cd6bc241c37b0bfd9ad
|
| 3 |
+
size 109978943
|
pretrained_v0/G32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90e0458e39efd50beef48ab398c9f88cec2b405d69565e2ad958a25882936aa1
|
| 3 |
+
size 72653893
|
pretrained_v0/G40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1e7e413d5b1009777c39cd232ac4d91a81382161d19350fe15bf3f137b01425
|
| 3 |
+
size 72763063
|
pretrained_v0/G48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d4a19433aa20617d0712dce2d379620b2f9b80e21b58d6081ba442f203e26d0
|
| 3 |
+
size 72850501
|
pretrained_v0/f0D32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:294db3087236e2c75260d6179056791c9231245daf5d0485545d9e54c4057c77
|
| 3 |
+
size 109978943
|
pretrained_v0/f0D40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d4f5a441594b470d67579958b2fd4c6b992852ded28ff9e72eda67abcebe423
|
| 3 |
+
size 109978943
|
pretrained_v0/f0D48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b84c8bf347ad1e539c842e8f2a4c36ecd9e7fb23c16041189e4877e9b07925c
|
| 3 |
+
size 109978943
|
pretrained_v0/f0G32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68c08e16e47d2796427b518c9b702edeae5c9d11f8d62608215f10c776d6170b
|
| 3 |
+
size 72795627
|
pretrained_v0/f0G40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a562c00b615367ed2bd97857346a26ce05b5c231629264f5ca2e19ce5f8d59ea
|
| 3 |
+
size 72909665
|
pretrained_v0/f0G48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f36a4b1e1c3009ee72e5e6c572e0418d8585094e94b00ca9bd5c2b6c8a6c404
|
| 3 |
+
size 73008619
|
pretrained_v2/D32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8043378cc6619083d385f5a045de09b83fb3bf8de45c433ca863b71723ac3ca
|
| 3 |
+
size 142875703
|
pretrained_v2/D40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:471378e894e7191f89a94eda8288c5947b16bbe0b10c3f1f17efdb7a1d998242
|
| 3 |
+
size 142875703
|
pretrained_v2/D48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db01094a93c09868a278e03dafe8bb781bfcc1a5ba8df168c948bf9168c84d82
|
| 3 |
+
size 142875703
|
pretrained_v2/G32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:869b26a47f75168d6126f64ac39e6de5247017a8658cfd68aca600f7323efb9f
|
| 3 |
+
size 73811639
|
pretrained_v2/G40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3843da7fde33db1dab176146c70d6c2df06eafe9457f4e3aa10024e9c6a4b69
|
| 3 |
+
size 72959671
|
pretrained_v2/G48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e2b1581a436d07a76b10b9d38765f64aa02836dc65c7dee1ce4140c11ea158b
|
| 3 |
+
size 75318967
|
pretrained_v2/f0D32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd7134e7793674c85474d5145d2d982e3c5d8124fc7bb6c20f710ed65808fa8a
|
| 3 |
+
size 142875703
|
pretrained_v2/f0D40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b6ab091e70801b28e3f41f335f2fc5f3f35c75b39ae2628d419644ec2b0fa09
|
| 3 |
+
size 142875703
|
pretrained_v2/f0D48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2269b73c7a4cf34da09aea99274dabf99b2ddb8a42cbfb065fb3c0aa9a2fc748
|
| 3 |
+
size 142875703
|
pretrained_v2/f0G32k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2332611297b8d88c7436de8f17ef5f07a2119353e962cd93cda5806d59a1133d
|
| 3 |
+
size 73950049
|
pretrained_v2/f0G40k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b2c44035e782c4b14ddc0bede9e2f4a724d025cd073f736d4f43708453adfcb
|
| 3 |
+
size 73106273
|
pretrained_v2/f0G48k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5d51f589cc3632d4eae36a315b4179397695042edc01d15312e1bddc2b764a4
|
| 3 |
+
size 75465569
|
uvr5_pack/__pycache__/utils.cpython-39.pyc
ADDED
|
Binary file (6.87 kB). View file
|
|
|
uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc
ADDED
|
Binary file (4.14 kB). View file
|
|
|
uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc
ADDED
|
Binary file (1.63 kB). View file
|
|
|
uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc
ADDED
|
Binary file (3.46 kB). View file
|
|
|
uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc
ADDED
|
Binary file (13.3 kB). View file
|
|
|
uvr5_pack/lib_v5/dataset.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import random

import numpy as np
import torch
import torch.utils.data
from tqdm import tqdm

from uvr5_pack.lib_v5 import spec_utils


class VocalRemoverValidationSet(torch.utils.data.Dataset):
    """Lazily loads pre-cut complex-spectrogram patches saved as .npz files.

    Each file must contain arrays 'X' (mixture) and 'y' (instruments);
    __getitem__ returns their magnitude spectrograms.
    """

    def __init__(self, patch_list):
        # patch_list: list of paths to .npz patch files, as produced by
        # make_validation_set().
        self.patch_list = patch_list

    def __len__(self):
        return len(self.patch_list)

    def __getitem__(self, idx):
        path = self.patch_list[idx]
        data = np.load(path)

        X, y = data['X'], data['y']

        # Only magnitudes are used for validation; phase is discarded.
        X_mag = np.abs(X)
        y_mag = np.abs(y)

        return X_mag, y_mag


def make_pair(mix_dir, inst_dir):
    """Pair mixture and instrument audio files by sorted filename order.

    Returns a list of (mixture_path, instrument_path) tuples.
    NOTE(review): zip() silently truncates to the shorter listing and the
    pairing relies on both directories sorting identically — verify the
    two directories contain matching file sets.
    """
    input_exts = ['.wav', '.m4a', '.mp3', '.mp4', '.flac']

    X_list = sorted([
        os.path.join(mix_dir, fname)
        for fname in os.listdir(mix_dir)
        if os.path.splitext(fname)[1] in input_exts])
    y_list = sorted([
        os.path.join(inst_dir, fname)
        for fname in os.listdir(inst_dir)
        if os.path.splitext(fname)[1] in input_exts])

    filelist = list(zip(X_list, y_list))

    return filelist


def train_val_split(dataset_dir, split_mode, val_rate, val_filelist):
    """Split the dataset into train/validation file-pair lists.

    split_mode 'random': pair files under <dataset_dir>/{mixtures,instruments},
    shuffle, and hold out a `val_rate` fraction — unless an explicit
    `val_filelist` is given, in which case those pairs are excluded from
    training instead. split_mode 'subdirs': use the training/ and
    validation/ subdirectories directly.

    Returns (train_filelist, val_filelist).
    Raises ValueError for an unknown split_mode, or when val_filelist is
    combined with 'subdirs' mode.
    """
    if split_mode == 'random':
        filelist = make_pair(
            os.path.join(dataset_dir, 'mixtures'),
            os.path.join(dataset_dir, 'instruments'))

        random.shuffle(filelist)

        if len(val_filelist) == 0:
            val_size = int(len(filelist) * val_rate)
            # BUG FIX: when val_size == 0 (small dataset or tiny val_rate),
            # the old `filelist[:-val_size]` evaluated to an EMPTY list,
            # silently discarding the entire training set.
            if val_size > 0:
                train_filelist = filelist[:-val_size]
                val_filelist = filelist[-val_size:]
            else:
                train_filelist = filelist
                val_filelist = []
        else:
            train_filelist = [
                pair for pair in filelist
                if list(pair) not in val_filelist]
    elif split_mode == 'subdirs':
        if len(val_filelist) != 0:
            raise ValueError('The `val_filelist` option is not available in `subdirs` mode')

        train_filelist = make_pair(
            os.path.join(dataset_dir, 'training/mixtures'),
            os.path.join(dataset_dir, 'training/instruments'))

        val_filelist = make_pair(
            os.path.join(dataset_dir, 'validation/mixtures'),
            os.path.join(dataset_dir, 'validation/instruments'))
    else:
        # Previously an unknown mode fell through to a NameError on the
        # return statement; fail explicitly instead.
        raise ValueError('Unknown split_mode: {}'.format(split_mode))

    return train_filelist, val_filelist


def augment(X, y, reduction_rate, reduction_mask, mixup_rate, mixup_alpha):
    """Apply in-place data augmentation to spectrogram batches X and y.

    Per sample, visited in a random order: optional vocal reduction
    (probability `reduction_rate`), random channel swap (p=0.5), random
    mono downmix (p=0.02), random instrumental substitution X := y
    (p=0.02), and mixup with the next sample in the permutation
    (probability `mixup_rate`, Beta(mixup_alpha, mixup_alpha) weight).
    Mutates X and y and also returns them.
    """
    perm = np.random.permutation(len(X))
    for i, idx in enumerate(tqdm(perm)):
        if np.random.uniform() < reduction_rate:
            y[idx] = spec_utils.reduce_vocal_aggressively(X[idx], y[idx], reduction_mask)

        if np.random.uniform() < 0.5:
            # swap channel
            X[idx] = X[idx, ::-1]
            y[idx] = y[idx, ::-1]
        if np.random.uniform() < 0.02:
            # mono
            X[idx] = X[idx].mean(axis=0, keepdims=True)
            y[idx] = y[idx].mean(axis=0, keepdims=True)
        if np.random.uniform() < 0.02:
            # inst: present an instrumental-only input so the model learns
            # to pass instrumentals through unchanged
            X[idx] = y[idx]

        if np.random.uniform() < mixup_rate and i < len(perm) - 1:
            lam = np.random.beta(mixup_alpha, mixup_alpha)
            X[idx] = lam * X[idx] + (1 - lam) * X[perm[i + 1]]
            y[idx] = lam * y[idx] + (1 - lam) * y[perm[i + 1]]

    return X, y


def make_padding(width, cropsize, offset):
    """Compute (left, right, roi_size) so that `width` splits into whole
    roi_size windows with `offset` columns of context on each side."""
    left = offset
    roi_size = cropsize - left * 2
    if roi_size == 0:
        roi_size = cropsize
    right = roi_size - (width % roi_size) + left

    return left, right, roi_size


def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset):
    """Build an in-memory training set of random spectrogram crops.

    For each (mixture, instruments) pair: load/cache the spectrograms,
    normalize both jointly by their max magnitude, pad, and sample
    `patches` random crops of width `cropsize`.

    Returns (X_dataset, y_dataset): complex64 arrays of shape
    (patches * len(filelist), 2, n_fft // 2 + 1, cropsize).
    """
    len_dataset = patches * len(filelist)

    X_dataset = np.zeros(
        (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)
    y_dataset = np.zeros(
        (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        # BUG FIX: np.random.randint's upper bound is exclusive, so the
        # valid range of crop starts is [0, width - cropsize + 1). The
        # original `width - cropsize` bound never sampled the final valid
        # position and raised "ValueError: low >= high" whenever the
        # padded width equalled cropsize exactly.
        starts = np.random.randint(0, X_pad.shape[2] - cropsize + 1, patches)
        ends = starts + cropsize
        for j in range(patches):
            idx = i * patches + j
            X_dataset[idx] = X_pad[:, :, starts[j]:ends[j]]
            y_dataset[idx] = y_pad[:, :, starts[j]:ends[j]]

    return X_dataset, y_dataset


def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
    """Cut validation pairs into sequential patches cached on disk.

    Patches are written once into a parameter-stamped directory
    ('cs{}_sr{}_hl{}_nf{}_of{}') and reused on later runs.
    Returns a VocalRemoverValidationSet over the patch paths.
    """
    patch_list = []
    patch_dir = 'cs{}_sr{}_hl{}_nf{}_of{}'.format(cropsize, sr, hop_length, n_fft, offset)
    os.makedirs(patch_dir, exist_ok=True)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        basename = os.path.splitext(os.path.basename(X_path))[0]

        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        len_dataset = int(np.ceil(X.shape[2] / roi_size))
        for j in range(len_dataset):
            outpath = os.path.join(patch_dir, '{}_p{}.npz'.format(basename, j))
            start = j * roi_size
            if not os.path.exists(outpath):
                np.savez(
                    outpath,
                    X=X_pad[:, :, start:start + cropsize],
                    y=y_pad[:, :, start:start + cropsize])
            patch_list.append(outpath)

    return VocalRemoverValidationSet(patch_list)
|
uvr5_pack/lib_v5/layers.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
from torch import nn
import torch.nn.functional as F

from uvr5_pack.lib_v5 import spec_utils


class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation block (bias-free conv)."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin, nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False),
            nn.BatchNorm2d(nout),
            activ()
        )

    # FIX: define forward() instead of overriding __call__ so nn.Module's
    # hook/dispatch machinery is not bypassed; instances remain callable.
    def forward(self, x):
        return self.conv(x)


class SeperableConv2DBNActiv(nn.Module):
    """Depthwise-separable conv (depthwise + 1x1 pointwise) -> BatchNorm2d -> activation.

    The class name keeps the historical 'Seperable' spelling for
    compatibility with existing imports and checkpoints.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(SeperableConv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin, nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False),
            nn.Conv2d(
                nin, nout,
                kernel_size=1,
                bias=False),
            nn.BatchNorm2d(nout),
            activ()
        )

    def forward(self, x):
        return self.conv(x)


class Encoder(nn.Module):
    """Two-conv encoder stage; returns (strided output, full-resolution skip)."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip


class Decoder(nn.Module):
    """Upsample x2, optionally concatenate a center-cropped skip tensor, then conv."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if skip is not None:
            # Crop the skip tensor to x's spatial size before concatenation.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h


class ASPPModule(nn.Module):
    """Atrous Spatial Pyramid Pooling: pooled, 1x1, and three dilated
    separable-conv branches concatenated and bottlenecked to `nout`."""

    def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        # Pooling branch: collapses dim 2 to size 1, then 1x1 conv.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        _, _, h, w = x.size()
        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
        bottle = self.bottleneck(out)
        return bottle
|
uvr5_pack/lib_v5/layers_123812KB .py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from uvr5_pack.lib_v5 import spec_utils
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Conv2DBNActiv(nn.Module):
|
| 9 |
+
|
| 10 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
|
| 11 |
+
super(Conv2DBNActiv, self).__init__()
|
| 12 |
+
self.conv = nn.Sequential(
|
| 13 |
+
nn.Conv2d(
|
| 14 |
+
nin, nout,
|
| 15 |
+
kernel_size=ksize,
|
| 16 |
+
stride=stride,
|
| 17 |
+
padding=pad,
|
| 18 |
+
dilation=dilation,
|
| 19 |
+
bias=False),
|
| 20 |
+
nn.BatchNorm2d(nout),
|
| 21 |
+
activ()
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
def __call__(self, x):
|
| 25 |
+
return self.conv(x)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SeperableConv2DBNActiv(nn.Module):
|
| 29 |
+
|
| 30 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
|
| 31 |
+
super(SeperableConv2DBNActiv, self).__init__()
|
| 32 |
+
self.conv = nn.Sequential(
|
| 33 |
+
nn.Conv2d(
|
| 34 |
+
nin, nin,
|
| 35 |
+
kernel_size=ksize,
|
| 36 |
+
stride=stride,
|
| 37 |
+
padding=pad,
|
| 38 |
+
dilation=dilation,
|
| 39 |
+
groups=nin,
|
| 40 |
+
bias=False),
|
| 41 |
+
nn.Conv2d(
|
| 42 |
+
nin, nout,
|
| 43 |
+
kernel_size=1,
|
| 44 |
+
bias=False),
|
| 45 |
+
nn.BatchNorm2d(nout),
|
| 46 |
+
activ()
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
def __call__(self, x):
|
| 50 |
+
return self.conv(x)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Encoder(nn.Module):
|
| 54 |
+
|
| 55 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
|
| 56 |
+
super(Encoder, self).__init__()
|
| 57 |
+
self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
| 58 |
+
self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
|
| 59 |
+
|
| 60 |
+
def __call__(self, x):
|
| 61 |
+
skip = self.conv1(x)
|
| 62 |
+
h = self.conv2(skip)
|
| 63 |
+
|
| 64 |
+
return h, skip
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class Decoder(nn.Module):
|
| 68 |
+
|
| 69 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
| 70 |
+
super(Decoder, self).__init__()
|
| 71 |
+
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
| 72 |
+
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
| 73 |
+
|
| 74 |
+
def __call__(self, x, skip=None):
|
| 75 |
+
x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
|
| 76 |
+
if skip is not None:
|
| 77 |
+
skip = spec_utils.crop_center(skip, x)
|
| 78 |
+
x = torch.cat([x, skip], dim=1)
|
| 79 |
+
h = self.conv(x)
|
| 80 |
+
|
| 81 |
+
if self.dropout is not None:
|
| 82 |
+
h = self.dropout(h)
|
| 83 |
+
|
| 84 |
+
return h
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class ASPPModule(nn.Module):
|
| 88 |
+
|
| 89 |
+
def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
|
| 90 |
+
super(ASPPModule, self).__init__()
|
| 91 |
+
self.conv1 = nn.Sequential(
|
| 92 |
+
nn.AdaptiveAvgPool2d((1, None)),
|
| 93 |
+
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
| 94 |
+
)
|
| 95 |
+
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
| 96 |
+
self.conv3 = SeperableConv2DBNActiv(
|
| 97 |
+
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
| 98 |
+
self.conv4 = SeperableConv2DBNActiv(
|
| 99 |
+
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
| 100 |
+
self.conv5 = SeperableConv2DBNActiv(
|
| 101 |
+
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
| 102 |
+
self.bottleneck = nn.Sequential(
|
| 103 |
+
Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
|
| 104 |
+
nn.Dropout2d(0.1)
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
def forward(self, x):
|
| 108 |
+
_, _, h, w = x.size()
|
| 109 |
+
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
|
| 110 |
+
feat2 = self.conv2(x)
|
| 111 |
+
feat3 = self.conv3(x)
|
| 112 |
+
feat4 = self.conv4(x)
|
| 113 |
+
feat5 = self.conv5(x)
|
| 114 |
+
out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
|
| 115 |
+
bottle = self.bottleneck(out)
|
| 116 |
+
return bottle
|
uvr5_pack/lib_v5/layers_123821KB.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from uvr5_pack.lib_v5 import spec_utils
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Conv2DBNActiv(nn.Module):
|
| 9 |
+
|
| 10 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
|
| 11 |
+
super(Conv2DBNActiv, self).__init__()
|
| 12 |
+
self.conv = nn.Sequential(
|
| 13 |
+
nn.Conv2d(
|
| 14 |
+
nin, nout,
|
| 15 |
+
kernel_size=ksize,
|
| 16 |
+
stride=stride,
|
| 17 |
+
padding=pad,
|
| 18 |
+
dilation=dilation,
|
| 19 |
+
bias=False),
|
| 20 |
+
nn.BatchNorm2d(nout),
|
| 21 |
+
activ()
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
def __call__(self, x):
|
| 25 |
+
return self.conv(x)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SeperableConv2DBNActiv(nn.Module):
|
| 29 |
+
|
| 30 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
|
| 31 |
+
super(SeperableConv2DBNActiv, self).__init__()
|
| 32 |
+
self.conv = nn.Sequential(
|
| 33 |
+
nn.Conv2d(
|
| 34 |
+
nin, nin,
|
| 35 |
+
kernel_size=ksize,
|
| 36 |
+
stride=stride,
|
| 37 |
+
padding=pad,
|
| 38 |
+
dilation=dilation,
|
| 39 |
+
groups=nin,
|
| 40 |
+
bias=False),
|
| 41 |
+
nn.Conv2d(
|
| 42 |
+
nin, nout,
|
| 43 |
+
kernel_size=1,
|
| 44 |
+
bias=False),
|
| 45 |
+
nn.BatchNorm2d(nout),
|
| 46 |
+
activ()
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
def __call__(self, x):
|
| 50 |
+
return self.conv(x)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Encoder(nn.Module):
|
| 54 |
+
|
| 55 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
|
| 56 |
+
super(Encoder, self).__init__()
|
| 57 |
+
self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
| 58 |
+
self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
|
| 59 |
+
|
| 60 |
+
def __call__(self, x):
|
| 61 |
+
skip = self.conv1(x)
|
| 62 |
+
h = self.conv2(skip)
|
| 63 |
+
|
| 64 |
+
return h, skip
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class Decoder(nn.Module):
|
| 68 |
+
|
| 69 |
+
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
| 70 |
+
super(Decoder, self).__init__()
|
| 71 |
+
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
| 72 |
+
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
| 73 |
+
|
| 74 |
+
def __call__(self, x, skip=None):
|
| 75 |
+
x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
|
| 76 |
+
if skip is not None:
|
| 77 |
+
skip = spec_utils.crop_center(skip, x)
|
| 78 |
+
x = torch.cat([x, skip], dim=1)
|
| 79 |
+
h = self.conv(x)
|
| 80 |
+
|
| 81 |
+
if self.dropout is not None:
|
| 82 |
+
h = self.dropout(h)
|
| 83 |
+
|
| 84 |
+
return h
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class ASPPModule(nn.Module):
|
| 88 |
+
|
| 89 |
+
def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
|
| 90 |
+
super(ASPPModule, self).__init__()
|
| 91 |
+
self.conv1 = nn.Sequential(
|
| 92 |
+
nn.AdaptiveAvgPool2d((1, None)),
|
| 93 |
+
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
| 94 |
+
)
|
| 95 |
+
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
| 96 |
+
self.conv3 = SeperableConv2DBNActiv(
|
| 97 |
+
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
| 98 |
+
self.conv4 = SeperableConv2DBNActiv(
|
| 99 |
+
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
| 100 |
+
self.conv5 = SeperableConv2DBNActiv(
|
| 101 |
+
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
| 102 |
+
self.bottleneck = nn.Sequential(
|
| 103 |
+
Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
|
| 104 |
+
nn.Dropout2d(0.1)
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
def forward(self, x):
|
| 108 |
+
_, _, h, w = x.size()
|
| 109 |
+
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
|
| 110 |
+
feat2 = self.conv2(x)
|
| 111 |
+
feat3 = self.conv3(x)
|
| 112 |
+
feat4 = self.conv4(x)
|
| 113 |
+
feat5 = self.conv5(x)
|
| 114 |
+
out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
|
| 115 |
+
bottle = self.bottleneck(out)
|
| 116 |
+
return bottle
|
uvr5_pack/lib_v5/layers_33966KB.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from uvr5_pack.lib_v5 import spec_utils
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    NOTE(review): the module overrides __call__ rather than forward, so
    nn.Module hooks are bypassed — kept as-is for compatibility.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        # Sequential index order (conv.0/conv.1/conv.2) must stay fixed so
        # pretrained state dicts keep loading.
        layers = [
            nn.Conv2d(nin, nout, kernel_size=ksize, stride=stride,
                      padding=pad, dilation=dilation, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        ]
        self.conv = nn.Sequential(*layers)

    def __call__(self, x):
        return self.conv(x)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise-separable conv (depthwise then 1x1 pointwise) followed by
    BatchNorm2d and an activation.

    NOTE(review): the original "Seperable" spelling is kept because callers
    and pretrained checkpoints reference this name.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(SeperableConv2DBNActiv, self).__init__()
        depthwise = nn.Conv2d(nin, nin, kernel_size=ksize, stride=stride,
                              padding=pad, dilation=dilation, groups=nin,
                              bias=False)
        pointwise = nn.Conv2d(nin, nout, kernel_size=1, bias=False)
        # Sequential index order (conv.0..conv.3) must stay fixed for
        # pretrained state dicts.
        self.conv = nn.Sequential(depthwise, pointwise, nn.BatchNorm2d(nout), activ())

    def __call__(self, x):
        return self.conv(x)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Encoder(nn.Module):
    """Two stacked Conv2DBNActiv blocks; returns (output, skip)."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        # conv1 keeps resolution (stride 1) and provides the skip tensor;
        # conv2 applies the configured stride.
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def __call__(self, x):
        skip = self.conv1(x)
        return self.conv2(skip), skip
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class Decoder(nn.Module):
    """Upsample 2x, optionally concatenate a center-cropped skip tensor,
    then apply Conv2DBNActiv and optional 2D dropout."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        # Dropout is optional so the same class serves plain decoders too.
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def __call__(self, x, skip=None):
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if skip is not None:
            # Crop the skip tensor so spatial sizes match after upsampling.
            cropped = spec_utils.crop_center(skip, x)
            x = torch.cat([x, cropped], dim=1)
        out = self.conv(x)
        if self.dropout is not None:
            out = self.dropout(out)
        return out
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class ASPPModule(nn.Module):
    """Atrous spatial pyramid pooling with a pooled branch, a 1x1 branch
    and five dilated separable-conv branches fused by a 1x1 bottleneck.

    NOTE(review): conv6 and conv7 reuse dilations[2]; dilations[3] and
    dilations[4] of the default 5-tuple are never read. Kept as-is to
    match the behavior expected by pretrained weights — confirm upstream
    before "fixing".
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        # Image-pooling branch: average over the height axis, then 1x1 conv.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv6 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv7 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        # 7 branches concatenated channel-wise before the bottleneck.
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1),
        )

    def forward(self, x):
        _, _, h, w = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        branch_outs = [pooled] + [
            conv(x)
            for conv in (self.conv2, self.conv3, self.conv4,
                         self.conv5, self.conv6, self.conv7)
        ]
        return self.bottleneck(torch.cat(branch_outs, dim=1))
|
uvr5_pack/lib_v5/layers_537227KB.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from uvr5_pack.lib_v5 import spec_utils
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    NOTE(review): the module overrides __call__ rather than forward, so
    nn.Module hooks are bypassed — kept as-is for compatibility.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        # Sequential index order (conv.0/conv.1/conv.2) must stay fixed so
        # pretrained state dicts keep loading.
        layers = [
            nn.Conv2d(nin, nout, kernel_size=ksize, stride=stride,
                      padding=pad, dilation=dilation, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        ]
        self.conv = nn.Sequential(*layers)

    def __call__(self, x):
        return self.conv(x)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise-separable conv (depthwise then 1x1 pointwise) followed by
    BatchNorm2d and an activation.

    NOTE(review): the original "Seperable" spelling is kept because callers
    and pretrained checkpoints reference this name.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(SeperableConv2DBNActiv, self).__init__()
        depthwise = nn.Conv2d(nin, nin, kernel_size=ksize, stride=stride,
                              padding=pad, dilation=dilation, groups=nin,
                              bias=False)
        pointwise = nn.Conv2d(nin, nout, kernel_size=1, bias=False)
        # Sequential index order (conv.0..conv.3) must stay fixed for
        # pretrained state dicts.
        self.conv = nn.Sequential(depthwise, pointwise, nn.BatchNorm2d(nout), activ())

    def __call__(self, x):
        return self.conv(x)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Encoder(nn.Module):
    """Two stacked Conv2DBNActiv blocks; returns (output, skip)."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        # conv1 keeps resolution (stride 1) and provides the skip tensor;
        # conv2 applies the configured stride.
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def __call__(self, x):
        skip = self.conv1(x)
        return self.conv2(skip), skip
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class Decoder(nn.Module):
    """Upsample 2x, optionally concatenate a center-cropped skip tensor,
    then apply Conv2DBNActiv and optional 2D dropout."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        # Dropout is optional so the same class serves plain decoders too.
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def __call__(self, x, skip=None):
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if skip is not None:
            # Crop the skip tensor so spatial sizes match after upsampling.
            cropped = spec_utils.crop_center(skip, x)
            x = torch.cat([x, cropped], dim=1)
        out = self.conv(x)
        if self.dropout is not None:
            out = self.dropout(out)
        return out
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class ASPPModule(nn.Module):
    """Atrous spatial pyramid pooling with a pooled branch, a 1x1 branch
    and five dilated separable-conv branches fused by a 1x1 bottleneck.

    NOTE(review): conv6 and conv7 reuse dilations[2]; dilations[3] and
    dilations[4] of the default 5-tuple are never read. Kept as-is to
    match the behavior expected by pretrained weights — confirm upstream
    before "fixing".
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        # Image-pooling branch: average over the height axis, then 1x1 conv.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv6 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv7 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        # 7 branches concatenated channel-wise before the bottleneck.
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1),
        )

    def forward(self, x):
        _, _, h, w = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        branch_outs = [pooled] + [
            conv(x)
            for conv in (self.conv2, self.conv3, self.conv4,
                         self.conv5, self.conv6, self.conv7)
        ]
        return self.bottleneck(torch.cat(branch_outs, dim=1))
|
uvr5_pack/lib_v5/layers_537238KB.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from uvr5_pack.lib_v5 import spec_utils
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    NOTE(review): the module overrides __call__ rather than forward, so
    nn.Module hooks are bypassed — kept as-is for compatibility.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        # Sequential index order (conv.0/conv.1/conv.2) must stay fixed so
        # pretrained state dicts keep loading.
        layers = [
            nn.Conv2d(nin, nout, kernel_size=ksize, stride=stride,
                      padding=pad, dilation=dilation, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        ]
        self.conv = nn.Sequential(*layers)

    def __call__(self, x):
        return self.conv(x)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise-separable conv (depthwise then 1x1 pointwise) followed by
    BatchNorm2d and an activation.

    NOTE(review): the original "Seperable" spelling is kept because callers
    and pretrained checkpoints reference this name.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(SeperableConv2DBNActiv, self).__init__()
        depthwise = nn.Conv2d(nin, nin, kernel_size=ksize, stride=stride,
                              padding=pad, dilation=dilation, groups=nin,
                              bias=False)
        pointwise = nn.Conv2d(nin, nout, kernel_size=1, bias=False)
        # Sequential index order (conv.0..conv.3) must stay fixed for
        # pretrained state dicts.
        self.conv = nn.Sequential(depthwise, pointwise, nn.BatchNorm2d(nout), activ())

    def __call__(self, x):
        return self.conv(x)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Encoder(nn.Module):
    """Two stacked Conv2DBNActiv blocks; returns (output, skip)."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        # conv1 keeps resolution (stride 1) and provides the skip tensor;
        # conv2 applies the configured stride.
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def __call__(self, x):
        skip = self.conv1(x)
        return self.conv2(skip), skip
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class Decoder(nn.Module):
    """Upsample 2x, optionally concatenate a center-cropped skip tensor,
    then apply Conv2DBNActiv and optional 2D dropout."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        # Dropout is optional so the same class serves plain decoders too.
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def __call__(self, x, skip=None):
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if skip is not None:
            # Crop the skip tensor so spatial sizes match after upsampling.
            cropped = spec_utils.crop_center(skip, x)
            x = torch.cat([x, cropped], dim=1)
        out = self.conv(x)
        if self.dropout is not None:
            out = self.dropout(out)
        return out
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class ASPPModule(nn.Module):
    """Atrous spatial pyramid pooling with a pooled branch, a 1x1 branch
    and five dilated separable-conv branches fused by a 1x1 bottleneck.

    NOTE(review): conv6 and conv7 reuse dilations[2]; dilations[3] and
    dilations[4] of the default 5-tuple are never read. Kept as-is to
    match the behavior expected by pretrained weights — confirm upstream
    before "fixing".
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        # Image-pooling branch: average over the height axis, then 1x1 conv.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv6 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv7 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        # 7 branches concatenated channel-wise before the bottleneck.
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1),
        )

    def forward(self, x):
        _, _, h, w = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        branch_outs = [pooled] + [
            conv(x)
            for conv in (self.conv2, self.conv3, self.conv4,
                         self.conv5, self.conv6, self.conv7)
        ]
        return self.bottleneck(torch.cat(branch_outs, dim=1))
|
uvr5_pack/lib_v5/model_param_init.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import pathlib
|
| 4 |
+
|
| 5 |
+
# Fallback parameter set used when ModelParameters gets no config file
# (44.1 kHz two-band model).
default_param = {
    'bins': 768,
    'unstable_bins': 9,  # training only
    'reduction_bins': 762,  # training only
    'sr': 44100,
    'pre_filter_start': 757,
    'pre_filter_stop': 768,
    'band': {
        1: {
            'sr': 11025,
            'hl': 128,
            'n_fft': 960,
            'crop_start': 0,
            'crop_stop': 245,
            'lpf_start': 61,  # inference only
            'res_type': 'polyphase',
        },
        2: {
            'sr': 44100,
            'hl': 512,
            'n_fft': 1536,
            'crop_start': 24,
            'crop_stop': 547,
            'hpf_start': 81,  # inference only
            'res_type': 'sinc_best',
        },
    },
}
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def int_keys(d):
    """object_pairs_hook for json.loads: convert digit-only string keys
    (e.g. band indices "1", "2") to ints, leaving other keys untouched.

    `d` is the list of (key, value) pairs json supplies; later duplicates
    overwrite earlier ones, matching plain dict construction.
    """
    return {int(k) if k.isdigit() else k: v for k, v in d}
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class ModelParameters(object):
    """Band/STFT parameter container.

    Reads `param.json` out of a ``.pth`` (zip archive) or parses a plain
    ``.json`` file; any other path (including the default '') falls back
    to the module-level default_param.
    """

    def __init__(self, config_path=''):
        suffix = pathlib.Path(config_path).suffix
        if suffix == '.pth':
            import zipfile

            with zipfile.ZipFile(config_path, 'r') as zf:
                self.param = json.loads(zf.read('param.json'), object_pairs_hook=int_keys)
        elif suffix == '.json':
            with open(config_path, 'r') as f:
                self.param = json.loads(f.read(), object_pairs_hook=int_keys)
        else:
            # NOTE(review): this shares the module-level dict (no copy), so
            # the flag defaults below are written into default_param itself.
            # Benign today because they are all False — confirm before reuse.
            self.param = default_param

        # Optional processing flags default to False when absent.
        for k in ['mid_side', 'mid_side_b', 'mid_side_b2', 'stereo_w', 'stereo_n', 'reverse']:
            if k not in self.param:
                self.param[k] = False
|
uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bins": 1024,
|
| 3 |
+
"unstable_bins": 0,
|
| 4 |
+
"reduction_bins": 0,
|
| 5 |
+
"band": {
|
| 6 |
+
"1": {
|
| 7 |
+
"sr": 16000,
|
| 8 |
+
"hl": 512,
|
| 9 |
+
"n_fft": 2048,
|
| 10 |
+
"crop_start": 0,
|
| 11 |
+
"crop_stop": 1024,
|
| 12 |
+
"hpf_start": -1,
|
| 13 |
+
"res_type": "sinc_best"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"sr": 16000,
|
| 17 |
+
"pre_filter_start": 1023,
|
| 18 |
+
"pre_filter_stop": 1024
|
| 19 |
+
}
|
uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bins": 1024,
|
| 3 |
+
"unstable_bins": 0,
|
| 4 |
+
"reduction_bins": 0,
|
| 5 |
+
"band": {
|
| 6 |
+
"1": {
|
| 7 |
+
"sr": 32000,
|
| 8 |
+
"hl": 512,
|
| 9 |
+
"n_fft": 2048,
|
| 10 |
+
"crop_start": 0,
|
| 11 |
+
"crop_stop": 1024,
|
| 12 |
+
"hpf_start": -1,
|
| 13 |
+
"res_type": "kaiser_fast"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"sr": 32000,
|
| 17 |
+
"pre_filter_start": 1000,
|
| 18 |
+
"pre_filter_stop": 1021
|
| 19 |
+
}
|
uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bins": 1024,
|
| 3 |
+
"unstable_bins": 0,
|
| 4 |
+
"reduction_bins": 0,
|
| 5 |
+
"band": {
|
| 6 |
+
"1": {
|
| 7 |
+
"sr": 33075,
|
| 8 |
+
"hl": 384,
|
| 9 |
+
"n_fft": 2048,
|
| 10 |
+
"crop_start": 0,
|
| 11 |
+
"crop_stop": 1024,
|
| 12 |
+
"hpf_start": -1,
|
| 13 |
+
"res_type": "sinc_best"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"sr": 33075,
|
| 17 |
+
"pre_filter_start": 1000,
|
| 18 |
+
"pre_filter_stop": 1021
|
| 19 |
+
}
|
uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bins": 1024,
|
| 3 |
+
"unstable_bins": 0,
|
| 4 |
+
"reduction_bins": 0,
|
| 5 |
+
"band": {
|
| 6 |
+
"1": {
|
| 7 |
+
"sr": 44100,
|
| 8 |
+
"hl": 1024,
|
| 9 |
+
"n_fft": 2048,
|
| 10 |
+
"crop_start": 0,
|
| 11 |
+
"crop_stop": 1024,
|
| 12 |
+
"hpf_start": -1,
|
| 13 |
+
"res_type": "sinc_best"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"sr": 44100,
|
| 17 |
+
"pre_filter_start": 1023,
|
| 18 |
+
"pre_filter_stop": 1024
|
| 19 |
+
}
|
uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bins": 256,
|
| 3 |
+
"unstable_bins": 0,
|
| 4 |
+
"reduction_bins": 0,
|
| 5 |
+
"band": {
|
| 6 |
+
"1": {
|
| 7 |
+
"sr": 44100,
|
| 8 |
+
"hl": 256,
|
| 9 |
+
"n_fft": 512,
|
| 10 |
+
"crop_start": 0,
|
| 11 |
+
"crop_stop": 256,
|
| 12 |
+
"hpf_start": -1,
|
| 13 |
+
"res_type": "sinc_best"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"sr": 44100,
|
| 17 |
+
"pre_filter_start": 256,
|
| 18 |
+
"pre_filter_stop": 256
|
| 19 |
+
}
|