bodhisativa commited on
Commit
e8a591f
·
verified ·
1 Parent(s): bcb6e04

Mirror lj1995/VoiceConversionWebUI @ b2c8cae96e3b

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. pretrained/G32k.pth +3 -0
  2. pretrained/G40k.pth +3 -0
  3. pretrained/G48k.pth +3 -0
  4. pretrained/f0D48k.pth +3 -0
  5. pretrained/f0G32k.pth +3 -0
  6. pretrained/f0G40k.pth +3 -0
  7. pretrained/f0G48k.pth +3 -0
  8. pretrained_v0/.gitignore +2 -0
  9. pretrained_v0/D32k.pth +3 -0
  10. pretrained_v0/D40k.pth +3 -0
  11. pretrained_v0/D48k.pth +3 -0
  12. pretrained_v0/G32k.pth +3 -0
  13. pretrained_v0/G40k.pth +3 -0
  14. pretrained_v0/G48k.pth +3 -0
  15. pretrained_v0/f0D32k.pth +3 -0
  16. pretrained_v0/f0D40k.pth +3 -0
  17. pretrained_v0/f0D48k.pth +3 -0
  18. pretrained_v0/f0G32k.pth +3 -0
  19. pretrained_v0/f0G40k.pth +3 -0
  20. pretrained_v0/f0G48k.pth +3 -0
  21. pretrained_v2/D32k.pth +3 -0
  22. pretrained_v2/D40k.pth +3 -0
  23. pretrained_v2/D48k.pth +3 -0
  24. pretrained_v2/G32k.pth +3 -0
  25. pretrained_v2/G40k.pth +3 -0
  26. pretrained_v2/G48k.pth +3 -0
  27. pretrained_v2/f0D32k.pth +3 -0
  28. pretrained_v2/f0D40k.pth +3 -0
  29. pretrained_v2/f0D48k.pth +3 -0
  30. pretrained_v2/f0G32k.pth +3 -0
  31. pretrained_v2/f0G40k.pth +3 -0
  32. pretrained_v2/f0G48k.pth +3 -0
  33. uvr5_pack/__pycache__/utils.cpython-39.pyc +0 -0
  34. uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc +0 -0
  35. uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc +0 -0
  36. uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc +0 -0
  37. uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc +0 -0
  38. uvr5_pack/lib_v5/dataset.py +170 -0
  39. uvr5_pack/lib_v5/layers.py +116 -0
  40. uvr5_pack/lib_v5/layers_123812KB .py +116 -0
  41. uvr5_pack/lib_v5/layers_123821KB.py +116 -0
  42. uvr5_pack/lib_v5/layers_33966KB.py +122 -0
  43. uvr5_pack/lib_v5/layers_537227KB.py +122 -0
  44. uvr5_pack/lib_v5/layers_537238KB.py +122 -0
  45. uvr5_pack/lib_v5/model_param_init.py +60 -0
  46. uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json +19 -0
  47. uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json +19 -0
  48. uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json +19 -0
  49. uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json +19 -0
  50. uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json +19 -0
pretrained/G32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81817645cde7ed2e2d83f23ef883f33dda564924b497e84d792743912eca4c23
3
+ size 72653893
pretrained/G40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e428573bda1124b0ae0ae843fd8dcded6027d3993444790b3e9b0100938b2113
3
+ size 72763063
pretrained/G48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3862a67ea6313e8ffefc05cee6bee656ef3e089442e9ecf4a6618d60721f3e95
3
+ size 72850501
pretrained/f0D48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b84c8bf347ad1e539c842e8f2a4c36ecd9e7fb23c16041189e4877e9b07925c
3
+ size 109978943
pretrained/f0G32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285f524bf48bb692c76ad7bd0bc654c12bd9e5edeb784dddf7f61a789a608574
3
+ size 72795627
pretrained/f0G40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9115654aeef1995f7dd3c6fc4140bebbef0ca9760bed798105a2380a34299831
3
+ size 72909665
pretrained/f0G48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78bc9cab27e34bcfc194f93029374d871d8b3e663ddedea32a9709e894cc8fe8
3
+ size 73008619
pretrained_v0/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
pretrained_v0/D32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ab20645829460fdad0d3c44254f1ab53c32cae50c22a66c926ae5aa30abda6f
3
+ size 109978943
pretrained_v0/D40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:547f66dbbcd9023b9051ed244d12ab043ba8a4e854b154cc28761ac7c002909b
3
+ size 109978943
pretrained_v0/D48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc013fa60ed9c3f902f5bd99f48c7e3b9352d763d4d3cd6bc241c37b0bfd9ad
3
+ size 109978943
pretrained_v0/G32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e0458e39efd50beef48ab398c9f88cec2b405d69565e2ad958a25882936aa1
3
+ size 72653893
pretrained_v0/G40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1e7e413d5b1009777c39cd232ac4d91a81382161d19350fe15bf3f137b01425
3
+ size 72763063
pretrained_v0/G48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4a19433aa20617d0712dce2d379620b2f9b80e21b58d6081ba442f203e26d0
3
+ size 72850501
pretrained_v0/f0D32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294db3087236e2c75260d6179056791c9231245daf5d0485545d9e54c4057c77
3
+ size 109978943
pretrained_v0/f0D40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d4f5a441594b470d67579958b2fd4c6b992852ded28ff9e72eda67abcebe423
3
+ size 109978943
pretrained_v0/f0D48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b84c8bf347ad1e539c842e8f2a4c36ecd9e7fb23c16041189e4877e9b07925c
3
+ size 109978943
pretrained_v0/f0G32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c08e16e47d2796427b518c9b702edeae5c9d11f8d62608215f10c776d6170b
3
+ size 72795627
pretrained_v0/f0G40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a562c00b615367ed2bd97857346a26ce05b5c231629264f5ca2e19ce5f8d59ea
3
+ size 72909665
pretrained_v0/f0G48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f36a4b1e1c3009ee72e5e6c572e0418d8585094e94b00ca9bd5c2b6c8a6c404
3
+ size 73008619
pretrained_v2/D32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8043378cc6619083d385f5a045de09b83fb3bf8de45c433ca863b71723ac3ca
3
+ size 142875703
pretrained_v2/D40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471378e894e7191f89a94eda8288c5947b16bbe0b10c3f1f17efdb7a1d998242
3
+ size 142875703
pretrained_v2/D48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db01094a93c09868a278e03dafe8bb781bfcc1a5ba8df168c948bf9168c84d82
3
+ size 142875703
pretrained_v2/G32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:869b26a47f75168d6126f64ac39e6de5247017a8658cfd68aca600f7323efb9f
3
+ size 73811639
pretrained_v2/G40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3843da7fde33db1dab176146c70d6c2df06eafe9457f4e3aa10024e9c6a4b69
3
+ size 72959671
pretrained_v2/G48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e2b1581a436d07a76b10b9d38765f64aa02836dc65c7dee1ce4140c11ea158b
3
+ size 75318967
pretrained_v2/f0D32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd7134e7793674c85474d5145d2d982e3c5d8124fc7bb6c20f710ed65808fa8a
3
+ size 142875703
pretrained_v2/f0D40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b6ab091e70801b28e3f41f335f2fc5f3f35c75b39ae2628d419644ec2b0fa09
3
+ size 142875703
pretrained_v2/f0D48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2269b73c7a4cf34da09aea99274dabf99b2ddb8a42cbfb065fb3c0aa9a2fc748
3
+ size 142875703
pretrained_v2/f0G32k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2332611297b8d88c7436de8f17ef5f07a2119353e962cd93cda5806d59a1133d
3
+ size 73950049
pretrained_v2/f0G40k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2c44035e782c4b14ddc0bede9e2f4a724d025cd073f736d4f43708453adfcb
3
+ size 73106273
pretrained_v2/f0G48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d51f589cc3632d4eae36a315b4179397695042edc01d15312e1bddc2b764a4
3
+ size 75465569
uvr5_pack/__pycache__/utils.cpython-39.pyc ADDED
Binary file (6.87 kB). View file
 
uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc ADDED
Binary file (4.14 kB). View file
 
uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc ADDED
Binary file (1.63 kB). View file
 
uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc ADDED
Binary file (3.46 kB). View file
 
uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc ADDED
Binary file (13.3 kB). View file
 
uvr5_pack/lib_v5/dataset.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+
4
+ import numpy as np
5
+ import torch
6
+ import torch.utils.data
7
+ from tqdm import tqdm
8
+
9
+ from uvr5_pack.lib_v5 import spec_utils
10
+
11
+
12
+ class VocalRemoverValidationSet(torch.utils.data.Dataset):
13
+
14
+ def __init__(self, patch_list):
15
+ self.patch_list = patch_list
16
+
17
+ def __len__(self):
18
+ return len(self.patch_list)
19
+
20
+ def __getitem__(self, idx):
21
+ path = self.patch_list[idx]
22
+ data = np.load(path)
23
+
24
+ X, y = data['X'], data['y']
25
+
26
+ X_mag = np.abs(X)
27
+ y_mag = np.abs(y)
28
+
29
+ return X_mag, y_mag
30
+
31
+
32
+ def make_pair(mix_dir, inst_dir):
33
+ input_exts = ['.wav', '.m4a', '.mp3', '.mp4', '.flac']
34
+
35
+ X_list = sorted([
36
+ os.path.join(mix_dir, fname)
37
+ for fname in os.listdir(mix_dir)
38
+ if os.path.splitext(fname)[1] in input_exts])
39
+ y_list = sorted([
40
+ os.path.join(inst_dir, fname)
41
+ for fname in os.listdir(inst_dir)
42
+ if os.path.splitext(fname)[1] in input_exts])
43
+
44
+ filelist = list(zip(X_list, y_list))
45
+
46
+ return filelist
47
+
48
+
49
+ def train_val_split(dataset_dir, split_mode, val_rate, val_filelist):
50
+ if split_mode == 'random':
51
+ filelist = make_pair(
52
+ os.path.join(dataset_dir, 'mixtures'),
53
+ os.path.join(dataset_dir, 'instruments'))
54
+
55
+ random.shuffle(filelist)
56
+
57
+ if len(val_filelist) == 0:
58
+ val_size = int(len(filelist) * val_rate)
59
+ train_filelist = filelist[:-val_size]
60
+ val_filelist = filelist[-val_size:]
61
+ else:
62
+ train_filelist = [
63
+ pair for pair in filelist
64
+ if list(pair) not in val_filelist]
65
+ elif split_mode == 'subdirs':
66
+ if len(val_filelist) != 0:
67
+ raise ValueError('The `val_filelist` option is not available in `subdirs` mode')
68
+
69
+ train_filelist = make_pair(
70
+ os.path.join(dataset_dir, 'training/mixtures'),
71
+ os.path.join(dataset_dir, 'training/instruments'))
72
+
73
+ val_filelist = make_pair(
74
+ os.path.join(dataset_dir, 'validation/mixtures'),
75
+ os.path.join(dataset_dir, 'validation/instruments'))
76
+
77
+ return train_filelist, val_filelist
78
+
79
+
80
+ def augment(X, y, reduction_rate, reduction_mask, mixup_rate, mixup_alpha):
81
+ perm = np.random.permutation(len(X))
82
+ for i, idx in enumerate(tqdm(perm)):
83
+ if np.random.uniform() < reduction_rate:
84
+ y[idx] = spec_utils.reduce_vocal_aggressively(X[idx], y[idx], reduction_mask)
85
+
86
+ if np.random.uniform() < 0.5:
87
+ # swap channel
88
+ X[idx] = X[idx, ::-1]
89
+ y[idx] = y[idx, ::-1]
90
+ if np.random.uniform() < 0.02:
91
+ # mono
92
+ X[idx] = X[idx].mean(axis=0, keepdims=True)
93
+ y[idx] = y[idx].mean(axis=0, keepdims=True)
94
+ if np.random.uniform() < 0.02:
95
+ # inst
96
+ X[idx] = y[idx]
97
+
98
+ if np.random.uniform() < mixup_rate and i < len(perm) - 1:
99
+ lam = np.random.beta(mixup_alpha, mixup_alpha)
100
+ X[idx] = lam * X[idx] + (1 - lam) * X[perm[i + 1]]
101
+ y[idx] = lam * y[idx] + (1 - lam) * y[perm[i + 1]]
102
+
103
+ return X, y
104
+
105
+
106
+ def make_padding(width, cropsize, offset):
107
+ left = offset
108
+ roi_size = cropsize - left * 2
109
+ if roi_size == 0:
110
+ roi_size = cropsize
111
+ right = roi_size - (width % roi_size) + left
112
+
113
+ return left, right, roi_size
114
+
115
+
116
+ def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset):
117
+ len_dataset = patches * len(filelist)
118
+
119
+ X_dataset = np.zeros(
120
+ (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)
121
+ y_dataset = np.zeros(
122
+ (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)
123
+
124
+ for i, (X_path, y_path) in enumerate(tqdm(filelist)):
125
+ X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
126
+ coef = np.max([np.abs(X).max(), np.abs(y).max()])
127
+ X, y = X / coef, y / coef
128
+
129
+ l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
130
+ X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
131
+ y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')
132
+
133
+ starts = np.random.randint(0, X_pad.shape[2] - cropsize, patches)
134
+ ends = starts + cropsize
135
+ for j in range(patches):
136
+ idx = i * patches + j
137
+ X_dataset[idx] = X_pad[:, :, starts[j]:ends[j]]
138
+ y_dataset[idx] = y_pad[:, :, starts[j]:ends[j]]
139
+
140
+ return X_dataset, y_dataset
141
+
142
+
143
+ def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
144
+ patch_list = []
145
+ patch_dir = 'cs{}_sr{}_hl{}_nf{}_of{}'.format(cropsize, sr, hop_length, n_fft, offset)
146
+ os.makedirs(patch_dir, exist_ok=True)
147
+
148
+ for i, (X_path, y_path) in enumerate(tqdm(filelist)):
149
+ basename = os.path.splitext(os.path.basename(X_path))[0]
150
+
151
+ X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
152
+ coef = np.max([np.abs(X).max(), np.abs(y).max()])
153
+ X, y = X / coef, y / coef
154
+
155
+ l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
156
+ X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
157
+ y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')
158
+
159
+ len_dataset = int(np.ceil(X.shape[2] / roi_size))
160
+ for j in range(len_dataset):
161
+ outpath = os.path.join(patch_dir, '{}_p{}.npz'.format(basename, j))
162
+ start = j * roi_size
163
+ if not os.path.exists(outpath):
164
+ np.savez(
165
+ outpath,
166
+ X=X_pad[:, :, start:start + cropsize],
167
+ y=y_pad[:, :, start:start + cropsize])
168
+ patch_list.append(outpath)
169
+
170
+ return VocalRemoverValidationSet(patch_list)
uvr5_pack/lib_v5/layers.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+
5
+ from uvr5_pack.lib_v5 import spec_utils
6
+
7
+
8
+ class Conv2DBNActiv(nn.Module):
9
+
10
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
11
+ super(Conv2DBNActiv, self).__init__()
12
+ self.conv = nn.Sequential(
13
+ nn.Conv2d(
14
+ nin, nout,
15
+ kernel_size=ksize,
16
+ stride=stride,
17
+ padding=pad,
18
+ dilation=dilation,
19
+ bias=False),
20
+ nn.BatchNorm2d(nout),
21
+ activ()
22
+ )
23
+
24
+ def __call__(self, x):
25
+ return self.conv(x)
26
+
27
+
28
+ class SeperableConv2DBNActiv(nn.Module):
29
+
30
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
31
+ super(SeperableConv2DBNActiv, self).__init__()
32
+ self.conv = nn.Sequential(
33
+ nn.Conv2d(
34
+ nin, nin,
35
+ kernel_size=ksize,
36
+ stride=stride,
37
+ padding=pad,
38
+ dilation=dilation,
39
+ groups=nin,
40
+ bias=False),
41
+ nn.Conv2d(
42
+ nin, nout,
43
+ kernel_size=1,
44
+ bias=False),
45
+ nn.BatchNorm2d(nout),
46
+ activ()
47
+ )
48
+
49
+ def __call__(self, x):
50
+ return self.conv(x)
51
+
52
+
53
+ class Encoder(nn.Module):
54
+
55
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
56
+ super(Encoder, self).__init__()
57
+ self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
58
+ self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
59
+
60
+ def __call__(self, x):
61
+ skip = self.conv1(x)
62
+ h = self.conv2(skip)
63
+
64
+ return h, skip
65
+
66
+
67
+ class Decoder(nn.Module):
68
+
69
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
70
+ super(Decoder, self).__init__()
71
+ self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
72
+ self.dropout = nn.Dropout2d(0.1) if dropout else None
73
+
74
+ def __call__(self, x, skip=None):
75
+ x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
76
+ if skip is not None:
77
+ skip = spec_utils.crop_center(skip, x)
78
+ x = torch.cat([x, skip], dim=1)
79
+ h = self.conv(x)
80
+
81
+ if self.dropout is not None:
82
+ h = self.dropout(h)
83
+
84
+ return h
85
+
86
+
87
+ class ASPPModule(nn.Module):
88
+
89
+ def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
90
+ super(ASPPModule, self).__init__()
91
+ self.conv1 = nn.Sequential(
92
+ nn.AdaptiveAvgPool2d((1, None)),
93
+ Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
94
+ )
95
+ self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
96
+ self.conv3 = SeperableConv2DBNActiv(
97
+ nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
98
+ self.conv4 = SeperableConv2DBNActiv(
99
+ nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
100
+ self.conv5 = SeperableConv2DBNActiv(
101
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
102
+ self.bottleneck = nn.Sequential(
103
+ Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
104
+ nn.Dropout2d(0.1)
105
+ )
106
+
107
+ def forward(self, x):
108
+ _, _, h, w = x.size()
109
+ feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
110
+ feat2 = self.conv2(x)
111
+ feat3 = self.conv3(x)
112
+ feat4 = self.conv4(x)
113
+ feat5 = self.conv5(x)
114
+ out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
115
+ bottle = self.bottleneck(out)
116
+ return bottle
uvr5_pack/lib_v5/layers_123812KB .py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+
5
+ from uvr5_pack.lib_v5 import spec_utils
6
+
7
+
8
+ class Conv2DBNActiv(nn.Module):
9
+
10
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
11
+ super(Conv2DBNActiv, self).__init__()
12
+ self.conv = nn.Sequential(
13
+ nn.Conv2d(
14
+ nin, nout,
15
+ kernel_size=ksize,
16
+ stride=stride,
17
+ padding=pad,
18
+ dilation=dilation,
19
+ bias=False),
20
+ nn.BatchNorm2d(nout),
21
+ activ()
22
+ )
23
+
24
+ def __call__(self, x):
25
+ return self.conv(x)
26
+
27
+
28
+ class SeperableConv2DBNActiv(nn.Module):
29
+
30
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
31
+ super(SeperableConv2DBNActiv, self).__init__()
32
+ self.conv = nn.Sequential(
33
+ nn.Conv2d(
34
+ nin, nin,
35
+ kernel_size=ksize,
36
+ stride=stride,
37
+ padding=pad,
38
+ dilation=dilation,
39
+ groups=nin,
40
+ bias=False),
41
+ nn.Conv2d(
42
+ nin, nout,
43
+ kernel_size=1,
44
+ bias=False),
45
+ nn.BatchNorm2d(nout),
46
+ activ()
47
+ )
48
+
49
+ def __call__(self, x):
50
+ return self.conv(x)
51
+
52
+
53
+ class Encoder(nn.Module):
54
+
55
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
56
+ super(Encoder, self).__init__()
57
+ self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
58
+ self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
59
+
60
+ def __call__(self, x):
61
+ skip = self.conv1(x)
62
+ h = self.conv2(skip)
63
+
64
+ return h, skip
65
+
66
+
67
+ class Decoder(nn.Module):
68
+
69
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
70
+ super(Decoder, self).__init__()
71
+ self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
72
+ self.dropout = nn.Dropout2d(0.1) if dropout else None
73
+
74
+ def __call__(self, x, skip=None):
75
+ x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
76
+ if skip is not None:
77
+ skip = spec_utils.crop_center(skip, x)
78
+ x = torch.cat([x, skip], dim=1)
79
+ h = self.conv(x)
80
+
81
+ if self.dropout is not None:
82
+ h = self.dropout(h)
83
+
84
+ return h
85
+
86
+
87
+ class ASPPModule(nn.Module):
88
+
89
+ def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
90
+ super(ASPPModule, self).__init__()
91
+ self.conv1 = nn.Sequential(
92
+ nn.AdaptiveAvgPool2d((1, None)),
93
+ Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
94
+ )
95
+ self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
96
+ self.conv3 = SeperableConv2DBNActiv(
97
+ nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
98
+ self.conv4 = SeperableConv2DBNActiv(
99
+ nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
100
+ self.conv5 = SeperableConv2DBNActiv(
101
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
102
+ self.bottleneck = nn.Sequential(
103
+ Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
104
+ nn.Dropout2d(0.1)
105
+ )
106
+
107
+ def forward(self, x):
108
+ _, _, h, w = x.size()
109
+ feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
110
+ feat2 = self.conv2(x)
111
+ feat3 = self.conv3(x)
112
+ feat4 = self.conv4(x)
113
+ feat5 = self.conv5(x)
114
+ out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
115
+ bottle = self.bottleneck(out)
116
+ return bottle
uvr5_pack/lib_v5/layers_123821KB.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+
5
+ from uvr5_pack.lib_v5 import spec_utils
6
+
7
+
8
+ class Conv2DBNActiv(nn.Module):
9
+
10
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
11
+ super(Conv2DBNActiv, self).__init__()
12
+ self.conv = nn.Sequential(
13
+ nn.Conv2d(
14
+ nin, nout,
15
+ kernel_size=ksize,
16
+ stride=stride,
17
+ padding=pad,
18
+ dilation=dilation,
19
+ bias=False),
20
+ nn.BatchNorm2d(nout),
21
+ activ()
22
+ )
23
+
24
+ def __call__(self, x):
25
+ return self.conv(x)
26
+
27
+
28
+ class SeperableConv2DBNActiv(nn.Module):
29
+
30
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
31
+ super(SeperableConv2DBNActiv, self).__init__()
32
+ self.conv = nn.Sequential(
33
+ nn.Conv2d(
34
+ nin, nin,
35
+ kernel_size=ksize,
36
+ stride=stride,
37
+ padding=pad,
38
+ dilation=dilation,
39
+ groups=nin,
40
+ bias=False),
41
+ nn.Conv2d(
42
+ nin, nout,
43
+ kernel_size=1,
44
+ bias=False),
45
+ nn.BatchNorm2d(nout),
46
+ activ()
47
+ )
48
+
49
+ def __call__(self, x):
50
+ return self.conv(x)
51
+
52
+
53
+ class Encoder(nn.Module):
54
+
55
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
56
+ super(Encoder, self).__init__()
57
+ self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
58
+ self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
59
+
60
+ def __call__(self, x):
61
+ skip = self.conv1(x)
62
+ h = self.conv2(skip)
63
+
64
+ return h, skip
65
+
66
+
67
+ class Decoder(nn.Module):
68
+
69
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
70
+ super(Decoder, self).__init__()
71
+ self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
72
+ self.dropout = nn.Dropout2d(0.1) if dropout else None
73
+
74
+ def __call__(self, x, skip=None):
75
+ x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
76
+ if skip is not None:
77
+ skip = spec_utils.crop_center(skip, x)
78
+ x = torch.cat([x, skip], dim=1)
79
+ h = self.conv(x)
80
+
81
+ if self.dropout is not None:
82
+ h = self.dropout(h)
83
+
84
+ return h
85
+
86
+
87
+ class ASPPModule(nn.Module):
88
+
89
+ def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
90
+ super(ASPPModule, self).__init__()
91
+ self.conv1 = nn.Sequential(
92
+ nn.AdaptiveAvgPool2d((1, None)),
93
+ Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
94
+ )
95
+ self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
96
+ self.conv3 = SeperableConv2DBNActiv(
97
+ nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
98
+ self.conv4 = SeperableConv2DBNActiv(
99
+ nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
100
+ self.conv5 = SeperableConv2DBNActiv(
101
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
102
+ self.bottleneck = nn.Sequential(
103
+ Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
104
+ nn.Dropout2d(0.1)
105
+ )
106
+
107
+ def forward(self, x):
108
+ _, _, h, w = x.size()
109
+ feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
110
+ feat2 = self.conv2(x)
111
+ feat3 = self.conv3(x)
112
+ feat4 = self.conv4(x)
113
+ feat5 = self.conv5(x)
114
+ out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
115
+ bottle = self.bottleneck(out)
116
+ return bottle
uvr5_pack/lib_v5/layers_33966KB.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+
5
+ from uvr5_pack.lib_v5 import spec_utils
6
+
7
+
8
+ class Conv2DBNActiv(nn.Module):
9
+
10
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
11
+ super(Conv2DBNActiv, self).__init__()
12
+ self.conv = nn.Sequential(
13
+ nn.Conv2d(
14
+ nin, nout,
15
+ kernel_size=ksize,
16
+ stride=stride,
17
+ padding=pad,
18
+ dilation=dilation,
19
+ bias=False),
20
+ nn.BatchNorm2d(nout),
21
+ activ()
22
+ )
23
+
24
+ def __call__(self, x):
25
+ return self.conv(x)
26
+
27
+
28
+ class SeperableConv2DBNActiv(nn.Module):
29
+
30
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
31
+ super(SeperableConv2DBNActiv, self).__init__()
32
+ self.conv = nn.Sequential(
33
+ nn.Conv2d(
34
+ nin, nin,
35
+ kernel_size=ksize,
36
+ stride=stride,
37
+ padding=pad,
38
+ dilation=dilation,
39
+ groups=nin,
40
+ bias=False),
41
+ nn.Conv2d(
42
+ nin, nout,
43
+ kernel_size=1,
44
+ bias=False),
45
+ nn.BatchNorm2d(nout),
46
+ activ()
47
+ )
48
+
49
+ def __call__(self, x):
50
+ return self.conv(x)
51
+
52
+
53
+ class Encoder(nn.Module):
54
+
55
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
56
+ super(Encoder, self).__init__()
57
+ self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
58
+ self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
59
+
60
+ def __call__(self, x):
61
+ skip = self.conv1(x)
62
+ h = self.conv2(skip)
63
+
64
+ return h, skip
65
+
66
+
67
+ class Decoder(nn.Module):
68
+
69
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
70
+ super(Decoder, self).__init__()
71
+ self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
72
+ self.dropout = nn.Dropout2d(0.1) if dropout else None
73
+
74
+ def __call__(self, x, skip=None):
75
+ x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
76
+ if skip is not None:
77
+ skip = spec_utils.crop_center(skip, x)
78
+ x = torch.cat([x, skip], dim=1)
79
+ h = self.conv(x)
80
+
81
+ if self.dropout is not None:
82
+ h = self.dropout(h)
83
+
84
+ return h
85
+
86
+
87
+ class ASPPModule(nn.Module):
88
+
89
+ def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
90
+ super(ASPPModule, self).__init__()
91
+ self.conv1 = nn.Sequential(
92
+ nn.AdaptiveAvgPool2d((1, None)),
93
+ Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
94
+ )
95
+ self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
96
+ self.conv3 = SeperableConv2DBNActiv(
97
+ nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
98
+ self.conv4 = SeperableConv2DBNActiv(
99
+ nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
100
+ self.conv5 = SeperableConv2DBNActiv(
101
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
102
+ self.conv6 = SeperableConv2DBNActiv(
103
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
104
+ self.conv7 = SeperableConv2DBNActiv(
105
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
106
+ self.bottleneck = nn.Sequential(
107
+ Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
108
+ nn.Dropout2d(0.1)
109
+ )
110
+
111
+ def forward(self, x):
112
+ _, _, h, w = x.size()
113
+ feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
114
+ feat2 = self.conv2(x)
115
+ feat3 = self.conv3(x)
116
+ feat4 = self.conv4(x)
117
+ feat5 = self.conv5(x)
118
+ feat6 = self.conv6(x)
119
+ feat7 = self.conv7(x)
120
+ out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6, feat7), dim=1)
121
+ bottle = self.bottleneck(out)
122
+ return bottle
uvr5_pack/lib_v5/layers_537227KB.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+
5
+ from uvr5_pack.lib_v5 import spec_utils
6
+
7
+
8
+ class Conv2DBNActiv(nn.Module):
9
+
10
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
11
+ super(Conv2DBNActiv, self).__init__()
12
+ self.conv = nn.Sequential(
13
+ nn.Conv2d(
14
+ nin, nout,
15
+ kernel_size=ksize,
16
+ stride=stride,
17
+ padding=pad,
18
+ dilation=dilation,
19
+ bias=False),
20
+ nn.BatchNorm2d(nout),
21
+ activ()
22
+ )
23
+
24
+ def __call__(self, x):
25
+ return self.conv(x)
26
+
27
+
28
+ class SeperableConv2DBNActiv(nn.Module):
29
+
30
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
31
+ super(SeperableConv2DBNActiv, self).__init__()
32
+ self.conv = nn.Sequential(
33
+ nn.Conv2d(
34
+ nin, nin,
35
+ kernel_size=ksize,
36
+ stride=stride,
37
+ padding=pad,
38
+ dilation=dilation,
39
+ groups=nin,
40
+ bias=False),
41
+ nn.Conv2d(
42
+ nin, nout,
43
+ kernel_size=1,
44
+ bias=False),
45
+ nn.BatchNorm2d(nout),
46
+ activ()
47
+ )
48
+
49
+ def __call__(self, x):
50
+ return self.conv(x)
51
+
52
+
53
+ class Encoder(nn.Module):
54
+
55
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
56
+ super(Encoder, self).__init__()
57
+ self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
58
+ self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
59
+
60
+ def __call__(self, x):
61
+ skip = self.conv1(x)
62
+ h = self.conv2(skip)
63
+
64
+ return h, skip
65
+
66
+
67
+ class Decoder(nn.Module):
68
+
69
+ def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
70
+ super(Decoder, self).__init__()
71
+ self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
72
+ self.dropout = nn.Dropout2d(0.1) if dropout else None
73
+
74
+ def __call__(self, x, skip=None):
75
+ x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
76
+ if skip is not None:
77
+ skip = spec_utils.crop_center(skip, x)
78
+ x = torch.cat([x, skip], dim=1)
79
+ h = self.conv(x)
80
+
81
+ if self.dropout is not None:
82
+ h = self.dropout(h)
83
+
84
+ return h
85
+
86
+
87
+ class ASPPModule(nn.Module):
88
+
89
+ def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
90
+ super(ASPPModule, self).__init__()
91
+ self.conv1 = nn.Sequential(
92
+ nn.AdaptiveAvgPool2d((1, None)),
93
+ Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
94
+ )
95
+ self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
96
+ self.conv3 = SeperableConv2DBNActiv(
97
+ nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
98
+ self.conv4 = SeperableConv2DBNActiv(
99
+ nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
100
+ self.conv5 = SeperableConv2DBNActiv(
101
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
102
+ self.conv6 = SeperableConv2DBNActiv(
103
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
104
+ self.conv7 = SeperableConv2DBNActiv(
105
+ nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
106
+ self.bottleneck = nn.Sequential(
107
+ Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
108
+ nn.Dropout2d(0.1)
109
+ )
110
+
111
+ def forward(self, x):
112
+ _, _, h, w = x.size()
113
+ feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
114
+ feat2 = self.conv2(x)
115
+ feat3 = self.conv3(x)
116
+ feat4 = self.conv4(x)
117
+ feat5 = self.conv5(x)
118
+ feat6 = self.conv6(x)
119
+ feat7 = self.conv7(x)
120
+ out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6, feat7), dim=1)
121
+ bottle = self.bottleneck(out)
122
+ return bottle
uvr5_pack/lib_v5/layers_537238KB.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+
5
+ from uvr5_pack.lib_v5 import spec_utils
6
+
7
+
8
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, fused as one block.

    Args:
        nin: number of input channels.
        nout: number of output channels.
        ksize: convolution kernel size.
        stride: convolution stride.
        pad: zero-padding added to both sides of each spatial dim.
        dilation: spacing between kernel elements.
        activ: activation-layer class, instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin, nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False),  # bias is redundant immediately before BatchNorm
            nn.BatchNorm2d(nout),
            activ()
        )

    def forward(self, x):
        # Defined as forward() (not __call__) so nn.Module.__call__ runs the
        # registered-hook machinery; instance(x) behaves exactly as before.
        return self.conv(x)
26
+
27
+
28
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise-separable conv -> BatchNorm2d -> activation.

    A depthwise conv (groups=nin) followed by a 1x1 pointwise conv.
    The class name keeps the original "Seperable" spelling [sic] because
    it is part of the public interface used by the model definitions.

    Args:
        nin: number of input channels.
        nout: number of output channels.
        ksize: depthwise kernel size.
        stride: depthwise convolution stride.
        pad: zero-padding for the depthwise convolution.
        dilation: dilation for the depthwise convolution.
        activ: activation-layer class, instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(SeperableConv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin, nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,  # one filter per channel -> depthwise
                bias=False),
            nn.Conv2d(
                nin, nout,
                kernel_size=1,  # pointwise channel mixing
                bias=False),
            nn.BatchNorm2d(nout),
            activ()
        )

    def forward(self, x):
        # forward() instead of __call__ keeps nn.Module hooks functional;
        # instance(x) behaves exactly as before.
        return self.conv(x)
51
+
52
+
53
class Encoder(nn.Module):
    """Two stacked Conv2DBNActiv blocks; the first output doubles as the skip.

    Args:
        nin: input channels.
        nout: output channels of both convolutions.
        ksize: kernel size for both convolutions.
        stride: stride of the second convolution (the first always uses 1).
        pad: padding for both convolutions.
        activ: activation-layer class.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return (downsampled features, full-resolution skip features)."""
        # forward() instead of __call__ so nn.Module hook machinery still
        # runs; encoder(x) behaves exactly as before.
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
65
+
66
+
67
class Decoder(nn.Module):
    """Upsampling decoder block with optional skip connection and dropout.

    Args:
        nin: input channels (after concatenating the skip, if one is used).
        nout: output channels.
        ksize: convolution kernel size.
        stride: unused; the internal convolution always uses stride 1.
        pad: convolution padding.
        activ: activation-layer class.
        dropout: when True, apply Dropout2d(0.1) after the convolution.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Upsample x by 2x, optionally concat a cropped skip, convolve."""
        # forward() instead of __call__ keeps nn.Module hooks functional;
        # decoder(x, skip) behaves exactly as before.
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if skip is not None:
            # Encoder features may be slightly larger; crop to x's size first.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
85
+
86
+
87
class ASPPModule(nn.Module):
    """Atrous Spatial Pyramid Pooling block.

    Runs seven parallel branches over the input — a pooled branch, a 1x1
    conv, and five dilated separable convs — concatenates them channel-wise
    and projects back to ``nout`` channels through a 1x1 bottleneck.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        # Branch 1: collapse dim 2 to size 1 (presumably the frequency axis —
        # confirm against the spectrogram layout), keep dim 3, then 1x1 conv.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        # NOTE(review): conv5-conv7 all reuse dilations[2]; dilations[3] and
        # dilations[4] are never read. Pretrained checkpoints were built with
        # this exact wiring, so it is left untouched — confirm against
        # upstream before "fixing".
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv6 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv7 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Apply all seven branches to x and fuse them via the bottleneck."""
        _, _, h, w = x.size()
        # Broadcast the pooled branch back to the input's spatial size.
        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        feat6 = self.conv6(x)
        feat7 = self.conv7(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6, feat7), dim=1)
        bottle = self.bottleneck(out)
        return bottle
uvr5_pack/lib_v5/model_param_init.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import pathlib
4
+
5
# Fallback multiband spectrogram configuration, used by ModelParameters
# when no .pth/.json config path is supplied.
default_param = {}
default_param['bins'] = 768
default_param['unstable_bins'] = 9 # training only
default_param['reduction_bins'] = 762 # training only
default_param['sr'] = 44100  # output/model sample rate
default_param['pre_filter_start'] = 757
default_param['pre_filter_stop'] = 768
default_param['band'] = {}


# Band 1: low band resampled to 11025 Hz.
# 'hl' is presumably the STFT hop length — confirm against spec_utils usage.
default_param['band'][1] = {
    'sr': 11025,
    'hl': 128,
    'n_fft': 960,
    'crop_start': 0,
    'crop_stop': 245,
    'lpf_start': 61, # inference only
    'res_type': 'polyphase'
}

# Band 2: full-rate band at 44100 Hz.
default_param['band'][2] = {
    'sr': 44100,
    'hl': 512,
    'n_fft': 1536,
    'crop_start': 24,
    'crop_stop': 547,
    'hpf_start': 81, # inference only
    'res_type': 'sinc_best'
}
34
+
35
+
36
def int_keys(d):
    """object_pairs_hook for json.loads: turn digit-only string keys into ints.

    Args:
        d: sequence of (key, value) pairs as produced by the JSON decoder.

    Returns:
        dict mapping each key (converted to int when the key string is all
        digits, otherwise unchanged) to its value.
    """
    result = {}
    for key, value in d:
        result[int(key) if key.isdigit() else key] = value
    return result
43
+
44
+
45
class ModelParameters(object):
    """Holds the multiband spectrogram parameters for a separation model.

    The parameters are read from a ``param.json`` entry inside a ``.pth``
    model archive, from a standalone ``.json`` file, or fall back to the
    module-level ``default_param`` when no path is given.
    """

    def __init__(self, config_path=''):
        suffix = pathlib.Path(config_path).suffix
        if suffix == '.pth':
            import zipfile

            # A .pth checkpoint is a zip archive; read the band layout
            # from its embedded param.json.
            with zipfile.ZipFile(config_path, 'r') as zf:
                self.param = json.loads(zf.read('param.json'), object_pairs_hook=int_keys)
        elif suffix == '.json':
            with open(config_path, 'r') as f:
                self.param = json.loads(f.read(), object_pairs_hook=int_keys)
        else:
            self.param = default_param

        # Guarantee the optional stereo/mid-side flags always exist.
        for flag in ['mid_side', 'mid_side_b', 'mid_side_b2', 'stereo_w', 'stereo_n', 'reverse']:
            if flag not in self.param:
                self.param[flag] = False
uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bins": 1024,
3
+ "unstable_bins": 0,
4
+ "reduction_bins": 0,
5
+ "band": {
6
+ "1": {
7
+ "sr": 16000,
8
+ "hl": 512,
9
+ "n_fft": 2048,
10
+ "crop_start": 0,
11
+ "crop_stop": 1024,
12
+ "hpf_start": -1,
13
+ "res_type": "sinc_best"
14
+ }
15
+ },
16
+ "sr": 16000,
17
+ "pre_filter_start": 1023,
18
+ "pre_filter_stop": 1024
19
+ }
uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bins": 1024,
3
+ "unstable_bins": 0,
4
+ "reduction_bins": 0,
5
+ "band": {
6
+ "1": {
7
+ "sr": 32000,
8
+ "hl": 512,
9
+ "n_fft": 2048,
10
+ "crop_start": 0,
11
+ "crop_stop": 1024,
12
+ "hpf_start": -1,
13
+ "res_type": "kaiser_fast"
14
+ }
15
+ },
16
+ "sr": 32000,
17
+ "pre_filter_start": 1000,
18
+ "pre_filter_stop": 1021
19
+ }
uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bins": 1024,
3
+ "unstable_bins": 0,
4
+ "reduction_bins": 0,
5
+ "band": {
6
+ "1": {
7
+ "sr": 33075,
8
+ "hl": 384,
9
+ "n_fft": 2048,
10
+ "crop_start": 0,
11
+ "crop_stop": 1024,
12
+ "hpf_start": -1,
13
+ "res_type": "sinc_best"
14
+ }
15
+ },
16
+ "sr": 33075,
17
+ "pre_filter_start": 1000,
18
+ "pre_filter_stop": 1021
19
+ }
uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bins": 1024,
3
+ "unstable_bins": 0,
4
+ "reduction_bins": 0,
5
+ "band": {
6
+ "1": {
7
+ "sr": 44100,
8
+ "hl": 1024,
9
+ "n_fft": 2048,
10
+ "crop_start": 0,
11
+ "crop_stop": 1024,
12
+ "hpf_start": -1,
13
+ "res_type": "sinc_best"
14
+ }
15
+ },
16
+ "sr": 44100,
17
+ "pre_filter_start": 1023,
18
+ "pre_filter_stop": 1024
19
+ }
uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bins": 256,
3
+ "unstable_bins": 0,
4
+ "reduction_bins": 0,
5
+ "band": {
6
+ "1": {
7
+ "sr": 44100,
8
+ "hl": 256,
9
+ "n_fft": 512,
10
+ "crop_start": 0,
11
+ "crop_stop": 256,
12
+ "hpf_start": -1,
13
+ "res_type": "sinc_best"
14
+ }
15
+ },
16
+ "sr": 44100,
17
+ "pre_filter_start": 256,
18
+ "pre_filter_stop": 256
19
+ }