rbustos89 committed on
Commit
92f1429
·
verified ·
1 Parent(s): 884477e

Upload 3 files

Browse files
config_scnet_choirsep.yaml ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 131072 # 2**17 samples (~2.97 s at 44100 Hz)
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.000
6
+
7
+ model:
8
+ sources:
9
+ - alto
10
+ - bass
11
+ - soprano
12
+ - tenor
13
+ audio_channels: 2
14
+ dims:
15
+ - 4
16
+ - 32
17
+ - 64
18
+ - 128
19
+ nfft: 4096
20
+ hop_size: 1024
21
+ win_size: 4096
22
+ normalized: True
23
+ band_SR:
24
+ - 0.175
25
+ - 0.392
26
+ - 0.433
27
+ band_stride:
28
+ - 1
29
+ - 4
30
+ - 16
31
+ band_kernel:
32
+ - 3
33
+ - 4
34
+ - 16
35
+ conv_depths:
36
+ - 3
37
+ - 2
38
+ - 1
39
+ compress: 4
40
+ conv_kernel: 3
41
+ num_dplayer: 6
42
+ expand: 1
43
+
44
+ training:
45
+ batch_size: 9
46
+ gradient_accumulation_steps: 1
47
+ grad_clip: 0
48
+ instruments:
49
+ - alto
50
+ - bass
51
+ - soprano
52
+ - tenor
53
+ lr: 5.0e-4
54
+ patience: 6
55
+ reduce_factor: 0.95
56
+ target_instrument: null
57
+ num_epochs: 1000
58
+ num_steps: 1000
59
+ q: 0.95
60
+ coarse_loss_clip: true
61
+ ema_momentum: 0.999
62
+ optimizer: adamw8bit
63
+ other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
64
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
65
+
66
+ loss_multistft:
67
+ fft_sizes:
68
+ - 1024
69
+ - 2048
70
+ - 4096
71
+ hop_sizes:
72
+ - 512
73
+ - 1024
74
+ - 2048
75
+ win_lengths:
76
+ - 1024
77
+ - 2048
78
+ - 4096
79
+ window: "hann_window"
80
+ scale: "mel"
81
+ n_bins: 128
82
+ sample_rate: 44100
83
+ perceptual_weighting: true
84
+ w_sc: 1.0
85
+ w_log_mag: 1.0
86
+ w_lin_mag: 0.0
87
+ w_phs: 0.0
88
+ mag_distance: "L1"
89
+
90
+ augmentations:
91
+ enable: false # enable or disable all augmentations (to quickly disable them if needed)
92
+ loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
93
+ loudness_min: 0.5
94
+ loudness_max: 1.5
95
+ mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
96
+ mixup_probs:
97
+ !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
98
+ - 0.2
99
+ - 0.02
100
+ mixup_loudness_min: 0.5
101
+ mixup_loudness_max: 1.5
102
+
103
+ inference:
104
+ batch_size: 16
105
+ dim_t: 256
106
+ num_overlap: 1
107
+ normalize: false
model_scnet_ep_36_sdr_5.4596.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e104ce6f6733542f8356ac2e59f2b7ece49ebfd2713d1ff635806de5ceb483f
3
+ size 42457314
model_scnet_ep_42_sdr_5.2559.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a668939243ab1551d2643820d4ff1a2b8f3c4ac1b9642718d5673405ea2f8b
3
+ size 42457314