Politrees committed on
Commit
fe62612
·
verified ·
1 Parent(s): 848ebc6
Bandit_models/checkpoint-multi_fixed.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:20bcd513dc7eb0541dd045909a4e7dff8dab474cc2efba4904101c76524aee85
3
- size 149133378
 
 
 
 
Bandit_models/config_dnr_bandit_bsrnn_multi_mus64.yaml DELETED
@@ -1,78 +0,0 @@
1
- name: "MultiMaskMultiSourceBandSplitRNN"
2
- audio:
3
- chunk_size: 264600
4
- num_channels: 2
5
- sample_rate: 44100
6
- min_mean_abs: 0.001
7
-
8
- model:
9
- in_channel: 1
10
- stems: ['speech', 'music', 'effects']
11
- band_specs: "musical"
12
- n_bands: 64
13
- fs: 44100
14
- require_no_overlap: false
15
- require_no_gap: true
16
- normalize_channel_independently: false
17
- treat_channel_as_feature: true
18
- n_sqm_modules: 8
19
- emb_dim: 128
20
- rnn_dim: 256
21
- bidirectional: true
22
- rnn_type: "GRU"
23
- mlp_dim: 512
24
- hidden_activation: "Tanh"
25
- hidden_activation_kwargs: null
26
- complex_mask: true
27
- n_fft: 2048
28
- win_length: 2048
29
- hop_length: 512
30
- window_fn: "hann_window"
31
- wkwargs: null
32
- power: null
33
- center: true
34
- normalized: true
35
- pad_mode: "constant"
36
- onesided: true
37
-
38
- training:
39
- batch_size: 4
40
- gradient_accumulation_steps: 4
41
- grad_clip: 0
42
- instruments:
43
- - Speech
44
- - Music
45
- - Effects
46
- lr: 9.0e-05
47
- patience: 2
48
- reduce_factor: 0.95
49
- target_instrument: null
50
- num_epochs: 1000
51
- num_steps: 1000
52
- q: 0.95
53
- coarse_loss_clip: true
54
- ema_momentum: 0.999
55
- optimizer: adam
56
- other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
57
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
58
-
59
- augmentations:
60
- enable: true # enable or disable all augmentations (to fast disable if needed)
61
- loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
62
- loudness_min: 0.5
63
- loudness_max: 1.5
64
- mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
65
- mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
66
- - 0.2
67
- - 0.02
68
- mixup_loudness_min: 0.5
69
- mixup_loudness_max: 1.5
70
- all:
71
- channel_shuffle: 0.5 # Set 0 or lower to disable
72
- random_inverse: 0.1 # inverse track (better lower probability)
73
- random_polarity: 0.5 # polarity change (multiply waveform to -1)
74
-
75
- inference:
76
- batch_size: 1
77
- dim_t: 256
78
- num_overlap: 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Bandit_models/config_dnr_bandit_v2_mus64.yaml DELETED
@@ -1,78 +0,0 @@
1
- cls: Bandit
2
-
3
- audio:
4
- chunk_size: 384000
5
- num_channels: 2
6
- sample_rate: 48000
7
- min_mean_abs: 0.000
8
-
9
- kwargs:
10
- in_channels: 1
11
- stems: ['speech', 'music', 'sfx']
12
- band_type: musical
13
- n_bands: 64
14
- normalize_channel_independently: false
15
- treat_channel_as_feature: true
16
- n_sqm_modules: 8
17
- emb_dim: 128
18
- rnn_dim: 256
19
- bidirectional: true
20
- rnn_type: "GRU"
21
- mlp_dim: 512
22
- hidden_activation: "Tanh"
23
- hidden_activation_kwargs:
24
- complex_mask: true
25
- use_freq_weights: true
26
- n_fft: 2048
27
- win_length: 2048
28
- hop_length: 512
29
- window_fn: "hann_window"
30
- wkwargs:
31
- power:
32
- center: true
33
- normalized: true
34
- pad_mode: "reflect"
35
- onesided: true
36
-
37
- training:
38
- batch_size: 4
39
- gradient_accumulation_steps: 4
40
- grad_clip: 0
41
- instruments:
42
- - Speech
43
- - Music
44
- - Sfx
45
- lr: 9.0e-05
46
- patience: 2
47
- reduce_factor: 0.95
48
- target_instrument:
49
- num_epochs: 1000
50
- num_steps: 1000
51
- q: 0.95
52
- coarse_loss_clip: true
53
- ema_momentum: 0.999
54
- optimizer: adam
55
- other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
56
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
57
-
58
- augmentations:
59
- enable: true # enable or disable all augmentations (to fast disable if needed)
60
- loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
61
- loudness_min: 0.5
62
- loudness_max: 1.5
63
- mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
64
- mixup_probs: !!python/tuple
65
- # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
66
- - 0.2
67
- - 0.02
68
- mixup_loudness_min: 0.5
69
- mixup_loudness_max: 1.5
70
- all:
71
- channel_shuffle: 0.5 # Set 0 or lower to disable
72
- random_inverse: 0.1 # inverse track (better lower probability)
73
- random_polarity: 0.5 # polarity change (multiply waveform to -1)
74
-
75
- inference:
76
- batch_size: 8
77
- dim_t: 256
78
- num_overlap: 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Bandit_models/model_bandit_plus_dnr_sdr_11.47.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c48284779f7d1258a6527d3aaa18a532d45c1f506e2dcc25d5ab179a8c5e2573
3
- size 148891175