Tangible-Music-Lab committed on
Commit
945db8c
·
verified ·
1 Parent(s): 70d3328

Upload 8 files

Browse files
checkpoints/freesoundloop10k_default_b2048_r48000/config.gin ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __gin__ import dynamic_registration
2
+ import cached_conv as cc
3
+ from cached_conv import convs
4
+ import rave
5
+ from rave import blocks
6
+ from rave import core
7
+ from rave import dataset
8
+ from rave import descript_discriminator
9
+ from rave import discriminator
10
+ from rave import model
11
+ from rave import pqmf
12
+ import torch
13
+ import torch.nn as nn
14
+
15
+ # Macros:
16
+ # ==============================================================================
17
+ ACTIVATION = @blocks.Snake
18
+ CAPACITY = 96
19
+ DILATIONS = [[1, 3, 9], [1, 3, 9], [1, 3, 9], [1, 3]]
20
+ KERNEL_SIZE = 3
21
+ LATENT_SIZE = 128
22
+ N_BAND = 16
23
+ NOISE_AUGMENTATION = 0
24
+ PHASE_1_DURATION = 0
25
+ RATIOS = [4, 4, 4, 2]
26
+ SAMPLING_RATE = 48000
27
+
28
+ # Parameters for blocks.AdaptiveInstanceNormalization:
29
+ # ==============================================================================
30
+ # None.
31
+
32
+ # Parameters for variational/blocks.AdaptiveInstanceNormalization:
33
+ # ==============================================================================
34
+ # None.
35
+
36
+ # Parameters for core.AudioDistanceV1:
37
+ # ==============================================================================
38
+ core.AudioDistanceV1.log_epsilon = 1e-07
39
+ core.AudioDistanceV1.multiscale_stft = @core.MultiScaleSTFT
40
+
41
+ # Parameters for model.BetaWarmupCallback:
42
+ # ==============================================================================
43
+ model.BetaWarmupCallback.initial_value = 1e-06
44
+ model.BetaWarmupCallback.log = True
45
+ model.BetaWarmupCallback.target_value = 0.05
46
+ model.BetaWarmupCallback.warmup_len = 20000
47
+
48
+ # Parameters for pqmf.CachedPQMF:
49
+ # ==============================================================================
50
+ pqmf.CachedPQMF.attenuation = 100
51
+ pqmf.CachedPQMF.n_band = %N_BAND
52
+
53
+ # Parameters for cc.Conv1d:
54
+ # ==============================================================================
55
+ cc.Conv1d.bias = False
56
+
57
+ # Parameters for variational/cc.Conv1d:
58
+ # ==============================================================================
59
+ variational/cc.Conv1d.bias = False
60
+
61
+ # Parameters for cc.ConvTranspose1d:
62
+ # ==============================================================================
63
+ cc.ConvTranspose1d.bias = False
64
+
65
+ # Parameters for descript_discriminator.DescriptDiscriminator:
66
+ # ==============================================================================
67
+ descript_discriminator.DescriptDiscriminator.bands = \
68
+ [(0.0, 0.1), (0.1, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)]
69
+ descript_discriminator.DescriptDiscriminator.fft_sizes = [2048, 1024, 512]
70
+ descript_discriminator.DescriptDiscriminator.periods = [2, 3, 5, 7, 11]
71
+ descript_discriminator.DescriptDiscriminator.rates = []
72
+ descript_discriminator.DescriptDiscriminator.sample_rate = 44100
73
+
74
+ # Parameters for variational/blocks.EncoderV2:
75
+ # ==============================================================================
76
+ variational/blocks.EncoderV2.activation = %ACTIVATION
77
+ variational/blocks.EncoderV2.adain = @blocks.AdaptiveInstanceNormalization
78
+ variational/blocks.EncoderV2.capacity = %CAPACITY
79
+ variational/blocks.EncoderV2.data_size = %N_BAND
80
+ variational/blocks.EncoderV2.dilations = %DILATIONS
81
+ variational/blocks.EncoderV2.group_resample = False
82
+ variational/blocks.EncoderV2.group_size = 65536
83
+ variational/blocks.EncoderV2.keep_dim = False
84
+ variational/blocks.EncoderV2.kernel_size = %KERNEL_SIZE
85
+ variational/blocks.EncoderV2.latent_size = %LATENT_SIZE
86
+ variational/blocks.EncoderV2.n_out = 2
87
+ variational/blocks.EncoderV2.ratios = %RATIOS
88
+ variational/blocks.EncoderV2.recurrent_layer = None
89
+ variational/blocks.EncoderV2.spectrogram = None
90
+
91
+ # Parameters for blocks.GeneratorV2:
92
+ # ==============================================================================
93
+ blocks.GeneratorV2.activation = %ACTIVATION
94
+ blocks.GeneratorV2.adain = @blocks.AdaptiveInstanceNormalization
95
+ blocks.GeneratorV2.amplitude_modulation = True
96
+ blocks.GeneratorV2.capacity = %CAPACITY
97
+ blocks.GeneratorV2.causal_convtranspose = True
98
+ blocks.GeneratorV2.data_size = %N_BAND
99
+ blocks.GeneratorV2.dilations = %DILATIONS
100
+ blocks.GeneratorV2.group_resample = False
101
+ blocks.GeneratorV2.group_size = 65536
102
+ blocks.GeneratorV2.keep_dim = False
103
+ blocks.GeneratorV2.kernel_size = %KERNEL_SIZE
104
+ blocks.GeneratorV2.latent_size = @core.get_augmented_latent_size()
105
+ blocks.GeneratorV2.noise_module = @blocks.NoiseGeneratorV2
106
+ blocks.GeneratorV2.ratios = %RATIOS
107
+ blocks.GeneratorV2.recurrent_layer = None
108
+
109
+ # Parameters for core.get_augmented_latent_size:
110
+ # ==============================================================================
111
+ core.get_augmented_latent_size.latent_size = %LATENT_SIZE
112
+ core.get_augmented_latent_size.noise_augmentation = %NOISE_AUGMENTATION
113
+
114
+ # Parameters for dataset.get_dataset:
115
+ # ==============================================================================
116
+ dataset.get_dataset.augmentations = []
117
+
118
+ # Parameters for convs.get_padding:
119
+ # ==============================================================================
120
+ convs.get_padding.dilation = 1
121
+ convs.get_padding.mode = 'causal'
122
+ convs.get_padding.stride = 1
123
+
124
+ # Parameters for variational/convs.get_padding:
125
+ # ==============================================================================
126
+ variational/convs.get_padding.dilation = 1
127
+ variational/convs.get_padding.mode = 'causal'
128
+ variational/convs.get_padding.stride = 1
129
+
130
+ # Parameters for core.MultiScaleSTFT:
131
+ # ==============================================================================
132
+ core.MultiScaleSTFT.magnitude = True
133
+ core.MultiScaleSTFT.normalized = False
134
+ core.MultiScaleSTFT.num_mels = None
135
+ core.MultiScaleSTFT.random_crop = False
136
+ core.MultiScaleSTFT.sample_rate = %SAMPLING_RATE
137
+ core.MultiScaleSTFT.scales = [2048, 1024, 512, 256, 128]
138
+
139
+ # Parameters for blocks.NoiseGeneratorV2:
140
+ # ==============================================================================
141
+ blocks.NoiseGeneratorV2.activation = %ACTIVATION
142
+ blocks.NoiseGeneratorV2.data_size = %N_BAND
143
+ blocks.NoiseGeneratorV2.hidden_size = 128
144
+ blocks.NoiseGeneratorV2.noise_bands = 5
145
+ blocks.NoiseGeneratorV2.ratios = [2, 2, 2]
146
+
147
+ # Parameters for blocks.normalization:
148
+ # ==============================================================================
149
+ blocks.normalization.mode = 'weight_norm'
150
+
151
+ # Parameters for variational/blocks.normalization:
152
+ # ==============================================================================
153
+ variational/blocks.normalization.mode = 'weight_norm'
154
+
155
+ # Parameters for model.RAVE:
156
+ # ==============================================================================
157
+ model.RAVE.audio_distance = @core.AudioDistanceV1
158
+ model.RAVE.audio_monitor_epochs = 1
159
+ model.RAVE.balancer = None
160
+ model.RAVE.decoder = @blocks.GeneratorV2
161
+ model.RAVE.discriminator = @descript_discriminator.DescriptDiscriminator
162
+ model.RAVE.enable_pqmf_decode = None
163
+ model.RAVE.enable_pqmf_encode = None
164
+ model.RAVE.encoder = @blocks.VariationalEncoder
165
+ model.RAVE.feature_matching_fun = @feature_matching/core.mean_difference
166
+ model.RAVE.freeze_encoder = False
167
+ model.RAVE.gan_loss = @core.hinge_gan
168
+ model.RAVE.input_mode = 'pqmf'
169
+ model.RAVE.is_mel_input = None
170
+ model.RAVE.latent_size = %LATENT_SIZE
171
+ model.RAVE.loss_weights = None
172
+ model.RAVE.multiband_audio_distance = @core.AudioDistanceV1
173
+ model.RAVE.n_bands = 16
174
+ model.RAVE.n_channels = 1
175
+ model.RAVE.num_skipped_features = 1
176
+ model.RAVE.output_mode = 'pqmf'
177
+ model.RAVE.phase_1_duration = %PHASE_1_DURATION
178
+ model.RAVE.pqmf = @pqmf.CachedPQMF
179
+ model.RAVE.sampling_rate = %SAMPLING_RATE
180
+ model.RAVE.spectrogram = None
181
+ model.RAVE.update_discriminator_every = 4
182
+ model.RAVE.valid_signal_crop = True
183
+ model.RAVE.warmup_quantize = None
184
+ model.RAVE.weights = {'feature_matching': 20}
185
+
186
+ # Parameters for blocks.Snake:
187
+ # ==============================================================================
188
+ # None.
189
+
190
+ # Parameters for variational/blocks.Snake:
191
+ # ==============================================================================
192
+ # None.
193
+
194
+ # Parameters for dataset.split_dataset:
195
+ # ==============================================================================
196
+ dataset.split_dataset.max_residual = 1000
197
+
198
+ # Parameters for blocks.VariationalEncoder:
199
+ # ==============================================================================
200
+ blocks.VariationalEncoder.beta = 1.0
201
+ blocks.VariationalEncoder.encoder = @variational/blocks.EncoderV2
checkpoints/freesoundloop10k_default_b2048_r48000/version_0/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cfed96e29b9f026df10499ce5fbf157da2b5402e5cd68212ddf7f6a500e84c5
3
+ size 898754420
checkpoints/freesoundloop10k_default_b2048_r48000/version_0/events.out.tfevents.1730231102.tamforce.1561356.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57fb59b4f2584c2f4b6e2e698960fe1b83ed6c2895296b4132c50b7a1bdad2b0
3
+ size 1210921671
checkpoints/freesoundloop10k_default_b2048_r48000/version_0/hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
checkpoints/freesoundloop10k_raspi_b2048_r44100/config.gin ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __gin__ import dynamic_registration
2
+ import cached_conv as cc
3
+ from cached_conv import convs
4
+ import rave
5
+ from rave import blocks
6
+ from rave import core
7
+ from rave import dataset
8
+ from rave import descript_discriminator
9
+ from rave import discriminator
10
+ from rave import model
11
+ from rave import pqmf
12
+ import torch
13
+ import torch.nn as nn
14
+
15
+ # Macros:
16
+ # ==============================================================================
17
+ ACTIVATION = @blocks.Snake
18
+ CAPACITY = 32
19
+ DILATIONS = [[1, 3, 9], [1, 3, 9], [1, 3, 9], [1, 3]]
20
+ KERNEL_SIZE = 3
21
+ LATENT_SIZE = 128
22
+ N_BAND = 16
23
+ NOISE_AUGMENTATION = 0
24
+ PHASE_1_DURATION = 5000000
25
+ RATIOS = [4, 4, 4, 2]
26
+ SAMPLING_RATE = 44100
27
+
28
+ # Parameters for core.AudioDistanceV1:
29
+ # ==============================================================================
30
+ core.AudioDistanceV1.log_epsilon = 1e-07
31
+ core.AudioDistanceV1.multiscale_stft = @core.MultiScaleSTFT
32
+
33
+ # Parameters for model.BetaWarmupCallback:
34
+ # ==============================================================================
35
+ model.BetaWarmupCallback.initial_value = 1e-06
36
+ model.BetaWarmupCallback.log = True
37
+ model.BetaWarmupCallback.target_value = 0.05
38
+ model.BetaWarmupCallback.warmup_len = 20000
39
+
40
+ # Parameters for pqmf.CachedPQMF:
41
+ # ==============================================================================
42
+ pqmf.CachedPQMF.attenuation = 100
43
+ pqmf.CachedPQMF.n_band = %N_BAND
44
+
45
+ # Parameters for cc.Conv1d:
46
+ # ==============================================================================
47
+ cc.Conv1d.bias = False
48
+
49
+ # Parameters for variational/cc.Conv1d:
50
+ # ==============================================================================
51
+ variational/cc.Conv1d.bias = False
52
+
53
+ # Parameters for cc.ConvTranspose1d:
54
+ # ==============================================================================
55
+ cc.ConvTranspose1d.bias = False
56
+
57
+ # Parameters for descript_discriminator.DescriptDiscriminator:
58
+ # ==============================================================================
59
+ descript_discriminator.DescriptDiscriminator.bands = \
60
+ [(0.0, 0.1), (0.1, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)]
61
+ descript_discriminator.DescriptDiscriminator.fft_sizes = [2048, 1024, 512]
62
+ descript_discriminator.DescriptDiscriminator.periods = [2, 3, 5, 7, 11]
63
+ descript_discriminator.DescriptDiscriminator.rates = []
64
+ descript_discriminator.DescriptDiscriminator.sample_rate = 44100
65
+
66
+ # Parameters for variational/blocks.EncoderV2:
67
+ # ==============================================================================
68
+ variational/blocks.EncoderV2.activation = %ACTIVATION
69
+ variational/blocks.EncoderV2.adain = None
70
+ variational/blocks.EncoderV2.capacity = %CAPACITY
71
+ variational/blocks.EncoderV2.data_size = %N_BAND
72
+ variational/blocks.EncoderV2.dilations = %DILATIONS
73
+ variational/blocks.EncoderV2.group_resample = False
74
+ variational/blocks.EncoderV2.group_size = 65536
75
+ variational/blocks.EncoderV2.keep_dim = False
76
+ variational/blocks.EncoderV2.kernel_size = %KERNEL_SIZE
77
+ variational/blocks.EncoderV2.latent_size = %LATENT_SIZE
78
+ variational/blocks.EncoderV2.n_out = 2
79
+ variational/blocks.EncoderV2.ratios = %RATIOS
80
+ variational/blocks.EncoderV2.recurrent_layer = None
81
+ variational/blocks.EncoderV2.spectrogram = None
82
+
83
+ # Parameters for blocks.GeneratorV2:
84
+ # ==============================================================================
85
+ blocks.GeneratorV2.activation = %ACTIVATION
86
+ blocks.GeneratorV2.adain = None
87
+ blocks.GeneratorV2.amplitude_modulation = True
88
+ blocks.GeneratorV2.capacity = %CAPACITY
89
+ blocks.GeneratorV2.causal_convtranspose = False
90
+ blocks.GeneratorV2.data_size = %N_BAND
91
+ blocks.GeneratorV2.dilations = %DILATIONS
92
+ blocks.GeneratorV2.group_resample = False
93
+ blocks.GeneratorV2.group_size = 65536
94
+ blocks.GeneratorV2.keep_dim = False
95
+ blocks.GeneratorV2.kernel_size = %KERNEL_SIZE
96
+ blocks.GeneratorV2.latent_size = @core.get_augmented_latent_size()
97
+ blocks.GeneratorV2.noise_module = None
98
+ blocks.GeneratorV2.ratios = %RATIOS
99
+ blocks.GeneratorV2.recurrent_layer = None
100
+
101
+ # Parameters for core.get_augmented_latent_size:
102
+ # ==============================================================================
103
+ core.get_augmented_latent_size.latent_size = %LATENT_SIZE
104
+ core.get_augmented_latent_size.noise_augmentation = %NOISE_AUGMENTATION
105
+
106
+ # Parameters for dataset.get_dataset:
107
+ # ==============================================================================
108
+ dataset.get_dataset.augmentations = []
109
+
110
+ # Parameters for convs.get_padding:
111
+ # ==============================================================================
112
+ convs.get_padding.dilation = 1
113
+ convs.get_padding.mode = 'centered'
114
+ convs.get_padding.stride = 1
115
+
116
+ # Parameters for variational/convs.get_padding:
117
+ # ==============================================================================
118
+ variational/convs.get_padding.dilation = 1
119
+ variational/convs.get_padding.mode = 'centered'
120
+ variational/convs.get_padding.stride = 1
121
+
122
+ # Parameters for core.MultiScaleSTFT:
123
+ # ==============================================================================
124
+ core.MultiScaleSTFT.magnitude = True
125
+ core.MultiScaleSTFT.normalized = False
126
+ core.MultiScaleSTFT.num_mels = None
127
+ core.MultiScaleSTFT.random_crop = False
128
+ core.MultiScaleSTFT.sample_rate = %SAMPLING_RATE
129
+ core.MultiScaleSTFT.scales = [2048, 1024, 512, 256, 128]
130
+
131
+ # Parameters for blocks.normalization:
132
+ # ==============================================================================
133
+ blocks.normalization.mode = 'weight_norm'
134
+
135
+ # Parameters for variational/blocks.normalization:
136
+ # ==============================================================================
137
+ variational/blocks.normalization.mode = 'weight_norm'
138
+
139
+ # Parameters for model.RAVE:
140
+ # ==============================================================================
141
+ model.RAVE.audio_distance = @core.AudioDistanceV1
142
+ model.RAVE.audio_monitor_epochs = 1
143
+ model.RAVE.balancer = None
144
+ model.RAVE.decoder = @blocks.GeneratorV2
145
+ model.RAVE.discriminator = @descript_discriminator.DescriptDiscriminator
146
+ model.RAVE.enable_pqmf_decode = None
147
+ model.RAVE.enable_pqmf_encode = None
148
+ model.RAVE.encoder = @blocks.VariationalEncoder
149
+ model.RAVE.feature_matching_fun = @feature_matching/core.mean_difference
150
+ model.RAVE.freeze_encoder = True
151
+ model.RAVE.gan_loss = @core.hinge_gan
152
+ model.RAVE.input_mode = 'pqmf'
153
+ model.RAVE.is_mel_input = None
154
+ model.RAVE.latent_size = %LATENT_SIZE
155
+ model.RAVE.loss_weights = None
156
+ model.RAVE.multiband_audio_distance = @core.AudioDistanceV1
157
+ model.RAVE.n_bands = 16
158
+ model.RAVE.n_channels = 1
159
+ model.RAVE.num_skipped_features = 1
160
+ model.RAVE.output_mode = 'pqmf'
161
+ model.RAVE.phase_1_duration = %PHASE_1_DURATION
162
+ model.RAVE.pqmf = @pqmf.CachedPQMF
163
+ model.RAVE.sampling_rate = %SAMPLING_RATE
164
+ model.RAVE.spectrogram = None
165
+ model.RAVE.update_discriminator_every = 4
166
+ model.RAVE.valid_signal_crop = True
167
+ model.RAVE.warmup_quantize = None
168
+ model.RAVE.weights = {'feature_matching': 20}
169
+
170
+ # Parameters for blocks.Snake:
171
+ # ==============================================================================
172
+ # None.
173
+
174
+ # Parameters for variational/blocks.Snake:
175
+ # ==============================================================================
176
+ # None.
177
+
178
+ # Parameters for dataset.split_dataset:
179
+ # ==============================================================================
180
+ dataset.split_dataset.max_residual = 1000
181
+
182
+ # Parameters for blocks.VariationalEncoder:
183
+ # ==============================================================================
184
+ blocks.VariationalEncoder.beta = 1.0
185
+ blocks.VariationalEncoder.encoder = @variational/blocks.EncoderV2
checkpoints/freesoundloop10k_raspi_b2048_r44100/version_0/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90426847a088ef1e8482a69689748d2e2597a0769c9e7f4ef57c259624aeaccb
3
+ size 557978425
checkpoints/freesoundloop10k_raspi_b2048_r44100/version_0/events.out.tfevents.1732634568.tamforce.48320.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed2c82ba18a3f07a4289a81179846fafe84073009e8e9e071696391193cfe95
3
+ size 1208620632
checkpoints/freesoundloop10k_raspi_b2048_r44100/version_0/hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}