BernardoTorres commited on
Commit
5408758
·
verified ·
1 Parent(s): 4f561af

Upload folder using huggingface_hub

Browse files
0a3afbec_weights/autoencoder_inference_model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4311d9460c04b18acde791c4e3348f101e3d971cc78fd27729bbd90b4f145efd
3
+ size 232976270
0a3afbec_weights/autoencoder_inference_model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60838454393ea3991c000d98c483303607888e7eac013b3d33f7402adea71b97
3
+ size 232976270
0a3afbec_weights/diffusion_kwargs_best.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_step: 0.1
2
+ end_exp: 2.0
3
+ p_mean: -1.1
4
+ p_std: 2.0
5
+ rho: 7.0
6
+ schedule: exponential
7
+ sigma_data: 0.5
8
+ sigma_max: 80.0
9
+ sigma_min: 0.002
10
+ start_exp: 1.0
11
+ total_iters: 800000
12
+ use_lognormal: true
0a3afbec_weights/diffusion_kwargs_last.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_step: 0.1
2
+ end_exp: 2.0
3
+ p_mean: -1.1
4
+ p_std: 2.0
5
+ rho: 7.0
6
+ schedule: exponential
7
+ sigma_data: 0.5
8
+ sigma_max: 80.0
9
+ sigma_min: 0.002
10
+ start_exp: 1.0
11
+ total_iters: 800000
12
+ use_lognormal: true
0a3afbec_weights/encoder_inference_model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4402018f1ffb9ab261bf510fdc6f2d6743d700fc45c772ed8af9567d7157d26c
3
+ size 65100948
0a3afbec_weights/encoder_inference_model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e2ac2a3a9d61eef882dde6224a829b9b6802d543fdb5b50b378329f8cd9161d
3
+ size 65100948
0a3afbec_weights/frontend_kwargs_best.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ alpha_rescale: 0.65
2
+ beta_rescale: 0.34
3
+ hop_size: 512
4
+ n_fft_factor: 4
5
+ sample_rate: 44100
0a3afbec_weights/frontend_kwargs_last.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ alpha_rescale: 0.65
2
+ beta_rescale: 0.34
3
+ hop_size: 512
4
+ n_fft_factor: 4
5
+ sample_rate: 44100
0a3afbec_weights/generator_kwargs_best.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_list:
2
+ - 0
3
+ - 0
4
+ - 1
5
+ - 1
6
+ - 1
7
+ attention_list_encoder:
8
+ - 0
9
+ - 0
10
+ - 1
11
+ - 1
12
+ - 1
13
+ base_channels: 64
14
+ bottleneck_base_channels: 512
15
+ bottleneck_channels: 64
16
+ cond_channels: 256
17
+ data_channels: 2
18
+ dropout_rate: 0.0
19
+ fourier_scale: 0.2
20
+ freq_downsample_list:
21
+ - 1
22
+ - 0
23
+ - 0
24
+ - 0
25
+ frequency_scaling: true
26
+ heads: 4
27
+ hop: 512
28
+ init_as_zero: true
29
+ layers_list:
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ layers_list_encoder:
36
+ - 1
37
+ - 1
38
+ - 1
39
+ - 1
40
+ - 1
41
+ min_res_dropout: 16
42
+ multipliers_list:
43
+ - 1
44
+ - 2
45
+ - 4
46
+ - 4
47
+ - 4
48
+ normalization: true
49
+ num_bottleneck_layers: 4
50
+ pre_normalize_2d_to_1d: true
51
+ pre_normalize_downsampling_encoder: true
52
+ sigma_data: 0.5
53
+ sigma_max: 80.0
54
+ sigma_min: 0.002
55
+ use_fourier: false
0a3afbec_weights/generator_kwargs_last.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_list:
2
+ - 0
3
+ - 0
4
+ - 1
5
+ - 1
6
+ - 1
7
+ attention_list_encoder:
8
+ - 0
9
+ - 0
10
+ - 1
11
+ - 1
12
+ - 1
13
+ base_channels: 64
14
+ bottleneck_base_channels: 512
15
+ bottleneck_channels: 64
16
+ cond_channels: 256
17
+ data_channels: 2
18
+ dropout_rate: 0.0
19
+ fourier_scale: 0.2
20
+ freq_downsample_list:
21
+ - 1
22
+ - 0
23
+ - 0
24
+ - 0
25
+ frequency_scaling: true
26
+ heads: 4
27
+ hop: 512
28
+ init_as_zero: true
29
+ layers_list:
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ layers_list_encoder:
36
+ - 1
37
+ - 1
38
+ - 1
39
+ - 1
40
+ - 1
41
+ min_res_dropout: 16
42
+ multipliers_list:
43
+ - 1
44
+ - 2
45
+ - 4
46
+ - 4
47
+ - 4
48
+ normalization: true
49
+ num_bottleneck_layers: 4
50
+ pre_normalize_2d_to_1d: true
51
+ pre_normalize_downsampling_encoder: true
52
+ sigma_data: 0.5
53
+ sigma_max: 80.0
54
+ sigma_min: 0.002
55
+ use_fourier: false
2f4c6d21_weights/autoencoder_inference_model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b9e50868159aaa1c96c9b5471154b3e71719f3842e18220a5272dbe27f1274
3
+ size 232976270
2f4c6d21_weights/autoencoder_inference_model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b817f4f63ad52ec84c36f0c2cb1bd6b3e8e5bc513124a08dcb1eb992bd0986
3
+ size 232976270
2f4c6d21_weights/diffusion_kwargs_best.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_step: 0.1
2
+ end_exp: 2.0
3
+ p_mean: -1.1
4
+ p_std: 2.0
5
+ rho: 7.0
6
+ schedule: exponential
7
+ sigma_data: 0.5
8
+ sigma_max: 80.0
9
+ sigma_min: 0.002
10
+ start_exp: 1.0
11
+ total_iters: 800000
12
+ use_lognormal: true
2f4c6d21_weights/diffusion_kwargs_last.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_step: 0.1
2
+ end_exp: 2.0
3
+ p_mean: -1.1
4
+ p_std: 2.0
5
+ rho: 7.0
6
+ schedule: exponential
7
+ sigma_data: 0.5
8
+ sigma_max: 80.0
9
+ sigma_min: 0.002
10
+ start_exp: 1.0
11
+ total_iters: 800000
12
+ use_lognormal: true
2f4c6d21_weights/encoder_inference_model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c87fd5359299c1f423e331123cb346267a3a68881be3d4586a796d5fcf0b244
3
+ size 65100948
2f4c6d21_weights/encoder_inference_model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44eaa9e4019aff4b940fec92fbed3d54c40694f66f3ea716d8e7da06d14b13e2
3
+ size 65100948
2f4c6d21_weights/frontend_kwargs_best.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ alpha_rescale: 0.65
2
+ beta_rescale: 0.34
3
+ hop_size: 512
4
+ n_fft_factor: 4
5
+ sample_rate: 44100
2f4c6d21_weights/frontend_kwargs_last.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ alpha_rescale: 0.65
2
+ beta_rescale: 0.34
3
+ hop_size: 512
4
+ n_fft_factor: 4
5
+ sample_rate: 44100
2f4c6d21_weights/generator_kwargs_best.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_list:
2
+ - 0
3
+ - 0
4
+ - 1
5
+ - 1
6
+ - 1
7
+ attention_list_encoder:
8
+ - 0
9
+ - 0
10
+ - 1
11
+ - 1
12
+ - 1
13
+ base_channels: 64
14
+ bottleneck_base_channels: 512
15
+ bottleneck_channels: 64
16
+ cond_channels: 256
17
+ data_channels: 2
18
+ dropout_rate: 0.0
19
+ fourier_scale: 0.2
20
+ freq_downsample_list:
21
+ - 1
22
+ - 0
23
+ - 0
24
+ - 0
25
+ frequency_scaling: true
26
+ heads: 4
27
+ hop: 512
28
+ init_as_zero: true
29
+ layers_list:
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ layers_list_encoder:
36
+ - 1
37
+ - 1
38
+ - 1
39
+ - 1
40
+ - 1
41
+ min_res_dropout: 16
42
+ multipliers_list:
43
+ - 1
44
+ - 2
45
+ - 4
46
+ - 4
47
+ - 4
48
+ normalization: true
49
+ num_bottleneck_layers: 4
50
+ pre_normalize_2d_to_1d: true
51
+ pre_normalize_downsampling_encoder: true
52
+ sigma_data: 0.5
53
+ sigma_max: 80.0
54
+ sigma_min: 0.002
55
+ use_fourier: false
2f4c6d21_weights/generator_kwargs_last.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_list:
2
+ - 0
3
+ - 0
4
+ - 1
5
+ - 1
6
+ - 1
7
+ attention_list_encoder:
8
+ - 0
9
+ - 0
10
+ - 1
11
+ - 1
12
+ - 1
13
+ base_channels: 64
14
+ bottleneck_base_channels: 512
15
+ bottleneck_channels: 64
16
+ cond_channels: 256
17
+ data_channels: 2
18
+ dropout_rate: 0.0
19
+ fourier_scale: 0.2
20
+ freq_downsample_list:
21
+ - 1
22
+ - 0
23
+ - 0
24
+ - 0
25
+ frequency_scaling: true
26
+ heads: 4
27
+ hop: 512
28
+ init_as_zero: true
29
+ layers_list:
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ layers_list_encoder:
36
+ - 1
37
+ - 1
38
+ - 1
39
+ - 1
40
+ - 1
41
+ min_res_dropout: 16
42
+ multipliers_list:
43
+ - 1
44
+ - 2
45
+ - 4
46
+ - 4
47
+ - 4
48
+ normalization: true
49
+ num_bottleneck_layers: 4
50
+ pre_normalize_2d_to_1d: true
51
+ pre_normalize_downsampling_encoder: true
52
+ sigma_data: 0.5
53
+ sigma_max: 80.0
54
+ sigma_min: 0.002
55
+ use_fourier: false