S3Sound committed on
Commit
fd3073c
·
verified ·
1 Parent(s): ea0d9df

Upload kickbass_v1_saos_e257_s18870_model_config.json with huggingface_hub

Browse files
kickbass_v1_saos_e257_s18870_model_config.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "diffusion_cond",
3
+ "sample_size": 524288,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": true,
10
+ "model_half": true,
11
+ "chunked": true,
12
+ "config": {
13
+ "encoder": {
14
+ "type": "oobleck",
15
+ "requires_grad": false,
16
+ "config": {
17
+ "in_channels": 2,
18
+ "channels": 128,
19
+ "c_mults": [1, 2, 4, 8, 16],
20
+ "strides": [2, 4, 4, 8, 8],
21
+ "latent_dim": 128,
22
+ "use_snake": true
23
+ }
24
+ },
25
+ "decoder": {
26
+ "type": "oobleck",
27
+ "config": {
28
+ "out_channels": 2,
29
+ "channels": 128,
30
+ "c_mults": [1, 2, 4, 8, 16],
31
+ "strides": [2, 4, 4, 8, 8],
32
+ "latent_dim": 64,
33
+ "use_snake": true,
34
+ "final_tanh": false
35
+ }
36
+ },
37
+ "bottleneck": {
38
+ "type": "vae"
39
+ },
40
+ "latent_dim": 64,
41
+ "downsampling_ratio": 2048,
42
+ "io_channels": 2
43
+ }
44
+ },
45
+ "conditioning": {
46
+ "configs": [
47
+ {
48
+ "id": "prompt",
49
+ "type": "t5",
50
+ "config": {
51
+ "t5_model_name": "google/t5gemma-b-b-ul2",
52
+ "max_length": 128
53
+ }
54
+ },
55
+ {
56
+ "id": "seconds_total",
57
+ "type": "number",
58
+ "config": {
59
+ "min_val": 0,
60
+ "max_val": 256
61
+ }
62
+ }
63
+ ],
64
+ "cond_dim": 768
65
+ },
66
+ "diffusion": {
67
+ "cross_attention_cond_ids": ["prompt", "seconds_total"],
68
+ "global_cond_ids": ["seconds_total"],
69
+ "diffusion_objective": "rectified_flow",
70
+ "distribution_shift_options": {
71
+ "min_length": 256,
72
+ "max_length": 4096
73
+ },
74
+ "type": "dit",
75
+ "config": {
76
+ "io_channels": 64,
77
+ "embed_dim": 1024,
78
+ "depth": 16,
79
+ "num_heads": 8,
80
+ "cond_token_dim": 768,
81
+ "global_cond_dim": 768,
82
+ "transformer_type": "continuous_transformer",
83
+ "attn_kwargs": {
84
+ "qk_norm": "ln"
85
+ }
86
+ }
87
+ },
88
+ "io_channels": 64
89
+ },
90
+ "training": {
91
+ "use_ema": true,
92
+ "log_loss_info": false,
93
+ "pre_encoded": true,
94
+ "timestep_sampler": "trunc_logit_normal",
95
+ "optimizer_configs": {
96
+ "diffusion": {
97
+ "optimizer": {
98
+ "type": "AdamW8bit",
99
+ "config": {
100
+ "lr": 1e-5,
101
+ "betas": [0.9, 0.999],
102
+ "eps": 1e-8,
103
+ "weight_decay": 1e-2,
104
+ "percentile_clipping": 95,
105
+ "block_wise": true
106
+ }
107
+ },
108
+ "scheduler": {
109
+ "type": "CosineAnnealingWarmRestarts",
110
+ "config": {
111
+ "T_0": 10,
112
+ "T_mult": 2
113
+ }
114
+ }
115
+ }
116
+ },
117
+ "demo": {
118
+ "demo_every": 512,
119
+ "demo_steps": 100,
120
+ "num_demos": 7,
121
+ "demo_cond": [
122
+ {"prompt": "kick", "seconds_total": 2},
123
+ {"prompt": "bass", "seconds_total": 2},
124
+ {"prompt": "drum breaks 174 BPM", "seconds_total": 6},
125
+ {"prompt": "A short, beautiful piano riff in C minor", "seconds_total": 6},
126
+ {"prompt": "Tight Snare Drum", "seconds_total": 1},
127
+ {"prompt": "Glitchy bass design, I used Serum for this", "seconds_total": 4},
128
+ {"prompt": "Synth pluck arp with reverb and delay, 128 BPM", "seconds_total": 6}
129
+ ],
130
+ "demo_cfg_scales": [0.5, 1, 1.5, 8]
131
+ }
132
+ }
133
+ }