S3Sound committed on
Commit
6ba9196
·
verified ·
1 Parent(s): 6ffd986

Upload acid_v4_saos_e22_s16896.ckpt, which was trained from the base SAO-small model in three runs, with the batch size increasing from 2 to 4 to 8.

Browse files
acid_v4_saos_e22_s16896_model_config.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "diffusion_cond",
3
+ "sample_size": 354304,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": false,
10
+ "model_half": true,
11
+ "config": {
12
+ "encoder": {
13
+ "type": "oobleck",
14
+ "requires_grad": false,
15
+ "config": {
16
+ "in_channels": 2,
17
+ "channels": 128,
18
+ "c_mults": [1, 2, 4, 8, 16],
19
+ "strides": [2, 4, 4, 8, 8],
20
+ "latent_dim": 128,
21
+ "use_snake": true
22
+ }
23
+ },
24
+ "decoder": {
25
+ "type": "oobleck",
26
+ "config": {
27
+ "out_channels": 2,
28
+ "channels": 128,
29
+ "c_mults": [1, 2, 4, 8, 16],
30
+ "strides": [2, 4, 4, 8, 8],
31
+ "latent_dim": 64,
32
+ "use_snake": true,
33
+ "final_tanh": false
34
+ }
35
+ },
36
+ "bottleneck": {
37
+ "type": "vae"
38
+ },
39
+ "latent_dim": 64,
40
+ "downsampling_ratio": 2048,
41
+ "io_channels": 2
42
+ }
43
+ },
44
+ "conditioning": {
45
+ "configs": [
46
+ {
47
+ "id": "prompt",
48
+ "type": "t5",
49
+ "config": {
50
+ "t5_model_name": "google/t5gemma-b-b-ul2",
51
+ "max_length": 128
52
+ }
53
+ },
54
+ {
55
+ "id": "seconds_total",
56
+ "type": "number",
57
+ "config": {
58
+ "min_val": 0,
59
+ "max_val": 256
60
+ }
61
+ }
62
+ ],
63
+ "cond_dim": 768
64
+ },
65
+ "diffusion": {
66
+ "cross_attention_cond_ids": ["prompt", "seconds_total"],
67
+ "global_cond_ids": ["seconds_total"],
68
+ "diffusion_objective": "rectified_flow",
69
+ "distribution_shift_options": {
70
+ "min_length": 256,
71
+ "max_length": 4096
72
+ },
73
+ "type": "dit",
74
+ "config": {
75
+ "io_channels": 64,
76
+ "embed_dim": 1024,
77
+ "depth": 16,
78
+ "num_heads": 8,
79
+ "cond_token_dim": 768,
80
+ "global_cond_dim": 768,
81
+ "transformer_type": "continuous_transformer",
82
+ "attn_kwargs": {
83
+ "qk_norm": "ln"
84
+ }
85
+ }
86
+ },
87
+ "io_channels": 64
88
+ },
89
+ "training": {
90
+ "use_ema": true,
91
+ "log_loss_info": false,
92
+ "pre_encoded": true,
93
+ "timestep_sampler": "trunc_logit_normal",
94
+ "optimizer_configs": {
95
+ "diffusion": {
96
+ "optimizer": {
97
+ "type": "AdamW8bit",
98
+ "config": {
99
+ "lr": 5e-5,
100
+ "betas": [0.9, 0.999],
101
+ "eps": 1e-8,
102
+ "weight_decay": 0.01,
103
+ "percentile_clipping": 100,
104
+ "block_wise": true
105
+ }
106
+ },
107
+ "scheduler": {
108
+ "type": "InverseLR",
109
+ "config": {
110
+ "inv_gamma": 1000000,
111
+ "power": 0.5,
112
+ "warmup": 0.995
113
+ }
114
+ }
115
+ }
116
+ },
117
+ "demo": {
118
+ "demo_every": 768,
119
+ "demo_steps": 100,
120
+ "num_demos": 8,
121
+ "demo_cond": [
122
+ {"prompt": "acid lead in F# minor, extremely fast tempo of 200 BPM, 3/4 time signature, 4-bar loop, analog distortion", "seconds_total": 8},
123
+ {"prompt": "drum breaks 174 BPM", "seconds_total": 6},
124
+ {"prompt": "A short, beautiful piano riff in C minor", "seconds_total": 6},
125
+ {"prompt": "Tight Snare Drum", "seconds_total": 1},
126
+ {"prompt": "Glitchy bass design, I used Serum for this", "seconds_total": 4},
127
+ {"prompt": "Synth pluck arp with reverb and delay, 128 BPM", "seconds_total": 6},
128
+ {"prompt": "Acid A minor 120 BPM 4/4 4 bar loop", "seconds_total": 8},
129
+ {"prompt": "Electronic, with a synthesized, futuristic tone. It has a steady, rhythmic pattern and a slightly retro, 1980s-inspired sound. The bass is prominent, giving the track a pulsing, driving feel. The music sets a mood that is upbeat, energetic, and slightly playful. It suggests a setting that could be related to technology, gaming, or a lighthearted, futuristic scenario.", "seconds_total": 8}
130
+ ],
131
+ "demo_cfg_scales": [0.5, 1, 1.5, 8]
132
+ }
133
+ }
134
+ }