S3Sound committed on
Commit
6463650
·
verified ·
1 Parent(s): b2a7ecc

Upload .\sao_small\acid_v2_base_model_config.json with huggingface_hub

Browse files
.//sao_small//acid_v2_base_model_config.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "diffusion_cond",
3
+ "sample_size": 352800,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": false,
10
+ "model_half": true,
11
+ "config": {
12
+ "encoder": {
13
+ "type": "oobleck",
14
+ "requires_grad": false,
15
+ "config": {
16
+ "in_channels": 2,
17
+ "channels": 128,
18
+ "c_mults": [1, 2, 4, 8, 16],
19
+ "strides": [2, 4, 4, 8, 8],
20
+ "latent_dim": 128,
21
+ "use_snake": true
22
+ }
23
+ },
24
+ "decoder": {
25
+ "type": "oobleck",
26
+ "config": {
27
+ "out_channels": 2,
28
+ "channels": 128,
29
+ "c_mults": [1, 2, 4, 8, 16],
30
+ "strides": [2, 4, 4, 8, 8],
31
+ "latent_dim": 64,
32
+ "use_snake": true,
33
+ "final_tanh": false
34
+ }
35
+ },
36
+ "bottleneck": {
37
+ "type": "vae"
38
+ },
39
+ "latent_dim": 64,
40
+ "downsampling_ratio": 2048,
41
+ "io_channels": 2
42
+ }
43
+ },
44
+ "conditioning": {
45
+ "configs": [
46
+ {
47
+ "id": "prompt",
48
+ "type": "t5",
49
+ "config": {
50
+ "t5_model_name": "t5-base",
51
+ "max_length": 64
52
+ }
53
+ },
54
+ {
55
+ "id": "seconds_total",
56
+ "type": "number",
57
+ "config": {
58
+ "min_val": 0,
59
+ "max_val": 256
60
+ }
61
+ }
62
+ ],
63
+ "cond_dim": 768
64
+ },
65
+ "diffusion": {
66
+ "cross_attention_cond_ids": ["prompt", "seconds_total"],
67
+ "global_cond_ids": ["seconds_total"],
68
+ "diffusion_objective": "rectified_flow",
69
+ "distribution_shift_options": {
70
+ "min_length": 256,
71
+ "max_length": 4096
72
+ },
73
+ "type": "dit",
74
+ "config": {
75
+ "io_channels": 64,
76
+ "embed_dim": 1024,
77
+ "depth": 16,
78
+ "num_heads": 8,
79
+ "cond_token_dim": 768,
80
+ "global_cond_dim": 768,
81
+ "transformer_type": "continuous_transformer",
82
+ "attn_kwargs": {
83
+ "qk_norm": "ln"
84
+ }
85
+ }
86
+ },
87
+ "io_channels": 64
88
+ },
89
+ "training": {
90
+ "use_ema": true,
91
+ "log_loss_info": false,
92
+ "pre_encoded": true,
93
+ "timestep_sampler": "trunc_logit_normal",
94
+ "optimizer_configs": {
95
+ "diffusion": {
96
+ "optimizer": {
97
+ "type": "AdamW",
98
+ "config": {
99
+ "lr": 2e-4,
100
+ "betas": [0.9, 0.95],
101
+ "eps": 1e-8,
102
+ "weight_decay": 0.01,
103
+ "foreach": true
104
+ }
105
+ },
106
+ "scheduler": {
107
+ "type": "InverseLR",
108
+ "config": {
109
+ "inv_gamma": 1000000,
110
+ "power": 0.5,
111
+ "warmup": 0.995
112
+ }
113
+ }
114
+ }
115
+ },
116
+ "demo": {
117
+ "demo_every": 100,
118
+ "demo_steps": 50,
119
+ "num_demos": 7,
120
+ "demo_cond": [
121
+ {"prompt": "Amen break 174 BPM", "seconds_total": 6},
122
+ {"prompt": "drum breaks 174 BPM", "seconds_total": 6},
123
+ {"prompt": "A short, beautiful piano riff in C minor", "seconds_total": 6},
124
+ {"prompt": "Tight Snare Drum", "seconds_total": 1},
125
+ {"prompt": "Glitchy bass design", "seconds_total": 4},
126
+ {"prompt": "Glitchy bass design, I used Serum for this", "seconds_total": 4},
127
+ {"prompt": "Synth pluck arp with reverb and delay, 128 BPM", "seconds_total": 6}
128
+ ],
129
+ "demo_cfg_scales": [1, 2, 4, 8]
130
+ }
131
+ }
132
+ }