MichaelFinkelson committed on
Commit
b47f77a
·
verified ·
1 Parent(s): 330bd77

Upload CAFA_avclip_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. CAFA_avclip_config.json +155 -0
CAFA_avclip_config.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "avclip_controlled_diffusion_cond",
3
+ "sample_size": 2097152,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": true,
10
+ "config": {
11
+ "encoder": {
12
+ "type": "oobleck",
13
+ "requires_grad": false,
14
+ "config": {
15
+ "in_channels": 2,
16
+ "channels": 128,
17
+ "c_mults": [1, 2, 4, 8, 16],
18
+ "strides": [2, 4, 4, 8, 8],
19
+ "latent_dim": 128,
20
+ "use_snake": true
21
+ }
22
+ },
23
+ "decoder": {
24
+ "type": "oobleck",
25
+ "config": {
26
+ "out_channels": 2,
27
+ "channels": 128,
28
+ "c_mults": [1, 2, 4, 8, 16],
29
+ "strides": [2, 4, 4, 8, 8],
30
+ "latent_dim": 64,
31
+ "use_snake": true,
32
+ "final_tanh": false
33
+ }
34
+ },
35
+ "bottleneck": {
36
+ "type": "vae"
37
+ },
38
+ "latent_dim": 64,
39
+ "downsampling_ratio": 2048,
40
+ "io_channels": 2
41
+ }
42
+ },
43
+ "conditioning": {
44
+ "configs": [
45
+ {
46
+ "id": "prompt",
47
+ "type": "t5",
48
+ "config": {
49
+ "t5_model_name": "t5-base",
50
+ "max_length": 128
51
+ }
52
+ },
53
+ {
54
+ "id": "seconds_start",
55
+ "type": "number",
56
+ "config": {
57
+ "min_val": 0,
58
+ "max_val": 512
59
+ }
60
+ },
61
+ {
62
+ "id": "seconds_total",
63
+ "type": "number",
64
+ "config": {
65
+ "min_val": 0,
66
+ "max_val": 512
67
+ }
68
+ },
69
+
70
+ {
71
+ "id": "avclip_signal",
72
+ "type": "avclip_frame",
73
+ "config": {
74
+ }
75
+ }
76
+ ],
77
+ "cond_dim": 768
78
+ },
79
+ "diffusion": {
80
+ "cross_attention_cond_ids": ["prompt", "seconds_start", "seconds_total"],
81
+ "global_cond_ids": ["seconds_start", "seconds_total"],
82
+ "control_ids": ["avclip_signal"],
83
+ "type": "avclip_controlled_dit",
84
+ "config": {
85
+ "only_specific_control": "None",
86
+ "io_channels": 64,
87
+ "embed_dim": 1536,
88
+ "depth": 24,
89
+ "num_heads": 24,
90
+ "cond_token_dim": 768,
91
+ "global_cond_dim": 1536,
92
+ "project_cond_tokens": false,
93
+ "transformer_type": "avclip_controlled_continuous_transformer"
94
+ }
95
+ },
96
+ "ControlNet": {
97
+ "cross_attention_cond_ids": ["prompt", "seconds_start", "seconds_total"],
98
+ "global_cond_ids": ["seconds_start", "seconds_total"],
99
+ "type": "dit",
100
+ "config": {
101
+ "encoder": {
102
+ "type": "oobleck",
103
+ "requires_grad": false,
104
+ "config": {
105
+ "in_channels": 2,
106
+ "channels": 128,
107
+ "c_mults": [1, 2, 4, 8, 16],
108
+ "strides": [2, 4, 4, 8, 8],
109
+ "latent_dim": 128,
110
+ "use_snake": true
111
+ }
112
+ },
113
+ "sample_rate": 44100,
114
+ "io_channels": 64
115
+ }
116
+ },
117
+ "io_channels": 64
118
+ },
119
+ "training": {
120
+ "use_ema": false,
121
+ "log_loss_info": false,
122
+ "optimizer_configs": {
123
+ "diffusion": {
124
+ "optimizer": {
125
+ "type": "AdamW",
126
+ "config": {
127
+ "lr": 5e-5,
128
+ "betas": [0.9, 0.999],
129
+ "weight_decay": 1e-3
130
+ }
131
+ },
132
+ "scheduler": {
133
+ "type": "InverseLR",
134
+ "config": {
135
+ "inv_gamma": 1000000,
136
+ "power": 0.5,
137
+ "warmup": 0.99
138
+ }
139
+ }
140
+ }
141
+ },
142
+ "demo": {
143
+ "demo_every": 2000,
144
+ "demo_steps": 250,
145
+ "num_demos": 4,
146
+ "demo_cond": [
147
+ {"prompt": "Amen break 174 BPM", "seconds_start": 0, "seconds_total": 12, "control_signal": "path"},
148
+ {"prompt": "A beautiful orchestral symphony, classical music", "seconds_start": 0, "seconds_total": 160, "control_signal": "path"},
149
+ {"prompt": "Chill hip-hop beat, chillhop", "seconds_start": 0, "seconds_total": 190, "control_signal": "path"},
150
+ {"prompt": "A pop song about love and loss", "seconds_start": 0, "seconds_total": 180, "control_signal": "path"}
151
+ ],
152
+ "demo_cfg_scales": [3, 6, 9]
153
+ }
154
+ }
155
+ }