AbstractPhil committed on
Commit
8eb89f1
·
verified ·
1 Parent(s): d150013

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +168 -103
config.json CHANGED
@@ -1,186 +1,251 @@
1
  {
2
- "model_type": "GeoDavidCollective",
3
- "architecture": "ProjectiveHead Enhanced Multi-Expert System",
4
- "framework": "pytorch",
5
- "version": "1.0",
6
- "trained_epoch": 24,
7
- "training_date": "2025-10-28T21:07:48.816441",
8
  "num_blocks": 9,
9
- "total_parameters": 690925542,
10
  "num_timestep_bins": 100,
11
  "num_patterns_per_bin": 10,
 
 
12
  "block_configs": {
13
  "down_0": {
14
  "input_dim": 320,
15
  "scale_dim": 64,
16
  "use_belly": true,
17
  "belly_expand": 2.0,
18
- "num_experts": 3,
19
- "num_gate_heads": 3,
20
- "projective_head": "auto"
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  "down_1": {
23
  "input_dim": 640,
24
  "scale_dim": 96,
25
  "use_belly": true,
26
  "belly_expand": 2.0,
27
- "num_experts": 3,
28
- "num_gate_heads": 3,
29
- "projective_head": "auto"
 
 
 
 
 
 
 
 
 
 
 
 
30
  },
31
  "down_2": {
32
  "input_dim": 1280,
33
  "scale_dim": 128,
34
  "use_belly": true,
35
  "belly_expand": 2.0,
 
 
 
 
 
 
 
36
  "num_experts": 3,
 
37
  "num_gate_heads": 3,
38
- "projective_head": "auto"
 
 
 
 
39
  },
40
  "down_3": {
41
  "input_dim": 1280,
42
  "scale_dim": 128,
43
  "use_belly": true,
44
  "belly_expand": 2.0,
 
 
 
 
 
 
 
45
  "num_experts": 3,
 
46
  "num_gate_heads": 3,
47
- "projective_head": "auto"
 
 
 
 
48
  },
49
  "mid": {
50
  "input_dim": 1280,
51
  "scale_dim": 256,
52
  "use_belly": true,
53
- "belly_expand": 1.5,
 
 
 
 
 
 
 
54
  "num_experts": 4,
 
55
  "num_gate_heads": 4,
56
- "projective_head": "custom"
 
 
 
 
57
  },
58
  "up_0": {
59
  "input_dim": 1280,
60
  "scale_dim": 128,
61
  "use_belly": true,
62
  "belly_expand": 2.0,
 
 
 
 
 
 
 
63
  "num_experts": 3,
 
64
  "num_gate_heads": 3,
65
- "projective_head": "auto"
 
 
 
 
66
  },
67
  "up_1": {
68
  "input_dim": 1280,
69
  "scale_dim": 128,
70
  "use_belly": true,
71
  "belly_expand": 2.0,
 
 
 
 
 
 
 
72
  "num_experts": 3,
 
73
  "num_gate_heads": 3,
74
- "projective_head": "auto"
 
 
 
 
75
  },
76
  "up_2": {
77
  "input_dim": 640,
78
  "scale_dim": 96,
79
  "use_belly": true,
80
  "belly_expand": 2.0,
81
- "num_experts": 3,
82
- "num_gate_heads": 3,
83
- "projective_head": "auto"
 
 
 
 
 
 
 
 
 
 
 
 
84
  },
85
  "up_3": {
86
  "input_dim": 320,
87
  "scale_dim": 64,
88
  "use_belly": true,
89
  "belly_expand": 1.5,
90
- "num_experts": 3,
91
- "num_gate_heads": 3,
92
- "projective_head": "auto"
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
  },
 
95
  "block_weights": {
96
- "down_0": 0.8,
97
- "down_1": 1.0,
98
- "down_2": 1.2,
99
- "down_3": 1.3,
100
- "mid": 1.5,
101
- "up_0": 1.3,
102
- "up_1": 1.2,
103
- "up_2": 1.0,
104
- "up_3": 0.8
105
  },
 
106
  "loss_config": {
107
- "feature_similarity_weight": 0.4,
108
  "rose_weight": 0.25,
109
  "ce_weight": 0.15,
110
  "pattern_diversity_weight": 0.05,
111
- "cayley_weight": 0.1,
112
  "cantor_coherence_weight": 0.05,
113
  "use_soft_assignment": true,
114
- "temperature": 0.1,
115
  "cayley_volume_floor": 0.0001,
116
  "cayley_chaos_scale": 1.0,
117
  "cayley_edge_weight": 0.5,
118
- "cayley_gram_weight": 0.1
 
 
 
119
  },
120
- "training": {
121
- "base_model": "runwayml/stable-diffusion-v1-5",
122
- "sd_blocks_used": [
123
- "down_0",
124
- "down_1",
125
- "down_2",
126
- "down_3",
127
- "mid",
128
- "up_0",
129
- "up_1",
130
- "up_2",
131
- "up_3"
132
- ],
133
- "dataset": {
134
- "type": "SymbolicPromptDataset",
135
- "num_samples": 50000,
136
- "complexity_distribution": {
137
- "1": 0.05,
138
- "2": 0.15,
139
- "3": 0.4,
140
- "4": 0.25,
141
- "5": 0.15
142
- },
143
- "seed": 42
144
- },
145
- "batch_size": 16,
146
- "num_epochs": 10,
147
- "optimizer": {
148
- "type": "AdamW",
149
- "learning_rate": 0.001,
150
- "weight_decay": 0.001
151
- },
152
- "pool_mode": "mean",
153
- "checkpoint_interval": 2,
154
- "num_workers": 2,
155
- "pin_memory": true
156
- },
157
- "feature_extraction": {
158
- "method": "SD1.5 UNet Hooks",
159
- "spatial_features": true,
160
- "pooling": "mean",
161
- "dtype": "float32"
162
- },
163
- "capabilities": {
164
- "timestep_classification": true,
165
- "pattern_classification": true,
166
- "joint_classification": true,
167
- "num_classes": 1000,
168
- "geometric_constraints": true,
169
- "multi_expert_routing": true
170
  },
171
- "companions": {
172
- "type": "GeoDavidCompanion",
173
- "timestep_head": "ProjectiveHead",
174
- "pattern_head": "ProjectiveHead",
175
- "geometric_features": [
176
- "cayley_menger_volume",
177
- "edge_lengths",
178
- "gram_matrix"
179
- ],
180
- "loss_functions": [
181
- "rose",
182
- "cayley",
183
- "cantor"
184
- ]
185
  }
186
- }
 
1
  {
2
+ "architecture": "GeoDavidCollective (ENHANCED with ProjectiveHead)",
3
+ "model_version": "1.0.0",
 
 
 
 
4
  "num_blocks": 9,
 
5
  "num_timestep_bins": 100,
6
  "num_patterns_per_bin": 10,
7
+ "total_classes": 1000,
8
+
9
  "block_configs": {
10
  "down_0": {
11
  "input_dim": 320,
12
  "scale_dim": 64,
13
  "use_belly": true,
14
  "belly_expand": 2.0,
15
+ "temperature": 0.07,
16
+ "cantor_alpha_init": 0.5,
17
+ "cantor_tau": 0.25,
18
+ "cantor_levels": 12,
19
+ "cantor_base": 3,
20
+ "simplex_k": 4,
21
+ "simplex_seed_base": 42,
22
+ "num_experts": 2,
23
+ "compression_ratio": 6,
24
+ "num_gate_heads": 2,
25
+ "expert_dropout": 0.1,
26
+ "attention_dropout": 0.1,
27
+ "head_temperature": 0.5,
28
+ "use_head_sparsity": true,
29
+ "head_sparsity_threshold": 0.1
30
  },
31
  "down_1": {
32
  "input_dim": 640,
33
  "scale_dim": 96,
34
  "use_belly": true,
35
  "belly_expand": 2.0,
36
+ "temperature": 0.07,
37
+ "cantor_alpha_init": 0.5,
38
+ "cantor_tau": 0.25,
39
+ "cantor_levels": 12,
40
+ "cantor_base": 3,
41
+ "simplex_k": 4,
42
+ "simplex_seed_base": 42,
43
+ "num_experts": 2,
44
+ "compression_ratio": 6,
45
+ "num_gate_heads": 2,
46
+ "expert_dropout": 0.1,
47
+ "attention_dropout": 0.1,
48
+ "head_temperature": 0.5,
49
+ "use_head_sparsity": true,
50
+ "head_sparsity_threshold": 0.1
51
  },
52
  "down_2": {
53
  "input_dim": 1280,
54
  "scale_dim": 128,
55
  "use_belly": true,
56
  "belly_expand": 2.0,
57
+ "temperature": 0.07,
58
+ "cantor_alpha_init": 0.5,
59
+ "cantor_tau": 0.25,
60
+ "cantor_levels": 12,
61
+ "cantor_base": 3,
62
+ "simplex_k": 4,
63
+ "simplex_seed_base": 42,
64
  "num_experts": 3,
65
+ "compression_ratio": 4,
66
  "num_gate_heads": 3,
67
+ "expert_dropout": 0.1,
68
+ "attention_dropout": 0.1,
69
+ "head_temperature": 0.5,
70
+ "use_head_sparsity": true,
71
+ "head_sparsity_threshold": 0.1
72
  },
73
  "down_3": {
74
  "input_dim": 1280,
75
  "scale_dim": 128,
76
  "use_belly": true,
77
  "belly_expand": 2.0,
78
+ "temperature": 0.07,
79
+ "cantor_alpha_init": 0.5,
80
+ "cantor_tau": 0.25,
81
+ "cantor_levels": 12,
82
+ "cantor_base": 3,
83
+ "simplex_k": 4,
84
+ "simplex_seed_base": 42,
85
  "num_experts": 3,
86
+ "compression_ratio": 4,
87
  "num_gate_heads": 3,
88
+ "expert_dropout": 0.1,
89
+ "attention_dropout": 0.1,
90
+ "head_temperature": 0.5,
91
+ "use_head_sparsity": true,
92
+ "head_sparsity_threshold": 0.1
93
  },
94
  "mid": {
95
  "input_dim": 1280,
96
  "scale_dim": 256,
97
  "use_belly": true,
98
+ "belly_expand": 4.0,
99
+ "temperature": 0.07,
100
+ "cantor_alpha_init": 0.5,
101
+ "cantor_tau": 0.25,
102
+ "cantor_levels": 12,
103
+ "cantor_base": 3,
104
+ "simplex_k": 4,
105
+ "simplex_seed_base": 42,
106
  "num_experts": 4,
107
+ "compression_ratio": 4,
108
  "num_gate_heads": 4,
109
+ "expert_dropout": 0.1,
110
+ "attention_dropout": 0.1,
111
+ "head_temperature": 0.5,
112
+ "use_head_sparsity": true,
113
+ "head_sparsity_threshold": 0.1
114
  },
115
  "up_0": {
116
  "input_dim": 1280,
117
  "scale_dim": 128,
118
  "use_belly": true,
119
  "belly_expand": 2.0,
120
+ "temperature": 0.07,
121
+ "cantor_alpha_init": 0.5,
122
+ "cantor_tau": 0.25,
123
+ "cantor_levels": 12,
124
+ "cantor_base": 3,
125
+ "simplex_k": 4,
126
+ "simplex_seed_base": 42,
127
  "num_experts": 3,
128
+ "compression_ratio": 4,
129
  "num_gate_heads": 3,
130
+ "expert_dropout": 0.1,
131
+ "attention_dropout": 0.1,
132
+ "head_temperature": 0.5,
133
+ "use_head_sparsity": true,
134
+ "head_sparsity_threshold": 0.1
135
  },
136
  "up_1": {
137
  "input_dim": 1280,
138
  "scale_dim": 128,
139
  "use_belly": true,
140
  "belly_expand": 2.0,
141
+ "temperature": 0.07,
142
+ "cantor_alpha_init": 0.5,
143
+ "cantor_tau": 0.25,
144
+ "cantor_levels": 12,
145
+ "cantor_base": 3,
146
+ "simplex_k": 4,
147
+ "simplex_seed_base": 42,
148
  "num_experts": 3,
149
+ "compression_ratio": 4,
150
  "num_gate_heads": 3,
151
+ "expert_dropout": 0.1,
152
+ "attention_dropout": 0.1,
153
+ "head_temperature": 0.5,
154
+ "use_head_sparsity": true,
155
+ "head_sparsity_threshold": 0.1
156
  },
157
  "up_2": {
158
  "input_dim": 640,
159
  "scale_dim": 96,
160
  "use_belly": true,
161
  "belly_expand": 2.0,
162
+ "temperature": 0.07,
163
+ "cantor_alpha_init": 0.5,
164
+ "cantor_tau": 0.25,
165
+ "cantor_levels": 12,
166
+ "cantor_base": 3,
167
+ "simplex_k": 4,
168
+ "simplex_seed_base": 42,
169
+ "num_experts": 2,
170
+ "compression_ratio": 6,
171
+ "num_gate_heads": 2,
172
+ "expert_dropout": 0.1,
173
+ "attention_dropout": 0.1,
174
+ "head_temperature": 0.5,
175
+ "use_head_sparsity": true,
176
+ "head_sparsity_threshold": 0.1
177
  },
178
  "up_3": {
179
  "input_dim": 320,
180
  "scale_dim": 64,
181
  "use_belly": true,
182
  "belly_expand": 1.5,
183
+ "temperature": 0.07,
184
+ "cantor_alpha_init": 0.5,
185
+ "cantor_tau": 0.25,
186
+ "cantor_levels": 12,
187
+ "cantor_base": 3,
188
+ "simplex_k": 4,
189
+ "simplex_seed_base": 42,
190
+ "num_experts": 2,
191
+ "compression_ratio": 6,
192
+ "num_gate_heads": 2,
193
+ "expert_dropout": 0.1,
194
+ "attention_dropout": 0.1,
195
+ "head_temperature": 0.5,
196
+ "use_head_sparsity": true,
197
+ "head_sparsity_threshold": 0.1
198
  }
199
  },
200
+
201
  "block_weights": {
202
+ "down_0": 0.7,
203
+ "down_1": 0.9,
204
+ "down_2": 1.0,
205
+ "down_3": 1.1,
206
+ "mid": 1.2,
207
+ "up_0": 1.1,
208
+ "up_1": 1.0,
209
+ "up_2": 0.9,
210
+ "up_3": 0.7
211
  },
212
+
213
  "loss_config": {
214
+ "feature_similarity_weight": 0.5,
215
  "rose_weight": 0.25,
216
  "ce_weight": 0.15,
217
  "pattern_diversity_weight": 0.05,
218
+ "cayley_weight": 0.05,
219
  "cantor_coherence_weight": 0.05,
220
  "use_soft_assignment": true,
221
+ "temperature": 0.01,
222
  "cayley_volume_floor": 0.0001,
223
  "cayley_chaos_scale": 1.0,
224
  "cayley_edge_weight": 0.5,
225
+ "cayley_gram_weight": 0.1,
226
+ "rose_margin": 1.0,
227
+ "rose_temperature": 0.07,
228
+ "cantor_bandwidth": 0.1
229
  },
230
+
231
+ "training_info": {
232
+ "total_epochs": 40,
233
+ "learning_rate": 0.0001,
234
+ "weight_decay": 0.001,
235
+ "batch_size": 128,
236
+ "num_samples": 50000,
237
+ "pooling_mode": "mean",
238
+ "dataset": "SymbolicPromptDataset",
239
+ "complexity_distribution": {
240
+ "1": 0.15,
241
+ "2": 0.20,
242
+ "3": 0.40,
243
+ "4": 0.25
244
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  },
246
+
247
+ "model_stats": {
248
+ "total_parameters": 872498670,
249
+ "trainable_parameters": 872498670
 
 
 
 
 
 
 
 
 
 
250
  }
251
+ }