AbstractPhil's picture
Update config.json
8eb89f1 verified
{
"architecture": "GeoDavidCollective (ENHANCED with ProjectiveHead)",
"model_version": "1.0.0",
"num_blocks": 9,
"num_timestep_bins": 100,
"num_patterns_per_bin": 10,
"total_classes": 1000,
"block_configs": {
"down_0": {
"input_dim": 320,
"scale_dim": 64,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 2,
"compression_ratio": 6,
"num_gate_heads": 2,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"down_1": {
"input_dim": 640,
"scale_dim": 96,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 2,
"compression_ratio": 6,
"num_gate_heads": 2,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"down_2": {
"input_dim": 1280,
"scale_dim": 128,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 3,
"compression_ratio": 4,
"num_gate_heads": 3,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"down_3": {
"input_dim": 1280,
"scale_dim": 128,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 3,
"compression_ratio": 4,
"num_gate_heads": 3,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"mid": {
"input_dim": 1280,
"scale_dim": 256,
"use_belly": true,
"belly_expand": 4.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 4,
"compression_ratio": 4,
"num_gate_heads": 4,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"up_0": {
"input_dim": 1280,
"scale_dim": 128,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 3,
"compression_ratio": 4,
"num_gate_heads": 3,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"up_1": {
"input_dim": 1280,
"scale_dim": 128,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 3,
"compression_ratio": 4,
"num_gate_heads": 3,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"up_2": {
"input_dim": 640,
"scale_dim": 96,
"use_belly": true,
"belly_expand": 2.0,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 2,
"compression_ratio": 6,
"num_gate_heads": 2,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
},
"up_3": {
"input_dim": 320,
"scale_dim": 64,
"use_belly": true,
"belly_expand": 1.5,
"temperature": 0.07,
"cantor_alpha_init": 0.5,
"cantor_tau": 0.25,
"cantor_levels": 12,
"cantor_base": 3,
"simplex_k": 4,
"simplex_seed_base": 42,
"num_experts": 2,
"compression_ratio": 6,
"num_gate_heads": 2,
"expert_dropout": 0.1,
"attention_dropout": 0.1,
"head_temperature": 0.5,
"use_head_sparsity": true,
"head_sparsity_threshold": 0.1
}
},
"block_weights": {
"down_0": 0.7,
"down_1": 0.9,
"down_2": 1.0,
"down_3": 1.1,
"mid": 1.2,
"up_0": 1.1,
"up_1": 1.0,
"up_2": 0.9,
"up_3": 0.7
},
"loss_config": {
"feature_similarity_weight": 0.5,
"rose_weight": 0.25,
"ce_weight": 0.15,
"pattern_diversity_weight": 0.05,
"cayley_weight": 0.05,
"cantor_coherence_weight": 0.05,
"use_soft_assignment": true,
"temperature": 0.01,
"cayley_volume_floor": 0.0001,
"cayley_chaos_scale": 1.0,
"cayley_edge_weight": 0.5,
"cayley_gram_weight": 0.1,
"rose_margin": 1.0,
"rose_temperature": 0.07,
"cantor_bandwidth": 0.1
},
"training_info": {
"total_epochs": 40,
"learning_rate": 0.0001,
"weight_decay": 0.001,
"batch_size": 128,
"num_samples": 50000,
"pooling_mode": "mean",
"dataset": "SymbolicPromptDataset",
"complexity_distribution": {
"1": 0.15,
"2": 0.20,
"3": 0.40,
"4": 0.25
}
},
"model_stats": {
"total_parameters": 872498670,
"trainable_parameters": 872498670
}
}