geo-david-collective-sd15-base-e40 / config.json

Update config.json

8eb89f1 verified 2 months ago

6.32 kB

	{
	"architecture": "GeoDavidCollective (ENHANCED with ProjectiveHead)",
	"model_version": "1.0.0",
	"num_blocks": 9,
	"num_timestep_bins": 100,
	"num_patterns_per_bin": 10,
	"total_classes": 1000,

	"block_configs": {
	"down_0": {
	"input_dim": 320,
	"scale_dim": 64,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 2,
	"compression_ratio": 6,
	"num_gate_heads": 2,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"down_1": {
	"input_dim": 640,
	"scale_dim": 96,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 2,
	"compression_ratio": 6,
	"num_gate_heads": 2,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"down_2": {
	"input_dim": 1280,
	"scale_dim": 128,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 3,
	"compression_ratio": 4,
	"num_gate_heads": 3,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"down_3": {
	"input_dim": 1280,
	"scale_dim": 128,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 3,
	"compression_ratio": 4,
	"num_gate_heads": 3,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"mid": {
	"input_dim": 1280,
	"scale_dim": 256,
	"use_belly": true,
	"belly_expand": 4.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 4,
	"compression_ratio": 4,
	"num_gate_heads": 4,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"up_0": {
	"input_dim": 1280,
	"scale_dim": 128,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 3,
	"compression_ratio": 4,
	"num_gate_heads": 3,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"up_1": {
	"input_dim": 1280,
	"scale_dim": 128,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 3,
	"compression_ratio": 4,
	"num_gate_heads": 3,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"up_2": {
	"input_dim": 640,
	"scale_dim": 96,
	"use_belly": true,
	"belly_expand": 2.0,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 2,
	"compression_ratio": 6,
	"num_gate_heads": 2,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	},
	"up_3": {
	"input_dim": 320,
	"scale_dim": 64,
	"use_belly": true,
	"belly_expand": 1.5,
	"temperature": 0.07,
	"cantor_alpha_init": 0.5,
	"cantor_tau": 0.25,
	"cantor_levels": 12,
	"cantor_base": 3,
	"simplex_k": 4,
	"simplex_seed_base": 42,
	"num_experts": 2,
	"compression_ratio": 6,
	"num_gate_heads": 2,
	"expert_dropout": 0.1,
	"attention_dropout": 0.1,
	"head_temperature": 0.5,
	"use_head_sparsity": true,
	"head_sparsity_threshold": 0.1
	}
	},

	"block_weights": {
	"down_0": 0.7,
	"down_1": 0.9,
	"down_2": 1.0,
	"down_3": 1.1,
	"mid": 1.2,
	"up_0": 1.1,
	"up_1": 1.0,
	"up_2": 0.9,
	"up_3": 0.7
	},

	"loss_config": {
	"feature_similarity_weight": 0.5,
	"rose_weight": 0.25,
	"ce_weight": 0.15,
	"pattern_diversity_weight": 0.05,
	"cayley_weight": 0.05,
	"cantor_coherence_weight": 0.05,
	"use_soft_assignment": true,
	"temperature": 0.01,
	"cayley_volume_floor": 0.0001,
	"cayley_chaos_scale": 1.0,
	"cayley_edge_weight": 0.5,
	"cayley_gram_weight": 0.1,
	"rose_margin": 1.0,
	"rose_temperature": 0.07,
	"cantor_bandwidth": 0.1
	},

	"training_info": {
	"total_epochs": 40,
	"learning_rate": 0.0001,
	"weight_decay": 0.001,
	"batch_size": 128,
	"num_samples": 50000,
	"pooling_mode": "mean",
	"dataset": "SymbolicPromptDataset",
	"complexity_distribution": {
	"1": 0.15,
	"2": 0.20,
	"3": 0.40,
	"4": 0.25
	}
	},

	"model_stats": {
	"total_parameters": 872498670,
	"trainable_parameters": 872498670
	}
	}