{
  "architecture": "GeoDavidCollective (ENHANCED with ProjectiveHead)",
  "model_version": "1.0.0",
  "num_blocks": 9,
  "num_timestep_bins": 100,
  "num_patterns_per_bin": 10,
  "total_classes": 1000,
  "block_configs": {
    "down_0": {
      "input_dim": 320,
      "scale_dim": 64,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 2,
      "compression_ratio": 6,
      "num_gate_heads": 2,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "down_1": {
      "input_dim": 640,
      "scale_dim": 96,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 2,
      "compression_ratio": 6,
      "num_gate_heads": 2,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "down_2": {
      "input_dim": 1280,
      "scale_dim": 128,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 3,
      "compression_ratio": 4,
      "num_gate_heads": 3,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "down_3": {
      "input_dim": 1280,
      "scale_dim": 128,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 3,
      "compression_ratio": 4,
      "num_gate_heads": 3,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "mid": {
      "input_dim": 1280,
      "scale_dim": 256,
      "use_belly": true,
      "belly_expand": 4.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 4,
      "compression_ratio": 4,
      "num_gate_heads": 4,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "up_0": {
      "input_dim": 1280,
      "scale_dim": 128,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 3,
      "compression_ratio": 4,
      "num_gate_heads": 3,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "up_1": {
      "input_dim": 1280,
      "scale_dim": 128,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 3,
      "compression_ratio": 4,
      "num_gate_heads": 3,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "up_2": {
      "input_dim": 640,
      "scale_dim": 96,
      "use_belly": true,
      "belly_expand": 2.0,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 2,
      "compression_ratio": 6,
      "num_gate_heads": 2,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    },
    "up_3": {
      "input_dim": 320,
      "scale_dim": 64,
      "use_belly": true,
      "belly_expand": 1.5,
      "temperature": 0.07,
      "cantor_alpha_init": 0.5,
      "cantor_tau": 0.25,
      "cantor_levels": 12,
      "cantor_base": 3,
      "simplex_k": 4,
      "simplex_seed_base": 42,
      "num_experts": 2,
      "compression_ratio": 6,
      "num_gate_heads": 2,
      "expert_dropout": 0.1,
      "attention_dropout": 0.1,
      "head_temperature": 0.5,
      "use_head_sparsity": true,
      "head_sparsity_threshold": 0.1
    }
  },
  "block_weights": {
    "down_0": 0.7,
    "down_1": 0.9,
    "down_2": 1.0,
    "down_3": 1.1,
    "mid": 1.2,
    "up_0": 1.1,
    "up_1": 1.0,
    "up_2": 0.9,
    "up_3": 0.7
  },
  "loss_config": {
    "feature_similarity_weight": 0.5,
    "rose_weight": 0.25,
    "ce_weight": 0.15,
    "pattern_diversity_weight": 0.05,
    "cayley_weight": 0.05,
    "cantor_coherence_weight": 0.05,
    "use_soft_assignment": true,
    "temperature": 0.01,
    "cayley_volume_floor": 0.0001,
    "cayley_chaos_scale": 1.0,
    "cayley_edge_weight": 0.5,
    "cayley_gram_weight": 0.1,
    "rose_margin": 1.0,
    "rose_temperature": 0.07,
    "cantor_bandwidth": 0.1
  },
  "training_info": {
    "total_epochs": 40,
    "learning_rate": 0.0001,
    "weight_decay": 0.001,
    "batch_size": 128,
    "num_samples": 50000,
    "pooling_mode": "mean",
    "dataset": "SymbolicPromptDataset",
    "complexity_distribution": {
      "1": 0.15,
      "2": 0.20,
      "3": 0.40,
      "4": 0.25
    }
  },
  "model_stats": {
    "total_parameters": 872498670,
    "trainable_parameters": 872498670
  }
}