{
"alpha_end": 0.1,
"alpha_start": 0.5,
"architectures": [
"AETHERMicroForCausalLM"
],
"attention_dropout": 0.0,
"beta_end": 0.2,
"beta_start": 0.3,
"bos_token_id": 1,
"enable_annealing": true,
"enable_hetero_moe": true,
"enable_latent_thought": true,
"enable_magic_init": true,
"enable_magic_square": true,
"enable_mtp_loss": true,
"enable_quality_head": true,
"enable_rlp": false,
"enable_self_eval": true,
"enable_wuxing": true,
"eos_token_id": 2,
"gamma_end": 0.7,
"gamma_start": 0.2,
"hidden_size": 1024,
"intermediate_size": 4096,
"latent_dim": 512,
"max_k": 2,
"max_position_embeddings": 2048,
"model_type": "aether_micro",
"mtp_num_predictions": 4,
"num_attention_heads": 16,
"num_experts_per_tok": 2,
"num_hidden_layers": 24,
"num_key_value_heads": 4,
"num_latents": 8,
"num_shared_experts": 2,
"num_大_experts": 5,
"num_小_experts": 15,
"pad_token_id": 0,
"quality_head_dim": 4,
"rlp_info_gain_clip": 5.0,
"rlp_ntp_weight": 0.7,
"rlp_quality_weight": 1.0,
"rlp_target_reward_weight": 0.3,
"rlp_warmup_steps": 1500,
"rms_norm_eps": 1e-06,
"rope_theta": 10000.0,
"self_eval_dims": 4,
"shared_intermediate_size": 1536,
"tie_word_embeddings": false,
"top_k": 2,
"torch_dtype": "float32",
"transformers_version": "4.55.2",
"use_cache": true,
"vocab_size": 64000,
"大_intermediate_size": 2048,
"小_intermediate_size": 1024,
"auto_map": {
"AutoConfig": "configuration_aether_micro.AETHERMicroConfig",
"AutoModelForCausalLM": "modeling_aether_micro.AETHERMicroForCausalLM"
}
}