{
  "alpha_end": 0.1,
  "alpha_start": 0.5,
  "architectures": [
    "AETHERMicroForCausalLM"
  ],
  "attention_dropout": 0.0,
  "beta_end": 0.2,
  "beta_start": 0.3,
  "bos_token_id": 1,
  "enable_annealing": true,
  "enable_hetero_moe": true,
  "enable_latent_thought": true,
  "enable_magic_init": true,
  "enable_magic_square": true,
  "enable_mtp_loss": true,
  "enable_quality_head": true,
  "enable_rlp": false,
  "enable_self_eval": true,
  "enable_wuxing": true,
  "eos_token_id": 2,
  "gamma_end": 0.7,
  "gamma_start": 0.2,
  "hidden_size": 1024,
  "intermediate_size": 4096,
  "latent_dim": 512,
  "max_k": 2,
  "max_position_embeddings": 2048,
  "model_type": "aether_micro",
  "mtp_num_predictions": 4,
  "num_attention_heads": 16,
  "num_experts_per_tok": 2,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "num_latents": 8,
  "num_shared_experts": 2,
  "num_大_experts": 5,
  "num_小_experts": 15,
  "pad_token_id": 0,
  "quality_head_dim": 4,
  "rlp_info_gain_clip": 5.0,
  "rlp_ntp_weight": 0.7,
  "rlp_quality_weight": 1.0,
  "rlp_target_reward_weight": 0.3,
  "rlp_warmup_steps": 1500,
  "rms_norm_eps": 1e-06,
  "rope_theta": 10000.0,
  "self_eval_dims": 4,
  "shared_intermediate_size": 1536,
  "tie_word_embeddings": false,
  "top_k": 2,
  "torch_dtype": "float32",
  "transformers_version": "4.55.2",
  "use_cache": true,
  "vocab_size": 64000,
  "大_intermediate_size": 2048,
  "小_intermediate_size": 1024,
  "auto_map": {
    "AutoConfig": "configuration_aether_micro.AETHERMicroConfig",
    "AutoModelForCausalLM": "modeling_aether_micro.AETHERMicroForCausalLM"
  }
}