{
"dim": 4096,
"n_layers": 36,
"head_dim": 128,
"hidden_dim": 12288,
"n_heads": 32,
"n_kv_heads": 32,
"rope_theta": 10000.0,
"norm_eps": 1e-06,
"vocab_size": 131072,
"tied_embeddings": false,
"max_position_embeddings": 1048576,
"llama_4_scaling": {
"original_max_position_embeddings": 8192,
"beta": 0.1
},
"q_lora_rank": 1024,
"qk_rope_head_dim": 64,
"qk_nope_head_dim": 64,
"kv_lora_rank": 256,
"v_head_dim": 128,
"quantization": {
"qformat_weight": "fp8_e4m3",
"qscheme_act": "TENSOR"
},
"yarn": {
"original_max_position_embeddings": 8192,
"factor": 128,
"apply_scale": false,
"beta": 32,
"alpha": 1
},
"moe": {
"expert_parallel": 1,
"expert_model_parallel": 1,
"route_every_n": 1,
"first_k_dense_replace": 0,
"num_experts": 128,
"num_experts_per_tok": 4,
"num_expert_groups": 1,
"num_expert_groups_per_tok": 1,
"routed_scale": 1.0,
"expert_hidden_dim": 2048,
"num_shared_experts": 1
},
"vision_encoder": {
"image_token_id": 10,
"image_break_token_id": 12,
"image_end_token_id": 13,
"intermediate_size": 4096,
"num_hidden_layers": 24,
"num_attention_heads": 16,
"mm_projector_id": "patch_merge",
"spatial_merge_size": 2,
"hidden_size": 1024,
"num_channels": 3,
"image_size": 1540,
"max_image_size": 1540,
"patch_size": 14,
"rope_theta": 10000.0,
"add_pre_mm_projector_layer_norm": true,
"adapter_bias": false
}
}