{ "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_darwinlm.DarwinLMConfig", "AutoModelForCausalLM": "modeling_darwinlm.LlamaForCausalLM" }, "bos_token_id": 128000, "dim_each_mlp": { "0.mlp.down_proj": 10048, "1.mlp.down_proj": 8608, "10.mlp.down_proj": 8608, "11.mlp.down_proj": 5728, "12.mlp.down_proj": 4288, "13.mlp.down_proj": 4288, "14.mlp.down_proj": 8608, "15.mlp.down_proj": 11456, "16.mlp.down_proj": 8608, "17.mlp.down_proj": 10048, "18.mlp.down_proj": 10048, "19.mlp.down_proj": 10048, "2.mlp.down_proj": 7168, "20.mlp.down_proj": 5728, "21.mlp.down_proj": 7168, "22.mlp.down_proj": 5728, "23.mlp.down_proj": 5728, "24.mlp.down_proj": 2880, "25.mlp.down_proj": 5728, "26.mlp.down_proj": 4288, "27.mlp.down_proj": 2880, "28.mlp.down_proj": 4288, "29.mlp.down_proj": 4288, "3.mlp.down_proj": 7168, "30.mlp.down_proj": 5728, "31.mlp.down_proj": 7168, "4.mlp.down_proj": 8608, "5.mlp.down_proj": 11456, "6.mlp.down_proj": 8608, "7.mlp.down_proj": 11456, "8.mlp.down_proj": 7168, "9.mlp.down_proj": 5728 }, "eos_token_id": 128001, "head_dim": 128, "heads_each_attn": { "0.self_attn.o_proj": [ 13, 15, 18, 19, 20, 22, 25, 27, 28, 30, 31 ], "1.self_attn.o_proj": [ 0, 2, 8, 9, 11, 12, 13, 14, 15, 19, 22, 25, 27, 29 ], "10.self_attn.o_proj": [ 2, 3, 4, 5, 6, 7, 8, 12, 13, 15, 20, 24, 25, 31 ], "11.self_attn.o_proj": [ 1, 3, 7, 11, 13, 14, 15, 18, 19, 20, 21, 22, 28, 29 ], "12.self_attn.o_proj": [ 2, 7, 10, 11, 13, 14, 27 ], "13.self_attn.o_proj": [ 0, 1, 2, 5, 6, 10, 12, 13, 16, 17, 20, 23, 26, 29 ], "14.self_attn.o_proj": [ 0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 20, 21, 23, 24, 25, 27, 28, 29, 30, 31 ], "15.self_attn.o_proj": [ 1, 2, 3, 9, 10, 11, 13, 15, 17, 18, 22, 23, 28, 29 ], "16.self_attn.o_proj": [ 1, 2, 5, 7, 21, 23, 28 ], "17.self_attn.o_proj": [ 1, 2, 5, 6, 7, 9, 10, 14, 15, 16, 17, 19, 21, 22, 23, 26, 28, 31 ], "18.self_attn.o_proj": [ 1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 15, 16, 18, 19, 21, 24, 31 ], "19.self_attn.o_proj": [ 0, 1, 4, 5, 6, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 28, 29, 31 ], "2.self_attn.o_proj": [ 1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 25, 27 ], "20.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31 ], "21.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 11, 12, 13, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], "22.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 12, 13, 15, 16, 18, 20, 21, 23, 25, 26, 31 ], "23.self_attn.o_proj": [ 0, 2, 3, 5, 7, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 24, 26, 27 ], "24.self_attn.o_proj": [ 0, 4, 5, 6, 7, 8, 9, 11, 13, 14, 23, 25, 26, 31 ], "25.self_attn.o_proj": [ 0, 3, 8, 9, 10, 16, 17, 19, 25, 26, 27, 28, 29, 30 ], "26.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], "27.self_attn.o_proj": [ 0, 2, 3, 4, 5, 10, 11, 13, 15, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31 ], "28.self_attn.o_proj": [ 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 21, 22, 24, 25, 27, 28, 29, 31 ], "29.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 17, 18, 19, 23, 29, 30 ], "3.self_attn.o_proj": [ 1, 4, 9, 11, 13, 17, 27 ], "30.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], "31.self_attn.o_proj": [ 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 
13, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31 ], "4.self_attn.o_proj": [ 0, 3, 9, 10, 11, 14, 15, 18, 19, 20, 21, 28, 29, 30 ], "5.self_attn.o_proj": [ 0, 1, 2, 5, 6, 7, 9, 10, 11, 12, 14, 15, 18, 20, 21, 22, 23, 24, 26, 27, 28, 30 ], "6.self_attn.o_proj": [ 2, 3, 8, 9, 11, 14, 15, 17, 23, 24, 26, 27, 28, 31 ], "7.self_attn.o_proj": [ 3, 6, 7, 8, 11, 13, 16, 18, 20, 22, 24, 25, 26, 28 ], "8.self_attn.o_proj": [ 0, 1, 2, 3, 6, 7, 8, 9, 10, 15, 17, 20, 22, 24, 26, 27, 28, 30 ], "9.self_attn.o_proj": [ 12, 20, 22, 27 ] }, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "kv_ignore": true, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "darwinlm", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": { "factor": 8.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3" }, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.45.0.dev0", "use_cache": true, "vocab_size": 128256 }
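Because `auto_map` points at the custom `configuration_darwinlm.DarwinLMConfig` and `modeling_darwinlm.LlamaForCausalLM` classes, this checkpoint cannot be loaded as a stock `LlamaConfig`; `transformers` must be allowed to run the Python files shipped alongside this config. A minimal loading sketch (the `repo` path is a placeholder, not the real checkpoint location):

```python
# Minimal loading sketch. The classes named in "auto_map" live in
# configuration_darwinlm.py / modeling_darwinlm.py next to this config,
# so trust_remote_code=True is required.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo = "path/to/darwinlm-checkpoint"  # hypothetical location of this config.json

config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
assert config.model_type == "darwinlm"

model = AutoModelForCausalLM.from_pretrained(
    repo,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # matches the "torch_dtype" field above
)
```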
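The two per-layer maps encode the structured pruning: `dim_each_mlp` gives each MLP's surviving intermediate width (dense width 14336), and `heads_each_attn` lists the query heads each attention block keeps (out of 32; `kv_ignore` presumably means the 8 KV heads are left unpruned). A small accounting sketch, assuming this file is saved as `config.json`, that tallies how much of the dense shape survives:

```python
# Tally the retained fraction of MLP width and query heads from this file.
# Pure accounting over the fields above; not part of the DarwinLM code itself.
import json

with open("config.json") as f:
    cfg = json.load(f)

layers = cfg["num_hidden_layers"]                        # 32
mlp_kept = sum(cfg["dim_each_mlp"].values())
mlp_total = cfg["intermediate_size"] * layers            # 14336 * 32
heads_kept = sum(len(h) for h in cfg["heads_each_attn"].values())
heads_total = cfg["num_attention_heads"] * layers        # 32 * 32

print(f"MLP width kept:   {mlp_kept}/{mlp_total} ({mlp_kept / mlp_total:.1%})")
print(f"Query heads kept: {heads_kept}/{heads_total} ({heads_kept / heads_total:.1%})")
```

On this config that works out to roughly half the dense MLP width and a bit over half the query heads, with pruning distinctly uneven across depth: layer 24's MLP is cut to 2880 of 14336, layer 9 keeps only 4 of 32 heads, while layer 26 keeps all 32.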
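The `rope_scaling` block is the standard Llama-3.1 long-context recipe, extending 8192 pretraining positions to the 131072 in `max_position_embeddings`. A sketch of the frequency remapping these fields drive, mirroring (not reproducing verbatim) the llama3 branch of transformers' RoPE utilities:

```python
# Sketch of "llama3"-style RoPE rescaling: high-frequency bands are kept,
# low-frequency bands are slowed by `factor`, and the band in between is
# smoothly interpolated. Defaults are taken from the rope_scaling block above.
import math

def llama3_scaled_inv_freq(head_dim=128, rope_theta=500000.0, factor=8.0,
                           low_freq_factor=1.0, high_freq_factor=4.0,
                           original_max_position_embeddings=8192):
    inv_freq = [rope_theta ** (-2 * i / head_dim) for i in range(head_dim // 2)]
    low_wavelen = original_max_position_embeddings / low_freq_factor
    high_wavelen = original_max_position_embeddings / high_freq_factor
    scaled = []
    for f in inv_freq:
        wavelen = 2 * math.pi / f
        if wavelen < high_wavelen:      # high frequency: keep as-is
            scaled.append(f)
        elif wavelen > low_wavelen:     # low frequency: slow down by `factor`
            scaled.append(f / factor)
        else:                           # middle band: smooth interpolation
            smooth = (original_max_position_embeddings / wavelen - low_freq_factor) \
                     / (high_freq_factor - low_freq_factor)
            scaled.append((1 - smooth) * f / factor + smooth * f)
    return scaled
```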