{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 960,
  "initializer_range": 0.02,
  "intermediate_size": 2560,
  "is_llama_config": true,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 15,
  "num_hidden_layers": 32,
  "num_key_value_heads": 5,
  "pad_token_id": 2,
  "pretraining_tp": 1,
  "quantization": {
    "bits": 4,
    "group_size": 64,
    "mode": "affine"
  },
  "quantization_config": {
    "bits": 4,
    "group_size": 64,
    "mode": "affine"
  },
  "rms_norm_eps": 1e-05,
  "rope_interleaved": false,
  "rope_scaling": null,
  "rope_theta": 100000,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.42.3",
  "transformers.js_config": {
    "kv_cache_dtype": {
      "fp16": "float16",
      "q4f16": "float16"
    }
  },
  "use_cache": true,
  "vocab_size": 49152
}