{
  "_name_or_path": "HuggingFaceTB/SmolLM-360M",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 960,
  "initializer_range": 0.02,
  "intermediate_size": 2560,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 15,
  "num_hidden_layers": 32,
  "num_key_value_heads": 5,
  "pad_token_id": 2,
  "pretraining_tp": 1,
  "quantization": {
    "bits": 4,
    "group_size": 64,
    "mode": "affine"
  },
  "quantization_config": {
    "bits": 4,
    "group_size": 64,
    "mode": "affine"
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.42.3",
  "use_cache": true,
  "vocab_size": 49152
}