{
  "architectures" : [
    "Starcoder2ForCausalLM"
  ],
  "attention_dropout" : 0.10000000000000001,
  "bos_token_id" : 0,
  "embedding_dropout" : 0.10000000000000001,
  "eos_token_id" : 0,
  "hidden_act" : "gelu_pytorch_tanh",
  "hidden_size" : 3072,
  "initializer_range" : 0.018041999999999999,
  "intermediate_size" : 12288,
  "max_position_embeddings" : 16384,
  "mlp_type" : "default",
  "model_type" : "starcoder2",
  "norm_epsilon" : 1.0000000000000001e-05,
  "norm_type" : "layer_norm",
  "num_attention_heads" : 24,
  "num_hidden_layers" : 30,
  "num_key_value_heads" : 2,
  "quantization" : {
    "bits" : 4,
    "group_size" : 64,
    "mode" : "affine"
  },
  "quantization_config" : {
    "bits" : 4,
    "group_size" : 64,
    "mode" : "affine"
  },
  "residual_dropout" : 0.10000000000000001,
  "rope_theta" : 999999.44203588134,
  "sliding_window" : 4096,
  "transformers_version" : "4.37.0.dev0",
  "use_bias" : 1,
  "use_cache" : 1,
  "vocab_size" : 49152
}