Upload model_config.yaml with huggingface_hub
Browse files- model_config.yaml +36 -0
model_config.yaml
ADDED
# model_config.yaml — Gemma-2-2b (litgpt-style model configuration), 36 lines added:
attention_logit_softcapping: 50.0
attention_scores_scalar: 256
bias: false
block_size: 8192
final_logit_softcapping: 30.0
gelu_approximate: tanh
head_size: 256
hf_config:
  name: gemma-2-2b
  org: google
intermediate_size: 9216
lm_head_bias: false
mlp_class_name: GemmaMLP
n_embd: 2304
n_expert: 0
n_expert_per_token: 0
n_head: 8
n_layer: 26
n_query_groups: 4
name: Gemma-2-2b
norm_class_name: RMSNorm
norm_eps: 1.0e-05
padded_vocab_size: 256000
padding_multiple: 512
parallel_residual: false
post_attention_norm: true
post_mlp_norm: true
rope_base: 10000
rope_condense_ratio: 1
rotary_percentage: 1.0
scale_embeddings: true
shared_attention_norm: false
sliding_window_layer_placing: 2
sliding_window_size: 4096
use_flash_attn: true
vocab_size: 256000