Upload swa/model_config.json with huggingface_hub
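The commit title above is the default message that huggingface_hub generates when a file is pushed with its upload helpers. A minimal sketch of how a commit like this is typically produced (the repo id below is a placeholder, not the actual repository):

    from huggingface_hub import upload_file

    upload_file(
        path_or_fileobj="swa/model_config.json",  # local file to push
        path_in_repo="swa/model_config.json",     # destination path in the repo
        repo_id="your-org/gemma4-e2b-swa",        # placeholder: the real repo id is not shown here
        # commit_message is omitted, so huggingface_hub uses its default:
        # "Upload swa/model_config.json with huggingface_hub"
    )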
swa/model_config.json  CHANGED  (+4 -11)
@@ -1,23 +1,16 @@
 {
-  "model_name": "gemma4-e2b-swa-
+  "model_name": "gemma4-e2b-swa-8k",
   "architecture": "gemma4",
   "hidden_size": 1536,
   "num_hidden_layers": 35,
-  "context_length":
+  "context_length": 8192,
   "sliding_window": 512,
   "vocab_size": 262144,
   "bos_token_id": 2,
   "eos_token_id": 1,
   "per_layer_dim": 256,
-  "max_head_dim": 512,
   "embed_scale": 39.191835884530846,
   "per_layer_model_projection_scale": 0.02551551815399144,
   "per_layer_input_scale": 0.7071067811865476,
-  "per_layer_embed_scale": 16.0,
-
-  "has_multimodal": true,
-  "stateless": true,
-  "sliding_window_attention": true,
-  "ple_inside_chunk1": true,
-  "num_chunks": 4
-}
+  "per_layer_embed_scale": 16.0
+}
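In short, the commit sets "context_length" to 8192, renames the model to the matching 8k variant, and drops "max_head_dim" together with the "has_multimodal", "stateless", "sliding_window_attention", "ple_inside_chunk1", and "num_chunks" flags. A quick sanity check of the updated file (the relation between the fields is an assumption inferred from the key names):

    import json

    with open("swa/model_config.json") as f:
        cfg = json.load(f)

    # Assumed invariant: the attention window must fit inside the full context.
    assert cfg["sliding_window"] <= cfg["context_length"]

    # 8192 / 512 = 16 window-sized spans per full-length sequence.
    print(cfg["context_length"] // cfg["sliding_window"])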