Update config.json
Browse files
add the rope scaling
- config.json +6 -2
config.json
CHANGED
|
@@ -49,7 +49,7 @@
|
|
| 49 |
"full_attention",
|
| 50 |
"full_attention"
|
| 51 |
],
|
| 52 |
-
"max_position_embeddings": 65536,
|
| 53 |
"max_window_layers": 28,
|
| 54 |
"mlp_bias": false,
|
| 55 |
"model_type": "smollm3",
|
|
@@ -98,7 +98,11 @@
|
|
| 98 |
"pad_token_id": 128004,
|
| 99 |
"pretraining_tp": 2,
|
| 100 |
"rms_norm_eps": 1e-06,
|
| 101 |
-
"rope_scaling": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
"rope_theta": 5000000.0,
|
| 103 |
"sliding_window": null,
|
| 104 |
"transformers_version": "4.57.0",
|
|
|
|
| 49 |
"full_attention",
|
| 50 |
"full_attention"
|
| 51 |
],
|
| 52 |
+
"max_position_embeddings": 131072,
|
| 53 |
"max_window_layers": 28,
|
| 54 |
"mlp_bias": false,
|
| 55 |
"model_type": "smollm3",
|
|
|
|
| 98 |
"pad_token_id": 128004,
|
| 99 |
"pretraining_tp": 2,
|
| 100 |
"rms_norm_eps": 1e-06,
|
| 101 |
+
"rope_scaling": {
|
| 102 |
+
"type": "yarn",
|
| 103 |
+
"factor": 2.0,
|
| 104 |
+
"original_max_position_embeddings": 65536
|
| 105 |
+
},
|
| 106 |
"rope_theta": 5000000.0,
|
| 107 |
"sliding_window": null,
|
| 108 |
"transformers_version": "4.57.0",
|