norm_eps -> rms_norm_eps
Browse files- config.json +4 -4
config.json
CHANGED
|
@@ -31,7 +31,7 @@
|
|
| 31 |
"num_attention_heads": 12,
|
| 32 |
"num_key_value_heads": null,
|
| 33 |
"max_position_embeddings": 8192,
|
| 34 |
-
"norm_eps": 1e-05,
|
| 35 |
"dropout": 0.0,
|
| 36 |
"rope_theta": 10000.0,
|
| 37 |
"attn_impl": "xformers",
|
|
@@ -48,7 +48,7 @@
|
|
| 48 |
"num_attention_heads": 16,
|
| 49 |
"num_key_value_heads": null,
|
| 50 |
"num_hidden_layers": 1,
|
| 51 |
-
"norm_eps": 1e-05,
|
| 52 |
"dropout": 0.0,
|
| 53 |
"max_position_embeddings": 24576,
|
| 54 |
"rope_theta": 500000.0,
|
|
@@ -68,7 +68,7 @@
|
|
| 68 |
"num_attention_heads": 16,
|
| 69 |
"num_key_value_heads": null,
|
| 70 |
"num_hidden_layers": 9,
|
| 71 |
-
"norm_eps": 1e-05,
|
| 72 |
"dropout": 0.0,
|
| 73 |
"max_position_embeddings": 24576,
|
| 74 |
"rope_theta": 500000.0,
|
|
@@ -84,7 +84,7 @@
|
|
| 84 |
"num_attention_heads": 16,
|
| 85 |
"num_key_value_heads": null,
|
| 86 |
"num_hidden_layers": 25,
|
| 87 |
-
"norm_eps": 1e-05,
|
| 88 |
"dropout": 0.0,
|
| 89 |
"max_position_embeddings": 4096,
|
| 90 |
"rope_theta": 500000.0,
|
|
|
|
| 31 |
"num_attention_heads": 12,
|
| 32 |
"num_key_value_heads": null,
|
| 33 |
"max_position_embeddings": 8192,
|
| 34 |
+
"rms_norm_eps": 1e-05,
|
| 35 |
"dropout": 0.0,
|
| 36 |
"rope_theta": 10000.0,
|
| 37 |
"attn_impl": "xformers",
|
|
|
|
| 48 |
"num_attention_heads": 16,
|
| 49 |
"num_key_value_heads": null,
|
| 50 |
"num_hidden_layers": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
"dropout": 0.0,
|
| 53 |
"max_position_embeddings": 24576,
|
| 54 |
"rope_theta": 500000.0,
|
|
|
|
| 68 |
"num_attention_heads": 16,
|
| 69 |
"num_key_value_heads": null,
|
| 70 |
"num_hidden_layers": 9,
|
| 71 |
+
"rms_norm_eps": 1e-05,
|
| 72 |
"dropout": 0.0,
|
| 73 |
"max_position_embeddings": 24576,
|
| 74 |
"rope_theta": 500000.0,
|
|
|
|
| 84 |
"num_attention_heads": 16,
|
| 85 |
"num_key_value_heads": null,
|
| 86 |
"num_hidden_layers": 25,
|
| 87 |
+
"rms_norm_eps": 1e-05,
|
| 88 |
"dropout": 0.0,
|
| 89 |
"max_position_embeddings": 4096,
|
| 90 |
"rope_theta": 500000.0,
|