Upload model
#5
by
eustlb
HF Staff
- opened
- config.json +9 -1
- model.safetensors +2 -2
config.json
CHANGED
|
@@ -112,6 +112,7 @@
|
|
| 112 |
"tie_word_embeddings": true,
|
| 113 |
"tokenizer_class": null
|
| 114 |
},
|
|
|
|
| 115 |
"head_dim": 128,
|
| 116 |
"hidden_act": "silu",
|
| 117 |
"hidden_size": 768,
|
|
@@ -1241,6 +1242,7 @@
|
|
| 1241 |
"text_config": {
|
| 1242 |
"attention_bias": false,
|
| 1243 |
"attention_dropout": 0.0,
|
|
|
|
| 1244 |
"classifier_activation": "gelu",
|
| 1245 |
"classifier_bias": false,
|
| 1246 |
"classifier_dropout": 0.0,
|
|
@@ -1248,7 +1250,9 @@
|
|
| 1248 |
"cls_token_id": 50281,
|
| 1249 |
"decoder_bias": true,
|
| 1250 |
"deterministic_flash_attn": false,
|
|
|
|
| 1251 |
"embedding_dropout": 0.0,
|
|
|
|
| 1252 |
"global_attn_every_n_layers": 3,
|
| 1253 |
"hidden_activation": "gelu",
|
| 1254 |
"hidden_size": 1024,
|
|
@@ -1288,6 +1292,7 @@
|
|
| 1288 |
"norm_eps": 1e-05,
|
| 1289 |
"num_attention_heads": 16,
|
| 1290 |
"num_hidden_layers": 22,
|
|
|
|
| 1291 |
"repad_logits_with_grad": false,
|
| 1292 |
"rope_parameters": {
|
| 1293 |
"full_attention": {
|
|
@@ -1299,9 +1304,12 @@
|
|
| 1299 |
"rope_type": "default"
|
| 1300 |
}
|
| 1301 |
},
|
|
|
|
| 1302 |
"sparse_pred_ignore_index": -100,
|
| 1303 |
"sparse_prediction": false,
|
|
|
|
| 1304 |
"vocab_size": 50368
|
| 1305 |
},
|
| 1306 |
-
"
|
|
|
|
| 1307 |
}
|
|
|
|
| 112 |
"tie_word_embeddings": true,
|
| 113 |
"tokenizer_class": null
|
| 114 |
},
|
| 115 |
+
"dtype": "float32",
|
| 116 |
"head_dim": 128,
|
| 117 |
"hidden_act": "silu",
|
| 118 |
"hidden_size": 768,
|
|
|
|
| 1242 |
"text_config": {
|
| 1243 |
"attention_bias": false,
|
| 1244 |
"attention_dropout": 0.0,
|
| 1245 |
+
"bos_token_id": 50281,
|
| 1246 |
"classifier_activation": "gelu",
|
| 1247 |
"classifier_bias": false,
|
| 1248 |
"classifier_dropout": 0.0,
|
|
|
|
| 1250 |
"cls_token_id": 50281,
|
| 1251 |
"decoder_bias": true,
|
| 1252 |
"deterministic_flash_attn": false,
|
| 1253 |
+
"dtype": "float32",
|
| 1254 |
"embedding_dropout": 0.0,
|
| 1255 |
+
"eos_token_id": 50282,
|
| 1256 |
"global_attn_every_n_layers": 3,
|
| 1257 |
"hidden_activation": "gelu",
|
| 1258 |
"hidden_size": 1024,
|
|
|
|
| 1292 |
"norm_eps": 1e-05,
|
| 1293 |
"num_attention_heads": 16,
|
| 1294 |
"num_hidden_layers": 22,
|
| 1295 |
+
"pad_token_id": 50283,
|
| 1296 |
"repad_logits_with_grad": false,
|
| 1297 |
"rope_parameters": {
|
| 1298 |
"full_attention": {
|
|
|
|
| 1304 |
"rope_type": "default"
|
| 1305 |
}
|
| 1306 |
},
|
| 1307 |
+
"sep_token_id": 50282,
|
| 1308 |
"sparse_pred_ignore_index": -100,
|
| 1309 |
"sparse_prediction": false,
|
| 1310 |
+
"tie_word_embeddings": true,
|
| 1311 |
"vocab_size": 50368
|
| 1312 |
},
|
| 1313 |
+
"tie_word_embeddings": true,
|
| 1314 |
+
"transformers_version": "5.0.1.dev0"
|
| 1315 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8fc31329df9fab7fb3467873d4e8e40df40a486d9e6645feb9de6fa5c7b75d9
|
| 3 |
+
size 3388082648
|