Upload model

#5
by eustlb HF Staff - opened
Files changed (2) hide show
  1. config.json +9 -1
  2. model.safetensors +2 -2
config.json CHANGED
@@ -112,6 +112,7 @@
112
  "tie_word_embeddings": true,
113
  "tokenizer_class": null
114
  },
 
115
  "head_dim": 128,
116
  "hidden_act": "silu",
117
  "hidden_size": 1024,
@@ -1241,6 +1242,7 @@
1241
  "text_config": {
1242
  "attention_bias": false,
1243
  "attention_dropout": 0.0,
 
1244
  "classifier_activation": "gelu",
1245
  "classifier_bias": false,
1246
  "classifier_dropout": 0.0,
@@ -1248,7 +1250,9 @@
1248
  "cls_token_id": 50281,
1249
  "decoder_bias": true,
1250
  "deterministic_flash_attn": false,
 
1251
  "embedding_dropout": 0.0,
 
1252
  "global_attn_every_n_layers": 3,
1253
  "hidden_activation": "gelu",
1254
  "hidden_size": 1024,
@@ -1288,6 +1292,7 @@
1288
  "norm_eps": 1e-05,
1289
  "num_attention_heads": 16,
1290
  "num_hidden_layers": 22,
 
1291
  "repad_logits_with_grad": false,
1292
  "rope_parameters": {
1293
  "full_attention": {
@@ -1299,9 +1304,12 @@
1299
  "rope_type": "default"
1300
  }
1301
  },
 
1302
  "sparse_pred_ignore_index": -100,
1303
  "sparse_prediction": false,
 
1304
  "vocab_size": 50368
1305
  },
1306
- "transformers_version": "5.0.0.dev0"
 
1307
  }
 
112
  "tie_word_embeddings": true,
113
  "tokenizer_class": null
114
  },
115
+ "dtype": "float32",
116
  "head_dim": 128,
117
  "hidden_act": "silu",
118
  "hidden_size": 1024,
 
1242
  "text_config": {
1243
  "attention_bias": false,
1244
  "attention_dropout": 0.0,
1245
+ "bos_token_id": 50281,
1246
  "classifier_activation": "gelu",
1247
  "classifier_bias": false,
1248
  "classifier_dropout": 0.0,
 
1250
  "cls_token_id": 50281,
1251
  "decoder_bias": true,
1252
  "deterministic_flash_attn": false,
1253
+ "dtype": "float32",
1254
  "embedding_dropout": 0.0,
1255
+ "eos_token_id": 50282,
1256
  "global_attn_every_n_layers": 3,
1257
  "hidden_activation": "gelu",
1258
  "hidden_size": 1024,
 
1292
  "norm_eps": 1e-05,
1293
  "num_attention_heads": 16,
1294
  "num_hidden_layers": 22,
1295
+ "pad_token_id": 50283,
1296
  "repad_logits_with_grad": false,
1297
  "rope_parameters": {
1298
  "full_attention": {
 
1304
  "rope_type": "default"
1305
  }
1306
  },
1307
+ "sep_token_id": 50282,
1308
  "sparse_pred_ignore_index": -100,
1309
  "sparse_prediction": false,
1310
+ "tie_word_embeddings": true,
1311
  "vocab_size": 50368
1312
  },
1313
+ "tie_word_embeddings": true,
1314
+ "transformers_version": "5.0.1.dev0"
1315
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa8046961d09730df10c0c2707565c9610a671643a6a064c42985aa26b2dbcd3
3
- size 9162993936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c87718fb93e00f3b2e5c886b3a6a589ee3bc3d29ea0c1a2e1f6b1e141c19170e
3
+ size 4134930120