daoqm123 committed on
Commit
dcddd4a
·
1 Parent(s): 6a3ab25

Update weights

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:341ade02e4366270f99f2cdf3cf684b55821a4893aefb28c03422539fb0cf1dc
3
- size 4915467032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f6240e255c381876ddb8f3eec068f5e737e2184df8f6f48d0b05ee08ba3d9a
3
+ size 4965798912
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1c26408c3dcf391d20077d954f05cb224deeb3272fb6f0bf016581a1d7928dd
3
- size 3120871248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31bcbcbfe00248bab2b283e021997e56ba5aa06c1e09277506e7af80bdf0eb11
3
+ size 2265183848
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 3212774400,
4
- "total_size": 8036308992
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
@@ -124,8 +124,8 @@
124
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
125
  "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
126
  "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
127
- "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
128
- "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
129
  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
130
  "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
131
  "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
  "total_parameters": 3212774400,
4
+ "total_size": 7230953472
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
 
124
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
125
  "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
126
  "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
129
  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
130
  "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
131
  "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9d4fd2d4afa82d8a7dadae3490fdc20b26f06e32cec78a8dc96521b4dc79038
3
- size 17210200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
tokenizer_config.json CHANGED
@@ -2053,18 +2053,11 @@
2053
  "clean_up_tokenization_spaces": true,
2054
  "eos_token": "<|end_of_text|>",
2055
  "extra_special_tokens": {},
2056
- "max_length": 512,
2057
  "model_input_names": [
2058
  "input_ids",
2059
  "attention_mask"
2060
  ],
2061
  "model_max_length": 131072,
2062
- "pad_to_multiple_of": null,
2063
  "pad_token": "<|end_of_text|>",
2064
- "pad_token_type_id": 0,
2065
- "padding_side": "right",
2066
- "stride": 0,
2067
- "tokenizer_class": "PreTrainedTokenizerFast",
2068
- "truncation_side": "right",
2069
- "truncation_strategy": "longest_first"
2070
  }
 
2053
  "clean_up_tokenization_spaces": true,
2054
  "eos_token": "<|end_of_text|>",
2055
  "extra_special_tokens": {},
 
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
 
2061
  "pad_token": "<|end_of_text|>",
2062
+ "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
 
 
2063
  }
training_metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "epoch": 12,
3
- "train_acc": 0.995978915348876,
4
- "val_acc": 0.9951678746979922,
5
- "val_loss": 0.054994360926616276
6
  }
 
1
  {
2
+ "epoch": 10,
3
+ "train_acc": 0.9972498281142571,
4
+ "val_acc": 0.9814213113388319,
5
+ "val_loss": 0.11411569089561124
6
  }