abhishekchohan committed on
Commit
988fc64
·
verified ·
1 Parent(s): 739cfb0

Upload quantized model

Browse files
.ipynb_checkpoints/chat_template-checkpoint.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- endif %}
config.json CHANGED
@@ -31,8 +31,8 @@
31
  "router_aux_loss_coef": 0.001,
32
  "sliding_window": null,
33
  "tie_word_embeddings": false,
34
- "transformers_version": "4.56.2",
35
  "use_cache": true,
36
  "use_sliding_window": false,
37
- "vocab_size": 151669
38
  }
 
31
  "router_aux_loss_coef": 0.001,
32
  "sliding_window": null,
33
  "tie_word_embeddings": false,
34
+ "transformers_version": "4.57.1",
35
  "use_cache": true,
36
  "use_sliding_window": false,
37
+ "vocab_size": 151936
38
  }
generation_config.json CHANGED
@@ -1,6 +1,13 @@
1
  {
2
- "_from_model_config": true,
3
  "bos_token_id": 151643,
4
- "eos_token_id": 151645,
5
- "transformers_version": "4.56.2"
 
 
 
 
 
 
 
 
6
  }
 
1
  {
 
2
  "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.57.1"
13
  }
model-00001-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc92b35f7a3667e08ea0fbf0a3eab49123444f4ef7fe281f624122d6fd2ebbac
3
- size 4999237192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa8450ca63391bad06265b0ae7b718178856f166f999f65978879d872da42ad
3
+ size 4997184968
model-00002-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30516b55cd3973b443dc986662443b8d9b3482820c3561d1e3248878f7a0a3dd
3
  size 4997741608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007e9ee0cd568cd37b0f13116d0e9bad865ca7fcfb834257b39b2465a1de92a9
3
  size 4997741608
model-00003-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87e1ba18eaf3e8694651929a2562f67971cc1c604ff7934096e5f27d55b65b1a
3
  size 4997742208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b304de65e62ef7592fbce88874b435ffc811d3e2c235eed50cf2465369cdb5
3
  size 4997742208
model-00004-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28111c7817b043fee76e7c892b3753b73a7b761c867e231aec3cd4d44376e03c
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92966cc36e1ea040feee69dd6dd7a5c0c57c7d94d4156b0fd7c6a00a23719900
3
  size 4997743184
model-00005-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b158f2c03abcb2dfbddc3b1866a4f3d6716eec156e093d4cee7cfefa5cd9c9f
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3d283ed68d7ea71f92a51f8acff58d066d787e5cb8b6a1708b4d6a338cc795
3
  size 4997743184
model-00006-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f5d22b23d121b21a76c67c1b6e71911beaa78eeac8257a6f63f559a40efffae
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2c86ce5b45b1aa706a8ba4d51efeb828ae5d87b73a6e3a17a6e5f54d969b12
3
  size 4997743184
model-00007-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:483d33971cb409ac9b314adbaa773bdbf19290cc83b845d2129fce3d63ffb9f2
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5402756b508dcc73e63aa3815e0a28d578d910e62ba567f8f6f9827cb13c7aaa
3
  size 4997743184
model-00008-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a52ee08cb9940bbb3820cddc72d3777dc1b38ffdd4db8d3dfbb62befb7808508
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54cdd133e5fee2c6a557405e69def75912e9536e87b26b77e1e6a4c9355ce3e7
3
  size 4997743184
model-00009-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7f535dd75ef4962ed75074c321f94ef220ac93ecf3fa14126c2cc3cf7247950
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbf105a0227a2569447761ed4db403080db9947902207fc06f9bd61fb76329d
3
  size 4997743184
model-00010-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b4f2a2c3b26336ecea5a27fe0a30052e0160a5b9678bfe68cd512e83794d9
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:249cbce6282e957555c518610ae50b4ee98ddf5c2ea8ccfd693ac1c5b0807e0e
3
  size 4997743184
model-00011-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd490511abf8ce0e534dd6f2d8898e10f9d48cfc605da66916ba773b182299b2
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8768da1bae8b6f81b0639ea987b128c4269e10dcfc1a58164484f8f4c0ceda0b
3
  size 4997743184
model-00012-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0b9e6f415a2c5947db4ffb9e5aa0647ccad4f36db58915c63e8a0dfe4d884ff
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c47af572facc5db4caa396b23029f13a8fdc5d30a92f65c1b7e3053938a5e6f
3
  size 4997743184
model-00013-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e7faf6c925aa9b508fb9dfe346abeb6b40354e13243abca495ecea9fa45a620
3
- size 1089980800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5c8479bed6cc760a2fe88c927ba37766212844ebb74ab09ef0beeab682b7289
3
+ size 1094220288
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 30531028992,
4
- "total_size": 61062057984
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00013-of-00013.safetensors",
@@ -1460,7 +1460,7 @@
1460
  "model.layers.11.mlp.experts.65.gate_proj.weight": "model-00003-of-00013.safetensors",
1461
  "model.layers.11.mlp.experts.65.up_proj.weight": "model-00003-of-00013.safetensors",
1462
  "model.layers.11.mlp.experts.66.down_proj.weight": "model-00004-of-00013.safetensors",
1463
- "model.layers.11.mlp.experts.66.gate_proj.weight": "model-00003-of-00013.safetensors",
1464
  "model.layers.11.mlp.experts.66.up_proj.weight": "model-00004-of-00013.safetensors",
1465
  "model.layers.11.mlp.experts.67.down_proj.weight": "model-00004-of-00013.safetensors",
1466
  "model.layers.11.mlp.experts.67.gate_proj.weight": "model-00004-of-00013.safetensors",
@@ -3036,7 +3036,7 @@
3036
  "model.layers.15.mlp.experts.66.up_proj.weight": "model-00004-of-00013.safetensors",
3037
  "model.layers.15.mlp.experts.67.down_proj.weight": "model-00005-of-00013.safetensors",
3038
  "model.layers.15.mlp.experts.67.gate_proj.weight": "model-00004-of-00013.safetensors",
3039
- "model.layers.15.mlp.experts.67.up_proj.weight": "model-00004-of-00013.safetensors",
3040
  "model.layers.15.mlp.experts.68.down_proj.weight": "model-00005-of-00013.safetensors",
3041
  "model.layers.15.mlp.experts.68.gate_proj.weight": "model-00005-of-00013.safetensors",
3042
  "model.layers.15.mlp.experts.68.up_proj.weight": "model-00005-of-00013.safetensors",
@@ -4609,7 +4609,7 @@
4609
  "model.layers.19.mlp.experts.67.down_proj.weight": "model-00005-of-00013.safetensors",
4610
  "model.layers.19.mlp.experts.67.gate_proj.weight": "model-00005-of-00013.safetensors",
4611
  "model.layers.19.mlp.experts.67.up_proj.weight": "model-00005-of-00013.safetensors",
4612
- "model.layers.19.mlp.experts.68.down_proj.weight": "model-00005-of-00013.safetensors",
4613
  "model.layers.19.mlp.experts.68.gate_proj.weight": "model-00005-of-00013.safetensors",
4614
  "model.layers.19.mlp.experts.68.up_proj.weight": "model-00005-of-00013.safetensors",
4615
  "model.layers.19.mlp.experts.69.down_proj.weight": "model-00006-of-00013.safetensors",
@@ -6584,7 +6584,7 @@
6584
  "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00006-of-00013.safetensors",
6585
  "model.layers.23.mlp.experts.7.up_proj.weight": "model-00006-of-00013.safetensors",
6586
  "model.layers.23.mlp.experts.70.down_proj.weight": "model-00007-of-00013.safetensors",
6587
- "model.layers.23.mlp.experts.70.gate_proj.weight": "model-00006-of-00013.safetensors",
6588
  "model.layers.23.mlp.experts.70.up_proj.weight": "model-00007-of-00013.safetensors",
6589
  "model.layers.23.mlp.experts.71.down_proj.weight": "model-00007-of-00013.safetensors",
6590
  "model.layers.23.mlp.experts.71.gate_proj.weight": "model-00007-of-00013.safetensors",
@@ -8160,7 +8160,7 @@
8160
  "model.layers.27.mlp.experts.70.up_proj.weight": "model-00007-of-00013.safetensors",
8161
  "model.layers.27.mlp.experts.71.down_proj.weight": "model-00008-of-00013.safetensors",
8162
  "model.layers.27.mlp.experts.71.gate_proj.weight": "model-00007-of-00013.safetensors",
8163
- "model.layers.27.mlp.experts.71.up_proj.weight": "model-00007-of-00013.safetensors",
8164
  "model.layers.27.mlp.experts.72.down_proj.weight": "model-00008-of-00013.safetensors",
8165
  "model.layers.27.mlp.experts.72.gate_proj.weight": "model-00008-of-00013.safetensors",
8166
  "model.layers.27.mlp.experts.72.up_proj.weight": "model-00008-of-00013.safetensors",
@@ -9312,7 +9312,7 @@
9312
  "model.layers.3.mlp.experts.62.up_proj.weight": "model-00001-of-00013.safetensors",
9313
  "model.layers.3.mlp.experts.63.down_proj.weight": "model-00002-of-00013.safetensors",
9314
  "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00001-of-00013.safetensors",
9315
- "model.layers.3.mlp.experts.63.up_proj.weight": "model-00001-of-00013.safetensors",
9316
  "model.layers.3.mlp.experts.64.down_proj.weight": "model-00002-of-00013.safetensors",
9317
  "model.layers.3.mlp.experts.64.gate_proj.weight": "model-00002-of-00013.safetensors",
9318
  "model.layers.3.mlp.experts.64.up_proj.weight": "model-00002-of-00013.safetensors",
@@ -10126,7 +10126,7 @@
10126
  "model.layers.31.mlp.experts.71.down_proj.weight": "model-00008-of-00013.safetensors",
10127
  "model.layers.31.mlp.experts.71.gate_proj.weight": "model-00008-of-00013.safetensors",
10128
  "model.layers.31.mlp.experts.71.up_proj.weight": "model-00008-of-00013.safetensors",
10129
- "model.layers.31.mlp.experts.72.down_proj.weight": "model-00008-of-00013.safetensors",
10130
  "model.layers.31.mlp.experts.72.gate_proj.weight": "model-00008-of-00013.safetensors",
10131
  "model.layers.31.mlp.experts.72.up_proj.weight": "model-00008-of-00013.safetensors",
10132
  "model.layers.31.mlp.experts.73.down_proj.weight": "model-00009-of-00013.safetensors",
@@ -11705,7 +11705,7 @@
11705
  "model.layers.35.mlp.experts.73.gate_proj.weight": "model-00009-of-00013.safetensors",
11706
  "model.layers.35.mlp.experts.73.up_proj.weight": "model-00009-of-00013.safetensors",
11707
  "model.layers.35.mlp.experts.74.down_proj.weight": "model-00010-of-00013.safetensors",
11708
- "model.layers.35.mlp.experts.74.gate_proj.weight": "model-00009-of-00013.safetensors",
11709
  "model.layers.35.mlp.experts.74.up_proj.weight": "model-00010-of-00013.safetensors",
11710
  "model.layers.35.mlp.experts.75.down_proj.weight": "model-00010-of-00013.safetensors",
11711
  "model.layers.35.mlp.experts.75.gate_proj.weight": "model-00010-of-00013.safetensors",
@@ -13281,7 +13281,7 @@
13281
  "model.layers.39.mlp.experts.74.up_proj.weight": "model-00010-of-00013.safetensors",
13282
  "model.layers.39.mlp.experts.75.down_proj.weight": "model-00011-of-00013.safetensors",
13283
  "model.layers.39.mlp.experts.75.gate_proj.weight": "model-00010-of-00013.safetensors",
13284
- "model.layers.39.mlp.experts.75.up_proj.weight": "model-00010-of-00013.safetensors",
13285
  "model.layers.39.mlp.experts.76.down_proj.weight": "model-00011-of-00013.safetensors",
13286
  "model.layers.39.mlp.experts.76.gate_proj.weight": "model-00011-of-00013.safetensors",
13287
  "model.layers.39.mlp.experts.76.up_proj.weight": "model-00011-of-00013.safetensors",
@@ -15247,7 +15247,7 @@
15247
  "model.layers.43.mlp.experts.75.down_proj.weight": "model-00011-of-00013.safetensors",
15248
  "model.layers.43.mlp.experts.75.gate_proj.weight": "model-00011-of-00013.safetensors",
15249
  "model.layers.43.mlp.experts.75.up_proj.weight": "model-00011-of-00013.safetensors",
15250
- "model.layers.43.mlp.experts.76.down_proj.weight": "model-00011-of-00013.safetensors",
15251
  "model.layers.43.mlp.experts.76.gate_proj.weight": "model-00011-of-00013.safetensors",
15252
  "model.layers.43.mlp.experts.76.up_proj.weight": "model-00011-of-00013.safetensors",
15253
  "model.layers.43.mlp.experts.77.down_proj.weight": "model-00012-of-00013.safetensors",
@@ -16826,7 +16826,7 @@
16826
  "model.layers.47.mlp.experts.77.gate_proj.weight": "model-00012-of-00013.safetensors",
16827
  "model.layers.47.mlp.experts.77.up_proj.weight": "model-00012-of-00013.safetensors",
16828
  "model.layers.47.mlp.experts.78.down_proj.weight": "model-00013-of-00013.safetensors",
16829
- "model.layers.47.mlp.experts.78.gate_proj.weight": "model-00012-of-00013.safetensors",
16830
  "model.layers.47.mlp.experts.78.up_proj.weight": "model-00013-of-00013.safetensors",
16831
  "model.layers.47.mlp.experts.79.down_proj.weight": "model-00013-of-00013.safetensors",
16832
  "model.layers.47.mlp.experts.79.gate_proj.weight": "model-00013-of-00013.safetensors",
@@ -17959,7 +17959,7 @@
17959
  "model.layers.7.mlp.experts.63.down_proj.weight": "model-00002-of-00013.safetensors",
17960
  "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00002-of-00013.safetensors",
17961
  "model.layers.7.mlp.experts.63.up_proj.weight": "model-00002-of-00013.safetensors",
17962
- "model.layers.7.mlp.experts.64.down_proj.weight": "model-00002-of-00013.safetensors",
17963
  "model.layers.7.mlp.experts.64.gate_proj.weight": "model-00002-of-00013.safetensors",
17964
  "model.layers.7.mlp.experts.64.up_proj.weight": "model-00002-of-00013.safetensors",
17965
  "model.layers.7.mlp.experts.65.down_proj.weight": "model-00003-of-00013.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 30532122624,
4
+ "total_size": 61064245248
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00013-of-00013.safetensors",
 
1460
  "model.layers.11.mlp.experts.65.gate_proj.weight": "model-00003-of-00013.safetensors",
1461
  "model.layers.11.mlp.experts.65.up_proj.weight": "model-00003-of-00013.safetensors",
1462
  "model.layers.11.mlp.experts.66.down_proj.weight": "model-00004-of-00013.safetensors",
1463
+ "model.layers.11.mlp.experts.66.gate_proj.weight": "model-00004-of-00013.safetensors",
1464
  "model.layers.11.mlp.experts.66.up_proj.weight": "model-00004-of-00013.safetensors",
1465
  "model.layers.11.mlp.experts.67.down_proj.weight": "model-00004-of-00013.safetensors",
1466
  "model.layers.11.mlp.experts.67.gate_proj.weight": "model-00004-of-00013.safetensors",
 
3036
  "model.layers.15.mlp.experts.66.up_proj.weight": "model-00004-of-00013.safetensors",
3037
  "model.layers.15.mlp.experts.67.down_proj.weight": "model-00005-of-00013.safetensors",
3038
  "model.layers.15.mlp.experts.67.gate_proj.weight": "model-00004-of-00013.safetensors",
3039
+ "model.layers.15.mlp.experts.67.up_proj.weight": "model-00005-of-00013.safetensors",
3040
  "model.layers.15.mlp.experts.68.down_proj.weight": "model-00005-of-00013.safetensors",
3041
  "model.layers.15.mlp.experts.68.gate_proj.weight": "model-00005-of-00013.safetensors",
3042
  "model.layers.15.mlp.experts.68.up_proj.weight": "model-00005-of-00013.safetensors",
 
4609
  "model.layers.19.mlp.experts.67.down_proj.weight": "model-00005-of-00013.safetensors",
4610
  "model.layers.19.mlp.experts.67.gate_proj.weight": "model-00005-of-00013.safetensors",
4611
  "model.layers.19.mlp.experts.67.up_proj.weight": "model-00005-of-00013.safetensors",
4612
+ "model.layers.19.mlp.experts.68.down_proj.weight": "model-00006-of-00013.safetensors",
4613
  "model.layers.19.mlp.experts.68.gate_proj.weight": "model-00005-of-00013.safetensors",
4614
  "model.layers.19.mlp.experts.68.up_proj.weight": "model-00005-of-00013.safetensors",
4615
  "model.layers.19.mlp.experts.69.down_proj.weight": "model-00006-of-00013.safetensors",
 
6584
  "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00006-of-00013.safetensors",
6585
  "model.layers.23.mlp.experts.7.up_proj.weight": "model-00006-of-00013.safetensors",
6586
  "model.layers.23.mlp.experts.70.down_proj.weight": "model-00007-of-00013.safetensors",
6587
+ "model.layers.23.mlp.experts.70.gate_proj.weight": "model-00007-of-00013.safetensors",
6588
  "model.layers.23.mlp.experts.70.up_proj.weight": "model-00007-of-00013.safetensors",
6589
  "model.layers.23.mlp.experts.71.down_proj.weight": "model-00007-of-00013.safetensors",
6590
  "model.layers.23.mlp.experts.71.gate_proj.weight": "model-00007-of-00013.safetensors",
 
8160
  "model.layers.27.mlp.experts.70.up_proj.weight": "model-00007-of-00013.safetensors",
8161
  "model.layers.27.mlp.experts.71.down_proj.weight": "model-00008-of-00013.safetensors",
8162
  "model.layers.27.mlp.experts.71.gate_proj.weight": "model-00007-of-00013.safetensors",
8163
+ "model.layers.27.mlp.experts.71.up_proj.weight": "model-00008-of-00013.safetensors",
8164
  "model.layers.27.mlp.experts.72.down_proj.weight": "model-00008-of-00013.safetensors",
8165
  "model.layers.27.mlp.experts.72.gate_proj.weight": "model-00008-of-00013.safetensors",
8166
  "model.layers.27.mlp.experts.72.up_proj.weight": "model-00008-of-00013.safetensors",
 
9312
  "model.layers.3.mlp.experts.62.up_proj.weight": "model-00001-of-00013.safetensors",
9313
  "model.layers.3.mlp.experts.63.down_proj.weight": "model-00002-of-00013.safetensors",
9314
  "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00001-of-00013.safetensors",
9315
+ "model.layers.3.mlp.experts.63.up_proj.weight": "model-00002-of-00013.safetensors",
9316
  "model.layers.3.mlp.experts.64.down_proj.weight": "model-00002-of-00013.safetensors",
9317
  "model.layers.3.mlp.experts.64.gate_proj.weight": "model-00002-of-00013.safetensors",
9318
  "model.layers.3.mlp.experts.64.up_proj.weight": "model-00002-of-00013.safetensors",
 
10126
  "model.layers.31.mlp.experts.71.down_proj.weight": "model-00008-of-00013.safetensors",
10127
  "model.layers.31.mlp.experts.71.gate_proj.weight": "model-00008-of-00013.safetensors",
10128
  "model.layers.31.mlp.experts.71.up_proj.weight": "model-00008-of-00013.safetensors",
10129
+ "model.layers.31.mlp.experts.72.down_proj.weight": "model-00009-of-00013.safetensors",
10130
  "model.layers.31.mlp.experts.72.gate_proj.weight": "model-00008-of-00013.safetensors",
10131
  "model.layers.31.mlp.experts.72.up_proj.weight": "model-00008-of-00013.safetensors",
10132
  "model.layers.31.mlp.experts.73.down_proj.weight": "model-00009-of-00013.safetensors",
 
11705
  "model.layers.35.mlp.experts.73.gate_proj.weight": "model-00009-of-00013.safetensors",
11706
  "model.layers.35.mlp.experts.73.up_proj.weight": "model-00009-of-00013.safetensors",
11707
  "model.layers.35.mlp.experts.74.down_proj.weight": "model-00010-of-00013.safetensors",
11708
+ "model.layers.35.mlp.experts.74.gate_proj.weight": "model-00010-of-00013.safetensors",
11709
  "model.layers.35.mlp.experts.74.up_proj.weight": "model-00010-of-00013.safetensors",
11710
  "model.layers.35.mlp.experts.75.down_proj.weight": "model-00010-of-00013.safetensors",
11711
  "model.layers.35.mlp.experts.75.gate_proj.weight": "model-00010-of-00013.safetensors",
 
13281
  "model.layers.39.mlp.experts.74.up_proj.weight": "model-00010-of-00013.safetensors",
13282
  "model.layers.39.mlp.experts.75.down_proj.weight": "model-00011-of-00013.safetensors",
13283
  "model.layers.39.mlp.experts.75.gate_proj.weight": "model-00010-of-00013.safetensors",
13284
+ "model.layers.39.mlp.experts.75.up_proj.weight": "model-00011-of-00013.safetensors",
13285
  "model.layers.39.mlp.experts.76.down_proj.weight": "model-00011-of-00013.safetensors",
13286
  "model.layers.39.mlp.experts.76.gate_proj.weight": "model-00011-of-00013.safetensors",
13287
  "model.layers.39.mlp.experts.76.up_proj.weight": "model-00011-of-00013.safetensors",
 
15247
  "model.layers.43.mlp.experts.75.down_proj.weight": "model-00011-of-00013.safetensors",
15248
  "model.layers.43.mlp.experts.75.gate_proj.weight": "model-00011-of-00013.safetensors",
15249
  "model.layers.43.mlp.experts.75.up_proj.weight": "model-00011-of-00013.safetensors",
15250
+ "model.layers.43.mlp.experts.76.down_proj.weight": "model-00012-of-00013.safetensors",
15251
  "model.layers.43.mlp.experts.76.gate_proj.weight": "model-00011-of-00013.safetensors",
15252
  "model.layers.43.mlp.experts.76.up_proj.weight": "model-00011-of-00013.safetensors",
15253
  "model.layers.43.mlp.experts.77.down_proj.weight": "model-00012-of-00013.safetensors",
 
16826
  "model.layers.47.mlp.experts.77.gate_proj.weight": "model-00012-of-00013.safetensors",
16827
  "model.layers.47.mlp.experts.77.up_proj.weight": "model-00012-of-00013.safetensors",
16828
  "model.layers.47.mlp.experts.78.down_proj.weight": "model-00013-of-00013.safetensors",
16829
+ "model.layers.47.mlp.experts.78.gate_proj.weight": "model-00013-of-00013.safetensors",
16830
  "model.layers.47.mlp.experts.78.up_proj.weight": "model-00013-of-00013.safetensors",
16831
  "model.layers.47.mlp.experts.79.down_proj.weight": "model-00013-of-00013.safetensors",
16832
  "model.layers.47.mlp.experts.79.gate_proj.weight": "model-00013-of-00013.safetensors",
 
17959
  "model.layers.7.mlp.experts.63.down_proj.weight": "model-00002-of-00013.safetensors",
17960
  "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00002-of-00013.safetensors",
17961
  "model.layers.7.mlp.experts.63.up_proj.weight": "model-00002-of-00013.safetensors",
17962
+ "model.layers.7.mlp.experts.64.down_proj.weight": "model-00003-of-00013.safetensors",
17963
  "model.layers.7.mlp.experts.64.gate_proj.weight": "model-00002-of-00013.safetensors",
17964
  "model.layers.7.mlp.experts.64.up_proj.weight": "model-00002-of-00013.safetensors",
17965
  "model.layers.7.mlp.experts.65.down_proj.weight": "model-00003-of-00013.safetensors",
special_tokens_map.json CHANGED
@@ -21,5 +21,11 @@
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
- "pad_token": "<|im_end|>"
 
 
 
 
 
 
25
  }
 
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
  }
tokenizer_config.json CHANGED
@@ -232,7 +232,7 @@
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
  "model_max_length": 1010000,
235
- "pad_token": "<|im_end|>",
236
  "split_special_tokens": false,
237
  "tokenizer_class": "Qwen2Tokenizer",
238
  "unk_token": null
 
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
  "model_max_length": 1010000,
235
+ "pad_token": "<|endoftext|>",
236
  "split_special_tokens": false,
237
  "tokenizer_class": "Qwen2Tokenizer",
238
  "unk_token": null