JunHowie committed on
Commit
531f8ee
·
verified ·
1 Parent(s): 6c1dbab

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8192,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 29568,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention",
74
+ "full_attention",
75
+ "full_attention",
76
+ "full_attention",
77
+ "full_attention",
78
+ "full_attention",
79
+ "full_attention",
80
+ "full_attention",
81
+ "full_attention",
82
+ "full_attention",
83
+ "full_attention",
84
+ "full_attention",
85
+ "full_attention",
86
+ "full_attention",
87
+ "full_attention",
88
+ "full_attention",
89
+ "full_attention",
90
+ "full_attention",
91
+ "full_attention",
92
+ "full_attention",
93
+ "full_attention"
94
+ ],
95
+ "max_position_embeddings": 131072,
96
+ "max_window_layers": 70,
97
+ "model_type": "qwen2",
98
+ "num_attention_heads": 64,
99
+ "num_hidden_layers": 80,
100
+ "num_key_value_heads": 8,
101
+ "quantization_config": {
102
+ "bits": 4,
103
+ "checkpoint_format": "gptq",
104
+ "desc_act": false,
105
+ "group_size": 128,
106
+ "lm_head": false,
107
+ "meta": {
108
+ "act_group_aware": false,
109
+ "damp_auto_increment": 0.01,
110
+ "damp_percent": 0.05,
111
+ "mse": 0.0,
112
+ "quantizer": [
113
+ "gptqmodel:4.2.5"
114
+ ],
115
+ "static_groups": false,
116
+ "true_sequential": true,
117
+ "uri": "https://github.com/modelcloud/gptqmodel",
118
+ "v2": false,
119
+ "v2_alpha": 0.25
120
+ },
121
+ "pack_dtype": "int32",
122
+ "quant_method": "gptq",
123
+ "sym": true
124
+ },
125
+ "rms_norm_eps": 1e-06,
126
+ "rope_scaling": null,
127
+ "rope_theta": 1000000.0,
128
+ "sliding_window": null,
129
+ "tie_word_embeddings": false,
130
+ "transformers_version": "4.56.2",
131
+ "use_cache": true,
132
+ "use_sliding_window": false,
133
+ "vocab_size": 152064
134
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.56.2"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5968bb7a6ce5809cb857b31682106e36df69da6745c10a57c5da03293066d6
3
+ size 3939044856
model-00002-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8c45f37e7738e7afcf04848f3c51feabb2551a6e977f54f49d511a8c09424dd
3
+ size 3902490424
model-00003-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0658f2c5f3e989c392641adee14cae37ce0c215b9b120842d0cac44b006d88e3
3
+ size 3981209464
model-00004-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcba10a30ca1c3040a184436dfe87634b5e150dd0d25b9462363f1e8d3762b5f
3
+ size 3981209464
model-00005-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0d9ed80548ab2af9e9cba02243dfa4782973ccf918be1ac2825d153cc97a49e
3
+ size 3902490648
model-00006-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29119a6e44e6faa0deacdc58d1f14120ecb1ab594b646fe0f67b28c93e6f071
3
+ size 3981209464
model-00007-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed5b1c27aa549ae980c0bc1767ae221e5fca6e6507ab50e5ace5d337fdded8c
3
+ size 3981209464
model-00008-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e397e2d96b2121aa3263125c1245726724f9f0180fd40240ce1d41f1896066
3
+ size 3902490648
model-00009-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c23307622fc05f68c5dd8345b82f0b4ba6d8382c563abe3af6ad242750b52b87
3
+ size 3981209464
model-00010-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684426d08e6af40f0a0b4c468f36c0e1d03de954e65bdfb9752eac80f962d3c5
3
+ size 3446282952
model-00011-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:293c829ec0dd41d9928275f93d39df98035dccb58c0adc046fbac7a26d72ec83
3
+ size 2491416704
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quant_log.csv ADDED
@@ -0,0 +1,561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.0000000685,0.05000,1.952
3
+ 0,self_attn.v_proj,0.0000000051,0.05000,1.721
4
+ 0,self_attn.q_proj,0.0000003521,0.05000,1.779
5
+ 0,self_attn.o_proj,0.0000000051,0.05000,1.790
6
+ 0,mlp.gate_proj,0.0000002079,0.05000,2.237
7
+ 0,mlp.up_proj,0.0000001977,0.05000,1.960
8
+ 0,mlp.down_proj,0.0000000034,0.05000,10.567
9
+ 1,self_attn.k_proj,0.0000000373,0.05000,1.738
10
+ 1,self_attn.v_proj,0.0000000025,0.05000,1.740
11
+ 1,self_attn.q_proj,0.0000001444,0.05000,1.788
12
+ 1,self_attn.o_proj,0.0000000007,0.05000,1.782
13
+ 1,mlp.gate_proj,0.0000006684,0.05000,1.988
14
+ 1,mlp.up_proj,0.0000006192,0.05000,1.969
15
+ 1,mlp.down_proj,0.0002015895,0.05000,10.579
16
+ 2,self_attn.k_proj,0.0000020729,0.05000,1.724
17
+ 2,self_attn.v_proj,0.0000007911,0.05000,1.714
18
+ 2,self_attn.q_proj,0.0000070333,0.05000,1.775
19
+ 2,self_attn.o_proj,0.0000000117,0.05000,1.795
20
+ 2,mlp.gate_proj,0.0000012298,0.05000,1.968
21
+ 2,mlp.up_proj,0.0000011734,0.05000,1.972
22
+ 2,mlp.down_proj,0.0000000298,0.05000,10.620
23
+ 3,self_attn.k_proj,0.0000015801,0.05000,1.755
24
+ 3,self_attn.v_proj,0.0000006056,0.05000,1.764
25
+ 3,self_attn.q_proj,0.0000047879,0.05000,1.813
26
+ 3,self_attn.o_proj,0.0000000308,0.05000,1.813
27
+ 3,mlp.gate_proj,0.0000019761,0.05000,1.979
28
+ 3,mlp.up_proj,0.0000019347,0.05000,1.974
29
+ 3,mlp.down_proj,0.0000000406,0.05000,10.661
30
+ 4,self_attn.k_proj,0.0000022731,0.05000,1.872
31
+ 4,self_attn.v_proj,0.0000010830,0.05000,1.864
32
+ 4,self_attn.q_proj,0.0000093928,0.05000,1.906
33
+ 4,self_attn.o_proj,0.0000000120,0.05000,1.927
34
+ 4,mlp.gate_proj,0.0000038092,0.05000,2.084
35
+ 4,mlp.up_proj,0.0000037036,0.05000,2.076
36
+ 4,mlp.down_proj,0.0000000750,0.05000,10.953
37
+ 5,self_attn.k_proj,0.0000031021,0.05000,1.733
38
+ 5,self_attn.v_proj,0.0000017217,0.05000,1.725
39
+ 5,self_attn.q_proj,0.0000123386,0.05000,1.788
40
+ 5,self_attn.o_proj,0.0000000084,0.05000,1.795
41
+ 5,mlp.gate_proj,0.0000062782,0.05000,1.972
42
+ 5,mlp.up_proj,0.0000060347,0.05000,1.947
43
+ 5,mlp.down_proj,0.0000001456,0.05000,10.600
44
+ 6,self_attn.k_proj,0.0000020967,0.05000,1.728
45
+ 6,self_attn.v_proj,0.0000010817,0.05000,1.713
46
+ 6,self_attn.q_proj,0.0000080667,0.05000,1.803
47
+ 6,self_attn.o_proj,0.0000000132,0.05000,1.806
48
+ 6,mlp.gate_proj,0.0000083323,0.05000,1.982
49
+ 6,mlp.up_proj,0.0000080155,0.05000,1.969
50
+ 6,mlp.down_proj,0.0000002012,0.05000,10.563
51
+ 7,self_attn.k_proj,0.0000029421,0.05000,1.741
52
+ 7,self_attn.v_proj,0.0000014289,0.05000,1.724
53
+ 7,self_attn.q_proj,0.0000117084,0.05000,1.821
54
+ 7,self_attn.o_proj,0.0000000213,0.05000,1.799
55
+ 7,mlp.gate_proj,0.0000104807,0.05000,1.985
56
+ 7,mlp.up_proj,0.0000101542,0.05000,1.986
57
+ 7,mlp.down_proj,0.0000002535,0.05000,10.585
58
+ 8,self_attn.k_proj,0.0000027552,0.05000,1.744
59
+ 8,self_attn.v_proj,0.0000014088,0.05000,1.738
60
+ 8,self_attn.q_proj,0.0000106035,0.05000,1.794
61
+ 8,self_attn.o_proj,0.0000000926,0.05000,1.805
62
+ 8,mlp.gate_proj,0.0000121578,0.05000,1.984
63
+ 8,mlp.up_proj,0.0000115947,0.05000,1.978
64
+ 8,mlp.down_proj,0.0000003185,0.05000,10.654
65
+ 9,self_attn.k_proj,0.0000073975,0.05000,1.814
66
+ 9,self_attn.v_proj,0.0000041334,0.05000,1.730
67
+ 9,self_attn.q_proj,0.0000333733,0.05000,1.799
68
+ 9,self_attn.o_proj,0.0000001372,0.05000,1.803
69
+ 9,mlp.gate_proj,0.0000130155,0.05000,1.955
70
+ 9,mlp.up_proj,0.0000125228,0.05000,1.950
71
+ 9,mlp.down_proj,0.0000003395,0.05000,10.714
72
+ 10,self_attn.k_proj,0.0000043694,0.05000,1.737
73
+ 10,self_attn.v_proj,0.0000022361,0.05000,1.723
74
+ 10,self_attn.q_proj,0.0000186031,0.05000,1.816
75
+ 10,self_attn.o_proj,0.0000001746,0.05000,1.795
76
+ 10,mlp.gate_proj,0.0000156931,0.05000,1.980
77
+ 10,mlp.up_proj,0.0000150417,0.05000,1.966
78
+ 10,mlp.down_proj,0.0000004690,0.05000,10.710
79
+ 11,self_attn.k_proj,0.0000051370,0.05000,1.747
80
+ 11,self_attn.v_proj,0.0000023946,0.05000,1.736
81
+ 11,self_attn.q_proj,0.0000224542,0.05000,1.796
82
+ 11,self_attn.o_proj,0.0000001991,0.05000,1.796
83
+ 11,mlp.gate_proj,0.0000179378,0.05000,1.990
84
+ 11,mlp.up_proj,0.0000170724,0.05000,1.969
85
+ 11,mlp.down_proj,0.0000006015,0.05000,10.676
86
+ 12,self_attn.k_proj,0.0000068544,0.05000,1.859
87
+ 12,self_attn.v_proj,0.0000036612,0.05000,1.839
88
+ 12,self_attn.q_proj,0.0000317736,0.05000,1.908
89
+ 12,self_attn.o_proj,0.0000003027,0.05000,1.918
90
+ 12,mlp.gate_proj,0.0000199806,0.05000,2.102
91
+ 12,mlp.up_proj,0.0000191298,0.05000,2.082
92
+ 12,mlp.down_proj,0.0000007129,0.05000,11.001
93
+ 13,self_attn.k_proj,0.0000063864,0.05000,1.730
94
+ 13,self_attn.v_proj,0.0000034330,0.05000,1.719
95
+ 13,self_attn.q_proj,0.0000289530,0.05000,1.980
96
+ 13,self_attn.o_proj,0.0000002358,0.05000,1.790
97
+ 13,mlp.gate_proj,0.0000228015,0.05000,2.018
98
+ 13,mlp.up_proj,0.0000218623,0.05000,1.950
99
+ 13,mlp.down_proj,0.0000009059,0.05000,10.562
100
+ 14,self_attn.k_proj,0.0000061387,0.05000,1.805
101
+ 14,self_attn.v_proj,0.0000032689,0.05000,1.739
102
+ 14,self_attn.q_proj,0.0000278935,0.05000,2.213
103
+ 14,self_attn.o_proj,0.0000004098,0.05000,1.805
104
+ 14,mlp.gate_proj,0.0000234183,0.05000,2.002
105
+ 14,mlp.up_proj,0.0000224868,0.05000,1.981
106
+ 14,mlp.down_proj,0.0000009782,0.05000,10.644
107
+ 15,self_attn.k_proj,0.0000076575,0.05000,1.760
108
+ 15,self_attn.v_proj,0.0000041983,0.05000,1.746
109
+ 15,self_attn.q_proj,0.0000377521,0.05000,1.809
110
+ 15,self_attn.o_proj,0.0000003109,0.05000,1.816
111
+ 15,mlp.gate_proj,0.0000293268,0.05000,2.150
112
+ 15,mlp.up_proj,0.0000280428,0.05000,1.963
113
+ 15,mlp.down_proj,0.0000012089,0.05000,10.600
114
+ 16,self_attn.k_proj,0.0000071514,0.05000,1.771
115
+ 16,self_attn.v_proj,0.0000036904,0.05000,1.765
116
+ 16,self_attn.q_proj,0.0000334941,0.05000,1.815
117
+ 16,self_attn.o_proj,0.0000003754,0.05000,1.823
118
+ 16,mlp.gate_proj,0.0000318242,0.05000,2.024
119
+ 16,mlp.up_proj,0.0000304060,0.05000,2.003
120
+ 16,mlp.down_proj,0.0000014082,0.05000,10.840
121
+ 17,self_attn.k_proj,0.0000058214,0.05000,1.763
122
+ 17,self_attn.v_proj,0.0000035011,0.05000,1.745
123
+ 17,self_attn.q_proj,0.0000266938,0.05000,1.815
124
+ 17,self_attn.o_proj,0.0000004904,0.05000,1.805
125
+ 17,mlp.gate_proj,0.0000305605,0.05000,2.189
126
+ 17,mlp.up_proj,0.0000294420,0.05000,1.994
127
+ 17,mlp.down_proj,0.0000015261,0.05000,10.648
128
+ 18,self_attn.k_proj,0.0000113896,0.05000,1.742
129
+ 18,self_attn.v_proj,0.0000064052,0.05000,1.727
130
+ 18,self_attn.q_proj,0.0000516542,0.05000,1.795
131
+ 18,self_attn.o_proj,0.0000004344,0.05000,1.796
132
+ 18,mlp.gate_proj,0.0000367808,0.05000,1.973
133
+ 18,mlp.up_proj,0.0000351525,0.05000,1.962
134
+ 18,mlp.down_proj,0.0000018911,0.05000,10.552
135
+ 19,self_attn.k_proj,0.0000147619,0.05000,1.741
136
+ 19,self_attn.v_proj,0.0000090224,0.05000,1.746
137
+ 19,self_attn.q_proj,0.0000708490,0.05000,1.792
138
+ 19,self_attn.o_proj,0.0000004645,0.05000,1.792
139
+ 19,mlp.gate_proj,0.0000423705,0.05000,1.967
140
+ 19,mlp.up_proj,0.0000406029,0.05000,1.968
141
+ 19,mlp.down_proj,0.0000027029,0.05000,10.781
142
+ 20,self_attn.k_proj,0.0000055276,0.05000,1.750
143
+ 20,self_attn.v_proj,0.0000033355,0.05000,1.743
144
+ 20,self_attn.q_proj,0.0000273500,0.05000,1.793
145
+ 20,self_attn.o_proj,0.0000006263,0.05000,1.793
146
+ 20,mlp.gate_proj,0.0000512864,0.05000,2.252
147
+ 20,mlp.up_proj,0.0000486326,0.05000,1.969
148
+ 20,mlp.down_proj,0.0000035893,0.05000,10.766
149
+ 21,self_attn.k_proj,0.0000045427,0.05000,1.768
150
+ 21,self_attn.v_proj,0.0000029989,0.05000,1.780
151
+ 21,self_attn.q_proj,0.0000227807,0.05000,1.806
152
+ 21,self_attn.o_proj,0.0000011423,0.05000,1.793
153
+ 21,mlp.gate_proj,0.0000463502,0.05000,2.002
154
+ 21,mlp.up_proj,0.0000443138,0.05000,1.962
155
+ 21,mlp.down_proj,0.0000035231,0.05000,10.620
156
+ 22,self_attn.k_proj,0.0000118840,0.05000,1.769
157
+ 22,self_attn.v_proj,0.0000075208,0.05000,1.737
158
+ 22,self_attn.q_proj,0.0000649774,0.05000,1.802
159
+ 22,self_attn.o_proj,0.0000013580,0.05000,1.791
160
+ 22,mlp.gate_proj,0.0000637017,0.05000,2.294
161
+ 22,mlp.up_proj,0.0000610995,0.05000,1.964
162
+ 22,mlp.down_proj,0.0000066878,0.05000,10.608
163
+ 23,self_attn.k_proj,0.0000096768,0.05000,1.758
164
+ 23,self_attn.v_proj,0.0000066151,0.05000,1.746
165
+ 23,self_attn.q_proj,0.0000553509,0.05000,1.801
166
+ 23,self_attn.o_proj,0.0000017100,0.05000,1.799
167
+ 23,mlp.gate_proj,0.0000711954,0.05000,2.026
168
+ 23,mlp.up_proj,0.0000675318,0.05000,1.981
169
+ 23,mlp.down_proj,0.0000065035,0.05000,10.602
170
+ 24,self_attn.k_proj,0.0000083006,0.05000,1.753
171
+ 24,self_attn.v_proj,0.0000067867,0.05000,1.741
172
+ 24,self_attn.q_proj,0.0000467073,0.05000,1.793
173
+ 24,self_attn.o_proj,0.0000016390,0.05000,1.797
174
+ 24,mlp.gate_proj,0.0000832089,0.05000,1.972
175
+ 24,mlp.up_proj,0.0000782947,0.05000,1.960
176
+ 24,mlp.down_proj,0.0000072162,0.05000,10.594
177
+ 25,self_attn.k_proj,0.0000148592,0.05000,1.930
178
+ 25,self_attn.v_proj,0.0000096430,0.05000,1.742
179
+ 25,self_attn.q_proj,0.0000845463,0.05000,1.797
180
+ 25,self_attn.o_proj,0.0000019787,0.05000,1.794
181
+ 25,mlp.gate_proj,0.0000984830,0.05000,1.975
182
+ 25,mlp.up_proj,0.0000933006,0.05000,1.967
183
+ 25,mlp.down_proj,0.0000084473,0.05000,10.587
184
+ 26,self_attn.k_proj,0.0000107672,0.05000,1.781
185
+ 26,self_attn.v_proj,0.0000072195,0.05000,1.763
186
+ 26,self_attn.q_proj,0.0000593120,0.05000,1.823
187
+ 26,self_attn.o_proj,0.0000023962,0.05000,1.816
188
+ 26,mlp.gate_proj,0.0001110225,0.05000,1.998
189
+ 26,mlp.up_proj,0.0001054887,0.05000,1.983
190
+ 26,mlp.down_proj,0.0000088992,0.05000,10.699
191
+ 27,self_attn.k_proj,0.0000106991,0.05000,1.944
192
+ 27,self_attn.v_proj,0.0000075662,0.05000,1.731
193
+ 27,self_attn.q_proj,0.0000575445,0.05000,1.781
194
+ 27,self_attn.o_proj,0.0000020078,0.05000,1.789
195
+ 27,mlp.gate_proj,0.0001217312,0.05000,1.969
196
+ 27,mlp.up_proj,0.0001160810,0.05000,1.964
197
+ 27,mlp.down_proj,0.0000096916,0.05000,10.565
198
+ 28,self_attn.k_proj,0.0000104979,0.05000,1.767
199
+ 28,self_attn.v_proj,0.0000081629,0.05000,1.760
200
+ 28,self_attn.q_proj,0.0000589817,0.05000,1.814
201
+ 28,self_attn.o_proj,0.0000016211,0.05000,1.817
202
+ 28,mlp.gate_proj,0.0001300697,0.05000,2.004
203
+ 28,mlp.up_proj,0.0001248194,0.05000,1.968
204
+ 28,mlp.down_proj,0.0000111233,0.05000,10.678
205
+ 29,self_attn.k_proj,0.0000137042,0.05000,1.752
206
+ 29,self_attn.v_proj,0.0000089406,0.05000,1.733
207
+ 29,self_attn.q_proj,0.0000729328,0.05000,1.784
208
+ 29,self_attn.o_proj,0.0000026417,0.05000,1.797
209
+ 29,mlp.gate_proj,0.0001383661,0.05000,1.988
210
+ 29,mlp.up_proj,0.0001325025,0.05000,1.966
211
+ 29,mlp.down_proj,0.0000118384,0.05000,11.022
212
+ 30,self_attn.k_proj,0.0000135094,0.05000,1.777
213
+ 30,self_attn.v_proj,0.0000106211,0.05000,1.751
214
+ 30,self_attn.q_proj,0.0000781156,0.05000,1.796
215
+ 30,self_attn.o_proj,0.0000030378,0.05000,1.798
216
+ 30,mlp.gate_proj,0.0001510154,0.05000,1.980
217
+ 30,mlp.up_proj,0.0001430569,0.05000,1.968
218
+ 30,mlp.down_proj,0.0000125046,0.05000,10.932
219
+ 31,self_attn.k_proj,0.0000150631,0.05000,1.762
220
+ 31,self_attn.v_proj,0.0000115141,0.05000,1.744
221
+ 31,self_attn.q_proj,0.0000834578,0.05000,1.799
222
+ 31,self_attn.o_proj,0.0000020846,0.05000,1.790
223
+ 31,mlp.gate_proj,0.0001636284,0.05000,2.262
224
+ 31,mlp.up_proj,0.0001546849,0.05000,1.962
225
+ 31,mlp.down_proj,0.0000131806,0.05000,10.579
226
+ 32,self_attn.k_proj,0.0000166102,0.05000,1.781
227
+ 32,self_attn.v_proj,0.0000128024,0.05000,1.740
228
+ 32,self_attn.q_proj,0.0000919194,0.05000,1.790
229
+ 32,self_attn.o_proj,0.0000023728,0.05000,1.815
230
+ 32,mlp.gate_proj,0.0001712901,0.05000,1.978
231
+ 32,mlp.up_proj,0.0001622502,0.05000,1.971
232
+ 32,mlp.down_proj,0.0000140184,0.05000,10.771
233
+ 33,self_attn.k_proj,0.0000155397,0.05000,1.807
234
+ 33,self_attn.v_proj,0.0000141580,0.05000,1.770
235
+ 33,self_attn.q_proj,0.0000830037,0.05000,1.835
236
+ 33,self_attn.o_proj,0.0000017610,0.05000,1.820
237
+ 33,mlp.gate_proj,0.0001827080,0.05000,2.015
238
+ 33,mlp.up_proj,0.0001707335,0.05000,1.991
239
+ 33,mlp.down_proj,0.0000144502,0.05000,10.621
240
+ 34,self_attn.k_proj,0.0000175148,0.05000,3.051
241
+ 34,self_attn.v_proj,0.0000159380,0.05000,1.756
242
+ 34,self_attn.q_proj,0.0000969024,0.05000,1.819
243
+ 34,self_attn.o_proj,0.0000012757,0.05000,1.814
244
+ 34,mlp.gate_proj,0.0001907635,0.05000,1.991
245
+ 34,mlp.up_proj,0.0001778772,0.05000,1.972
246
+ 34,mlp.down_proj,0.0000154265,0.05000,10.691
247
+ 35,self_attn.k_proj,0.0000191118,0.05000,1.755
248
+ 35,self_attn.v_proj,0.0000179529,0.05000,1.751
249
+ 35,self_attn.q_proj,0.0001059562,0.05000,1.857
250
+ 35,self_attn.o_proj,0.0000010396,0.05000,1.815
251
+ 35,mlp.gate_proj,0.0001951154,0.05000,2.000
252
+ 35,mlp.up_proj,0.0001831631,0.05000,1.992
253
+ 35,mlp.down_proj,0.0000158026,0.05000,10.654
254
+ 36,self_attn.k_proj,0.0000201484,0.05000,1.778
255
+ 36,self_attn.v_proj,0.0000189599,0.05000,1.774
256
+ 36,self_attn.q_proj,0.0001111809,0.05000,1.842
257
+ 36,self_attn.o_proj,0.0000018594,0.05000,1.806
258
+ 36,mlp.gate_proj,0.0001979247,0.05000,1.987
259
+ 36,mlp.up_proj,0.0001868471,0.05000,2.024
260
+ 36,mlp.down_proj,0.0000156611,0.05000,10.717
261
+ 37,self_attn.k_proj,0.0000191277,0.05000,2.143
262
+ 37,self_attn.v_proj,0.0000170317,0.05000,1.762
263
+ 37,self_attn.q_proj,0.0001037516,0.05000,1.824
264
+ 37,self_attn.o_proj,0.0000016494,0.05000,1.918
265
+ 37,mlp.gate_proj,0.0002023499,0.05000,2.012
266
+ 37,mlp.up_proj,0.0001926919,0.05000,1.967
267
+ 37,mlp.down_proj,0.0000165328,0.05000,10.620
268
+ 38,self_attn.k_proj,0.0000190743,0.05000,1.760
269
+ 38,self_attn.v_proj,0.0000157472,0.05000,1.751
270
+ 38,self_attn.q_proj,0.0001001458,0.05000,1.804
271
+ 38,self_attn.o_proj,0.0000027518,0.05000,1.816
272
+ 38,mlp.gate_proj,0.0002092387,0.05000,2.005
273
+ 38,mlp.up_proj,0.0001991309,0.05000,1.979
274
+ 38,mlp.down_proj,0.0000174658,0.05000,10.616
275
+ 39,self_attn.k_proj,0.0000183438,0.05000,1.797
276
+ 39,self_attn.v_proj,0.0000157707,0.05000,1.786
277
+ 39,self_attn.q_proj,0.0000962149,0.05000,1.829
278
+ 39,self_attn.o_proj,0.0000025272,0.05000,1.819
279
+ 39,mlp.gate_proj,0.0002148854,0.05000,2.003
280
+ 39,mlp.up_proj,0.0002056378,0.05000,1.984
281
+ 39,mlp.down_proj,0.0000188615,0.05000,10.656
282
+ 40,self_attn.k_proj,0.0000182748,0.05000,1.776
283
+ 40,self_attn.v_proj,0.0000128602,0.05000,1.801
284
+ 40,self_attn.q_proj,0.0000945882,0.05000,1.837
285
+ 40,self_attn.o_proj,0.0000040090,0.05000,1.838
286
+ 40,mlp.gate_proj,0.0002224625,0.05000,1.998
287
+ 40,mlp.up_proj,0.0002146541,0.05000,2.031
288
+ 40,mlp.down_proj,0.0000212490,0.05000,10.668
289
+ 41,self_attn.k_proj,0.0000163700,0.05000,1.771
290
+ 41,self_attn.v_proj,0.0000122546,0.05000,1.760
291
+ 41,self_attn.q_proj,0.0000833327,0.05000,1.834
292
+ 41,self_attn.o_proj,0.0000048864,0.05000,1.838
293
+ 41,mlp.gate_proj,0.0002355262,0.05000,1.998
294
+ 41,mlp.up_proj,0.0002269599,0.05000,1.979
295
+ 41,mlp.down_proj,0.0000229579,0.05000,10.677
296
+ 42,self_attn.k_proj,0.0000204834,0.05000,1.763
297
+ 42,self_attn.v_proj,0.0000159663,0.05000,1.760
298
+ 42,self_attn.q_proj,0.0001116346,0.05000,1.817
299
+ 42,self_attn.o_proj,0.0000038089,0.05000,1.806
300
+ 42,mlp.gate_proj,0.0002455023,0.05000,2.074
301
+ 42,mlp.up_proj,0.0002384295,0.05000,1.989
302
+ 42,mlp.down_proj,0.0000260395,0.05000,10.683
303
+ 43,self_attn.k_proj,0.0000201844,0.05000,1.780
304
+ 43,self_attn.v_proj,0.0000141727,0.05000,1.764
305
+ 43,self_attn.q_proj,0.0001068181,0.05000,1.814
306
+ 43,self_attn.o_proj,0.0000052884,0.05000,1.820
307
+ 43,mlp.gate_proj,0.0002542797,0.05000,1.992
308
+ 43,mlp.up_proj,0.0002492644,0.05000,1.983
309
+ 43,mlp.down_proj,0.0000290737,0.05000,10.705
310
+ 44,self_attn.k_proj,0.0000191750,0.05000,1.750
311
+ 44,self_attn.v_proj,0.0000128655,0.05000,1.774
312
+ 44,self_attn.q_proj,0.0001060084,0.05000,1.799
313
+ 44,self_attn.o_proj,0.0000067395,0.05000,1.800
314
+ 44,mlp.gate_proj,0.0002601013,0.05000,1.982
315
+ 44,mlp.up_proj,0.0002597747,0.05000,1.974
316
+ 44,mlp.down_proj,0.0000352207,0.05000,10.593
317
+ 45,self_attn.k_proj,0.0000203800,0.05000,1.776
318
+ 45,self_attn.v_proj,0.0000116344,0.05000,1.754
319
+ 45,self_attn.q_proj,0.0001127626,0.05000,1.810
320
+ 45,self_attn.o_proj,0.0000093858,0.05000,1.806
321
+ 45,mlp.gate_proj,0.0002680701,0.05000,1.988
322
+ 45,mlp.up_proj,0.0002698083,0.05000,1.975
323
+ 45,mlp.down_proj,0.0000560786,0.05000,10.791
324
+ 46,self_attn.k_proj,0.0000223115,0.05000,1.746
325
+ 46,self_attn.v_proj,0.0000130194,0.05000,1.736
326
+ 46,self_attn.q_proj,0.0001210351,0.05000,1.809
327
+ 46,self_attn.o_proj,0.0000144393,0.05000,1.794
328
+ 46,mlp.gate_proj,0.0002725730,0.05000,1.981
329
+ 46,mlp.up_proj,0.0002767919,0.05000,1.957
330
+ 46,mlp.down_proj,0.0000390935,0.05000,10.800
331
+ 47,self_attn.k_proj,0.0000220920,0.05000,1.766
332
+ 47,self_attn.v_proj,0.0000137250,0.05000,1.765
333
+ 47,self_attn.q_proj,0.0001228667,0.05000,2.014
334
+ 47,self_attn.o_proj,0.0000139756,0.05000,1.816
335
+ 47,mlp.gate_proj,0.0002729540,0.05000,2.008
336
+ 47,mlp.up_proj,0.0002807646,0.05000,1.981
337
+ 47,mlp.down_proj,0.0000435434,0.05000,10.649
338
+ 48,self_attn.k_proj,0.0000213162,0.05000,1.763
339
+ 48,self_attn.v_proj,0.0000155181,0.05000,1.758
340
+ 48,self_attn.q_proj,0.0001272095,0.05000,1.812
341
+ 48,self_attn.o_proj,0.0000127778,0.05000,1.836
342
+ 48,mlp.gate_proj,0.0002877861,0.05000,2.007
343
+ 48,mlp.up_proj,0.0002972881,0.05000,1.988
344
+ 48,mlp.down_proj,0.0000485562,0.05000,10.649
345
+ 49,self_attn.k_proj,0.0000211959,0.05000,1.757
346
+ 49,self_attn.v_proj,0.0000150000,0.05000,1.748
347
+ 49,self_attn.q_proj,0.0001207883,0.05000,1.816
348
+ 49,self_attn.o_proj,0.0000144471,0.05000,1.810
349
+ 49,mlp.gate_proj,0.0002878208,0.05000,2.077
350
+ 49,mlp.up_proj,0.0003003240,0.05000,1.973
351
+ 49,mlp.down_proj,0.0000530659,0.05000,10.765
352
+ 50,self_attn.k_proj,0.0000258553,0.05000,1.758
353
+ 50,self_attn.v_proj,0.0000171321,0.05000,1.749
354
+ 50,self_attn.q_proj,0.0001526195,0.05000,1.809
355
+ 50,self_attn.o_proj,0.0000200374,0.05000,1.810
356
+ 50,mlp.gate_proj,0.0002848091,0.05000,2.031
357
+ 50,mlp.up_proj,0.0002997550,0.05000,2.054
358
+ 50,mlp.down_proj,0.0000592731,0.05000,10.671
359
+ 51,self_attn.k_proj,0.0000231560,0.05000,1.778
360
+ 51,self_attn.v_proj,0.0000151993,0.05000,1.752
361
+ 51,self_attn.q_proj,0.0001321706,0.05000,1.818
362
+ 51,self_attn.o_proj,0.0000205501,0.05000,1.809
363
+ 51,mlp.gate_proj,0.0002997619,0.05000,2.003
364
+ 51,mlp.up_proj,0.0003137411,0.05000,1.984
365
+ 51,mlp.down_proj,0.0000643393,0.05000,11.131
366
+ 52,self_attn.k_proj,0.0000203590,0.05000,1.745
367
+ 52,self_attn.v_proj,0.0000186799,0.05000,1.742
368
+ 52,self_attn.q_proj,0.0001251833,0.05000,1.810
369
+ 52,self_attn.o_proj,0.0000231322,0.05000,1.844
370
+ 52,mlp.gate_proj,0.0003448580,0.05000,1.992
371
+ 52,mlp.up_proj,0.0003562837,0.05000,1.984
372
+ 52,mlp.down_proj,0.0000686877,0.05000,11.025
373
+ 53,self_attn.k_proj,0.0000233242,0.05000,1.752
374
+ 53,self_attn.v_proj,0.0000200947,0.05000,1.762
375
+ 53,self_attn.q_proj,0.0001410289,0.05000,1.855
376
+ 53,self_attn.o_proj,0.0000270083,0.05000,1.809
377
+ 53,mlp.gate_proj,0.0003695233,0.05000,2.113
378
+ 53,mlp.up_proj,0.0003783724,0.05000,1.978
379
+ 53,mlp.down_proj,0.0000720811,0.05000,10.649
380
+ 54,self_attn.k_proj,0.0000225596,0.05000,1.765
381
+ 54,self_attn.v_proj,0.0000258560,0.05000,1.752
382
+ 54,self_attn.q_proj,0.0001495487,0.05000,2.181
383
+ 54,self_attn.o_proj,0.0000239377,0.05000,1.814
384
+ 54,mlp.gate_proj,0.0003636169,0.05000,1.974
385
+ 54,mlp.up_proj,0.0003744466,0.05000,1.987
386
+ 54,mlp.down_proj,0.0000797681,0.05000,10.955
387
+ 55,self_attn.k_proj,0.0000218617,0.05000,1.764
388
+ 55,self_attn.v_proj,0.0000253199,0.05000,1.758
389
+ 55,self_attn.q_proj,0.0001466854,0.05000,1.822
390
+ 55,self_attn.o_proj,0.0000282526,0.05000,1.815
391
+ 55,mlp.gate_proj,0.0003757199,0.05000,1.990
392
+ 55,mlp.up_proj,0.0003881287,0.05000,1.987
393
+ 55,mlp.down_proj,0.0000911115,0.05000,10.635
394
+ 56,self_attn.k_proj,0.0000223928,0.05000,1.794
395
+ 56,self_attn.v_proj,0.0000218095,0.05000,1.772
396
+ 56,self_attn.q_proj,0.0001375662,0.05000,1.816
397
+ 56,self_attn.o_proj,0.0000379754,0.05000,1.812
398
+ 56,mlp.gate_proj,0.0004133328,0.05000,2.341
399
+ 56,mlp.up_proj,0.0004231848,0.05000,1.989
400
+ 56,mlp.down_proj,0.0001026907,0.05000,10.732
401
+ 57,self_attn.k_proj,0.0000240502,0.05000,1.773
402
+ 57,self_attn.v_proj,0.0000254070,0.05000,1.769
403
+ 57,self_attn.q_proj,0.0001487034,0.05000,1.813
404
+ 57,self_attn.o_proj,0.0000396974,0.05000,1.844
405
+ 57,mlp.gate_proj,0.0004108658,0.05000,2.431
406
+ 57,mlp.up_proj,0.0004240784,0.05000,2.008
407
+ 57,mlp.down_proj,0.0001128101,0.05000,10.571
408
+ 58,self_attn.k_proj,0.0000219804,0.05000,1.833
409
+ 58,self_attn.v_proj,0.0000258982,0.05000,1.745
410
+ 58,self_attn.q_proj,0.0001463768,0.05000,1.808
411
+ 58,self_attn.o_proj,0.0000463313,0.05000,1.809
412
+ 58,mlp.gate_proj,0.0004239820,0.05000,1.996
413
+ 58,mlp.up_proj,0.0004381811,0.05000,1.988
414
+ 58,mlp.down_proj,0.0001411034,0.05000,10.627
415
+ 59,self_attn.k_proj,0.0000221796,0.05000,1.749
416
+ 59,self_attn.v_proj,0.0000271249,0.05000,1.760
417
+ 59,self_attn.q_proj,0.0001451733,0.05000,1.847
418
+ 59,self_attn.o_proj,0.0000497049,0.05000,1.813
419
+ 59,mlp.gate_proj,0.0004518706,0.05000,2.006
420
+ 59,mlp.up_proj,0.0004566152,0.05000,2.009
421
+ 59,mlp.down_proj,0.0001616510,0.05000,10.726
422
+ 60,self_attn.k_proj,0.0000241346,0.05000,1.740
423
+ 60,self_attn.v_proj,0.0000341904,0.05000,1.735
424
+ 60,self_attn.q_proj,0.0001691093,0.05000,1.811
425
+ 60,self_attn.o_proj,0.0000432857,0.05000,1.815
426
+ 60,mlp.gate_proj,0.0005488024,0.05000,1.983
427
+ 60,mlp.up_proj,0.0005428663,0.05000,1.967
428
+ 60,mlp.down_proj,0.0001826035,0.05000,10.607
429
+ 61,self_attn.k_proj,0.0000247619,0.05000,1.780
430
+ 61,self_attn.v_proj,0.0000374423,0.05000,1.778
431
+ 61,self_attn.q_proj,0.0001707510,0.05000,1.828
432
+ 61,self_attn.o_proj,0.0000489282,0.05000,1.796
433
+ 61,mlp.gate_proj,0.0006036149,0.05000,2.043
434
+ 61,mlp.up_proj,0.0005943566,0.05000,1.986
435
+ 61,mlp.down_proj,0.0002096645,0.05000,10.626
436
+ 62,self_attn.k_proj,0.0000260108,0.05000,1.744
437
+ 62,self_attn.v_proj,0.0000357173,0.05000,1.747
438
+ 62,self_attn.q_proj,0.0001779701,0.05000,1.806
439
+ 62,self_attn.o_proj,0.0000560566,0.05000,1.800
440
+ 62,mlp.gate_proj,0.0007026849,0.05000,1.983
441
+ 62,mlp.up_proj,0.0006780243,0.05000,1.958
442
+ 62,mlp.down_proj,0.0002315546,0.05000,10.636
443
+ 63,self_attn.k_proj,0.0000259721,0.05000,1.775
444
+ 63,self_attn.v_proj,0.0000370121,0.05000,1.768
445
+ 63,self_attn.q_proj,0.0001751592,0.05000,1.833
446
+ 63,self_attn.o_proj,0.0000582709,0.05000,1.794
447
+ 63,mlp.gate_proj,0.0007427418,0.05000,2.061
448
+ 63,mlp.up_proj,0.0007230977,0.05000,1.987
449
+ 63,mlp.down_proj,0.0002685315,0.05000,10.627
450
+ 64,self_attn.k_proj,0.0000277544,0.05000,1.773
451
+ 64,self_attn.v_proj,0.0000370765,0.05000,1.817
452
+ 64,self_attn.q_proj,0.0001851113,0.05000,1.824
453
+ 64,self_attn.o_proj,0.0000561820,0.05000,1.813
454
+ 64,mlp.gate_proj,0.0008117926,0.05000,1.997
455
+ 64,mlp.up_proj,0.0007852368,0.05000,1.984
456
+ 64,mlp.down_proj,0.0003075914,0.05000,10.902
457
+ 65,self_attn.k_proj,0.0000285461,0.05000,1.787
458
+ 65,self_attn.v_proj,0.0000393212,0.05000,1.763
459
+ 65,self_attn.q_proj,0.0001911329,0.05000,1.814
460
+ 65,self_attn.o_proj,0.0000612525,0.05000,1.806
461
+ 65,mlp.gate_proj,0.0009011056,0.05000,1.996
462
+ 65,mlp.up_proj,0.0008748247,0.05000,1.984
463
+ 65,mlp.down_proj,0.0003410627,0.05000,10.640
464
+ 66,self_attn.k_proj,0.0000250020,0.05000,1.769
465
+ 66,self_attn.v_proj,0.0000412053,0.05000,1.804
466
+ 66,self_attn.q_proj,0.0001777558,0.05000,1.822
467
+ 66,self_attn.o_proj,0.0000593562,0.05000,1.806
468
+ 66,mlp.gate_proj,0.0010464112,0.05000,1.991
469
+ 66,mlp.up_proj,0.0010015315,0.05000,1.975
470
+ 66,mlp.down_proj,0.0003793334,0.05000,10.629
471
+ 67,self_attn.k_proj,0.0000271901,0.05000,1.769
472
+ 67,self_attn.v_proj,0.0000476565,0.05000,1.783
473
+ 67,self_attn.q_proj,0.0001915081,0.05000,1.825
474
+ 67,self_attn.o_proj,0.0000432816,0.05000,1.820
475
+ 67,mlp.gate_proj,0.0011060469,0.05000,2.151
476
+ 67,mlp.up_proj,0.0010709054,0.05000,1.993
477
+ 67,mlp.down_proj,0.0004122393,0.05000,10.675
478
+ 68,self_attn.k_proj,0.0000280968,0.05000,1.749
479
+ 68,self_attn.v_proj,0.0000545287,0.05000,1.744
480
+ 68,self_attn.q_proj,0.0002037208,0.05000,1.818
481
+ 68,self_attn.o_proj,0.0000551032,0.05000,1.805
482
+ 68,mlp.gate_proj,0.0011910696,0.05000,2.007
483
+ 68,mlp.up_proj,0.0011660263,0.05000,2.002
484
+ 68,mlp.down_proj,0.0004548276,0.05000,10.648
485
+ 69,self_attn.k_proj,0.0000288387,0.05000,1.752
486
+ 69,self_attn.v_proj,0.0000463445,0.05000,1.772
487
+ 69,self_attn.q_proj,0.0001985468,0.05000,1.822
488
+ 69,self_attn.o_proj,0.0000651683,0.05000,1.812
489
+ 69,mlp.gate_proj,0.0012707321,0.05000,2.017
490
+ 69,mlp.up_proj,0.0012687438,0.05000,1.987
491
+ 69,mlp.down_proj,0.0005535664,0.05000,10.638
492
+ 70,self_attn.k_proj,0.0000278806,0.05000,1.833
493
+ 70,self_attn.v_proj,0.0000634856,0.05000,1.766
494
+ 70,self_attn.q_proj,0.0002063552,0.05000,1.824
495
+ 70,self_attn.o_proj,0.0000850229,0.05000,1.822
496
+ 70,mlp.gate_proj,0.0014331741,0.05000,2.091
497
+ 70,mlp.up_proj,0.0014520215,0.05000,1.987
498
+ 70,mlp.down_proj,0.0006782743,0.05000,10.642
499
+ 71,self_attn.k_proj,0.0000291759,0.05000,1.771
500
+ 71,self_attn.v_proj,0.0000665733,0.05000,1.748
501
+ 71,self_attn.q_proj,0.0002151191,0.05000,1.818
502
+ 71,self_attn.o_proj,0.0000977527,0.05000,1.800
503
+ 71,mlp.gate_proj,0.0015705100,0.05000,2.357
504
+ 71,mlp.up_proj,0.0016157514,0.05000,1.970
505
+ 71,mlp.down_proj,0.0008346530,0.05000,10.610
506
+ 72,self_attn.k_proj,0.0000277748,0.05000,1.781
507
+ 72,self_attn.v_proj,0.0000769196,0.05000,1.770
508
+ 72,self_attn.q_proj,0.0002174281,0.05000,1.811
509
+ 72,self_attn.o_proj,0.0000868489,0.05000,1.822
510
+ 72,mlp.gate_proj,0.0017338871,0.05000,2.008
511
+ 72,mlp.up_proj,0.0018062971,0.05000,2.012
512
+ 72,mlp.down_proj,0.0010927289,0.05000,10.713
513
+ 73,self_attn.k_proj,0.0000286837,0.05000,1.855
514
+ 73,self_attn.v_proj,0.0000898291,0.05000,1.765
515
+ 73,self_attn.q_proj,0.0002256701,0.05000,1.823
516
+ 73,self_attn.o_proj,0.0001178069,0.05000,1.812
517
+ 73,mlp.gate_proj,0.0018677687,0.05000,2.000
518
+ 73,mlp.up_proj,0.0019719667,0.05000,1.974
519
+ 73,mlp.down_proj,0.0013667705,0.05000,10.622
520
+ 74,self_attn.k_proj,0.0000291870,0.05000,1.782
521
+ 74,self_attn.v_proj,0.0000854093,0.05000,1.765
522
+ 74,self_attn.q_proj,0.0002426886,0.05000,1.822
523
+ 74,self_attn.o_proj,0.0001362484,0.05000,1.820
524
+ 74,mlp.gate_proj,0.0019655873,0.05000,1.998
525
+ 74,mlp.up_proj,0.0021086635,0.05000,1.984
526
+ 74,mlp.down_proj,0.0018890411,0.05000,10.693
527
+ 75,self_attn.k_proj,0.0000286997,0.05000,1.761
528
+ 75,self_attn.v_proj,0.0001000469,0.05000,1.738
529
+ 75,self_attn.q_proj,0.0002336207,0.05000,1.786
530
+ 75,self_attn.o_proj,0.0001834656,0.05000,1.802
531
+ 75,mlp.gate_proj,0.0021311569,0.05000,1.993
532
+ 75,mlp.up_proj,0.0023135146,0.05000,2.004
533
+ 75,mlp.down_proj,0.0029731325,0.05000,10.627
534
+ 76,self_attn.k_proj,0.0000291649,0.05000,1.759
535
+ 76,self_attn.v_proj,0.0001630616,0.05000,1.747
536
+ 76,self_attn.q_proj,0.0002645282,0.05000,1.796
537
+ 76,self_attn.o_proj,0.0002224826,0.05000,1.891
538
+ 76,mlp.gate_proj,0.0022946957,0.05000,2.193
539
+ 76,mlp.up_proj,0.0025133457,0.05000,1.982
540
+ 76,mlp.down_proj,0.0047117417,0.05000,10.648
541
+ 77,self_attn.k_proj,0.0000274915,0.05000,1.782
542
+ 77,self_attn.v_proj,0.0001038129,0.05000,1.765
543
+ 77,self_attn.q_proj,0.0002454595,0.05000,1.843
544
+ 77,self_attn.o_proj,0.0003246888,0.05000,1.824
545
+ 77,mlp.gate_proj,0.0024383952,0.05000,2.030
546
+ 77,mlp.up_proj,0.0026753585,0.05000,1.995
547
+ 77,mlp.down_proj,0.0054391157,0.05000,10.677
548
+ 78,self_attn.k_proj,0.0000255497,0.05000,1.753
549
+ 78,self_attn.v_proj,0.0001116223,0.05000,1.773
550
+ 78,self_attn.q_proj,0.0002304750,0.05000,1.823
551
+ 78,self_attn.o_proj,0.0003565016,0.05000,1.820
552
+ 78,mlp.gate_proj,0.0022525496,0.05000,1.985
553
+ 78,mlp.up_proj,0.0024347074,0.05000,1.993
554
+ 78,mlp.down_proj,0.0097805633,0.05000,10.660
555
+ 79,self_attn.k_proj,0.0000219384,0.05000,1.772
556
+ 79,self_attn.v_proj,0.0000395143,0.05000,1.750
557
+ 79,self_attn.q_proj,0.0001646463,0.05000,1.812
558
+ 79,self_attn.o_proj,0.0001054719,0.05000,1.787
559
+ 79,mlp.gate_proj,0.0021020814,0.05000,2.001
560
+ 79,mlp.up_proj,0.0022070014,0.05000,2.006
561
+ 79,mlp.down_proj,0.0153322140,0.05000,10.670
quantize_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": false,
5
+ "sym": true,
6
+ "lm_head": false,
7
+ "quant_method": "gptq",
8
+ "checkpoint_format": "gptq",
9
+ "pack_dtype": "int32",
10
+ "meta": {
11
+ "quantizer": [
12
+ "gptqmodel:4.2.5"
13
+ ],
14
+ "uri": "https://github.com/modelcloud/gptqmodel",
15
+ "damp_percent": 0.05,
16
+ "damp_auto_increment": 0.01,
17
+ "static_groups": false,
18
+ "true_sequential": true,
19
+ "mse": 0.0,
20
+ "v2": false,
21
+ "v2_alpha": 0.25,
22
+ "act_group_aware": false
23
+ }
24
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": "<|endoftext|>"
25
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2TokenizerFast",
206
+ "unk_token": null,
207
+ "_commit_hash": null
208
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff