Abdohaaland committed on
Commit
6611689
·
verified ·
1 Parent(s): e155d09

Upload GPTQ quantized model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {#- Input normalization: give every optional template variable a defined value before rendering. #}
+ {#- A caller-supplied custom_tools, when defined, replaces tools. #}
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {#- Unless the caller sets tools_in_user_message, tool definitions go into the first user turn. #}
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {#- Date shown in the system header: caller's date_string, else strftime_now() when it is defined, else a fixed fallback. #}
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {#- Map an undefined tools variable to none so the later "is none" checks are well defined. #}
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
+ {#- If the first message has role 'system', its trimmed content becomes system_message and it is dropped from messages; otherwise system_message is the empty string. #}
+ {#- NOTE(review): messages[0] is indexed without a length check; behaviour on an empty conversation depends on the Jinja environment's undefined handling, so confirm callers never pass an empty list. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
+ {#- A system turn is always emitted, even when system_message is empty: header, optional environment line, the two date lines, optional tool definitions, then the caller's system text and the end-of-turn marker. #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {#- The environment line is added whenever any tools are supplied. #}
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {#- Tool definitions are listed here only when the caller opted out of placing them in the first user message. #}
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {#- NOTE(review): the next three strings are concatenated with no space or newline between the sentences ("...function call.Respond in the format ...}.Do not use variables."). Presumably this matches the upstream prompt format; confirm before changing, since any edit alters the rendered prompt. #}
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {#- Each tool is serialized as 4-space-indented JSON followed by a blank line. #}
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
+ {#- Runs only when tools are supplied and tools_in_user_message is true: the first remaining message is consumed and re-emitted as a user turn prefixed with the tool instructions and tool JSON. #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
+ {#- NOTE(review): the role of messages[0] is not checked; it is assumed to be the user turn. Its content is always rendered under a user header below. #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {#- Each tool is serialized as 4-space-indented JSON followed by a blank line, ahead of the user's own text. #}
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {#- Render every remaining message. There are three cases: a plain turn, an assistant tool call, and a tool/ipython result. #}
+ {%- for message in messages %}
68
+ {#- Case 1: plain turn, meaning the role is neither 'tool' nor 'ipython' and the message has no 'tool_calls' key. #}
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {#- The trim filter binds tighter than +, so only message['content'] is trimmed, not the whole concatenation. #}
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {#- Case 2: tool call. It is always rendered under an assistant header as {"name": ..., "parameters": ...}. #}
+ {%- elif 'tool_calls' in message %}
71
+ {#- "not" applies to the whole comparison, so anything other than exactly one tool call raises. #}
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {#- NOTE(review): tool_call.name is spliced in by string concatenation without JSON escaping; assumes names contain no quotes or backslashes. #}
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {#- NOTE(review): presumably arguments is a mapping; if a caller passes an already-serialized JSON string, tojson would emit it as a quoted string. Verify against callers. #}
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {#- Case 3: tool result. Both 'tool' and 'ipython' roles are rendered under an ipython header. #}
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {#- NOTE(review): Jinja's "iterable" test is also true for plain strings, so string content takes the tojson branch and is emitted JSON-quoted. Confirm this is intended. #}
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {#- When add_generation_prompt is truthy, finish with an open assistant header so generation continues as the assistant turn. #}
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "head_dim": 128,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 131072,
20
+ "mlp_bias": false,
21
+ "model_type": "llama",
22
+ "num_attention_heads": 24,
23
+ "num_hidden_layers": 28,
24
+ "num_key_value_heads": 8,
25
+ "pad_token_id": null,
26
+ "pretraining_tp": 1,
27
+ "quantization_config": {
28
+ "bits": 4,
29
+ "checkpoint_format": "gptq",
30
+ "desc_act": false,
31
+ "format": "gptq",
32
+ "group_size": 128,
33
+ "lm_head": false,
34
+ "meta": {
35
+ "act_group_aware": true,
36
+ "auto_forward_data_parallel": true,
37
+ "damp_auto_increment": 0.01,
38
+ "damp_percent": 0.05,
39
+ "failsafe": {
40
+ "smooth": {
41
+ "group_size_threshold": 128,
42
+ "k": 2.75,
43
+ "type": "mad"
44
+ },
45
+ "strategy": "rtn",
46
+ "threshold": "0.5%"
47
+ },
48
+ "gc_mode": "interval",
49
+ "gptaq": null,
50
+ "hessian": {
51
+ "chunk_bytes": null,
52
+ "chunk_size": null,
53
+ "staging_dtype": "float32"
54
+ },
55
+ "mock_quantization": false,
56
+ "mse": 0.0,
57
+ "offload_to_disk": true,
58
+ "offload_to_disk_path": "./gptqmodel_offload/znzwasrn-gqcllywg/",
59
+ "pack_impl": "cpu",
60
+ "quantizer": [
61
+ "gptqmodel:5.7.0"
62
+ ],
63
+ "static_groups": false,
64
+ "true_sequential": true,
65
+ "uri": "https://github.com/modelcloud/gptqmodel",
66
+ "vram_strategy": "exclusive",
67
+ "wait_for_submodule_finalizers": false
68
+ },
69
+ "pack_dtype": "int32",
70
+ "quant_method": "gptq",
71
+ "sym": true
72
+ },
73
+ "rms_norm_eps": 1e-05,
74
+ "rope_parameters": {
75
+ "factor": 32.0,
76
+ "high_freq_factor": 4.0,
77
+ "low_freq_factor": 1.0,
78
+ "original_max_position_embeddings": 8192,
79
+ "rope_theta": 500000.0,
80
+ "rope_type": "llama3"
81
+ },
82
+ "tie_word_embeddings": true,
83
+ "transformers_version": "5.3.0",
84
+ "use_cache": true,
85
+ "vocab_size": 128256
86
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "5.3.0"
12
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecdcaefe73ea1bc089b009a972240e8f514dfb7d13e077d803575e6627d5c213
3
+ size 2255767876
quant_log.csv ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.0000833145,0.05000,5.504
3
+ 0,self_attn.v_proj,0.0000048262,0.05000,5.610
4
+ 0,self_attn.q_proj,0.0001627265,0.05000,5.606
5
+ 0,self_attn.o_proj,0.0000002214,0.05000,1.191
6
+ 0,mlp.gate_proj,0.0000997544,0.05000,3.078
7
+ 0,mlp.up_proj,0.0000880482,0.05000,3.099
8
+ 0,mlp.down_proj,0.0000010819,0.05000,3.332
9
+ 1,self_attn.q_proj,0.0002339984,0.05000,5.468
10
+ 1,self_attn.k_proj,0.0001366074,0.05000,5.615
11
+ 1,self_attn.v_proj,0.0000161811,0.05000,5.641
12
+ 1,self_attn.o_proj,0.0000006392,0.05000,1.165
13
+ 1,mlp.gate_proj,0.0001622974,0.05000,2.888
14
+ 1,mlp.up_proj,0.0001428511,0.05000,2.918
15
+ 1,mlp.down_proj,0.0003240824,0.05000,3.307
16
+ 2,self_attn.q_proj,0.0009462966,0.05000,5.619
17
+ 2,self_attn.k_proj,0.0005598217,0.05000,5.643
18
+ 2,self_attn.v_proj,0.0000587609,0.05000,5.671
19
+ 2,self_attn.o_proj,0.0000006137,0.05000,1.139
20
+ 2,mlp.up_proj,0.0002262556,0.05000,2.879
21
+ 2,mlp.gate_proj,0.0002659620,0.05000,2.937
22
+ 2,mlp.down_proj,0.0000028477,0.05000,2.979
23
+ 3,self_attn.q_proj,0.0006456479,0.05000,5.110
24
+ 3,self_attn.v_proj,0.0000623240,0.05000,5.181
25
+ 3,self_attn.k_proj,0.0003549168,0.05000,5.208
26
+ 3,self_attn.o_proj,0.0000012870,0.05000,1.070
27
+ 3,mlp.up_proj,0.0002929948,0.05000,2.290
28
+ 3,mlp.gate_proj,0.0003876079,0.05000,2.322
29
+ 3,mlp.down_proj,0.0000042261,0.05000,3.058
30
+ 4,self_attn.v_proj,0.0000641730,0.05000,5.203
31
+ 4,self_attn.q_proj,0.0006326817,0.05000,5.248
32
+ 4,self_attn.k_proj,0.0003315510,0.05000,5.274
33
+ 4,self_attn.o_proj,0.0000023387,0.05000,1.078
34
+ 4,mlp.gate_proj,0.0005043794,0.05000,2.391
35
+ 4,mlp.up_proj,0.0003418379,0.05000,2.413
36
+ 4,mlp.down_proj,0.0000061632,0.05000,3.072
37
+ 5,self_attn.k_proj,0.0005265745,0.05000,5.141
38
+ 5,self_attn.v_proj,0.0000638105,0.05000,5.204
39
+ 5,self_attn.q_proj,0.0008782418,0.05000,5.240
40
+ 5,self_attn.o_proj,0.0000025126,0.05000,1.060
41
+ 5,mlp.up_proj,0.0003865409,0.05000,2.458
42
+ 5,mlp.gate_proj,0.0005401021,0.05000,2.486
43
+ 5,mlp.down_proj,0.0000082887,0.05000,3.048
44
+ 6,self_attn.v_proj,0.0000612041,0.05000,5.136
45
+ 6,self_attn.k_proj,0.0003914881,0.05000,5.177
46
+ 6,self_attn.q_proj,0.0007378610,0.05000,5.211
47
+ 6,self_attn.o_proj,0.0000043394,0.05000,1.034
48
+ 6,mlp.gate_proj,0.0005627311,0.05000,2.457
49
+ 6,mlp.up_proj,0.0004075962,0.05000,2.475
50
+ 6,mlp.down_proj,0.0000099089,0.05000,2.958
51
+ 7,self_attn.k_proj,0.0003767969,0.05000,5.226
52
+ 7,self_attn.v_proj,0.0000546954,0.05000,5.279
53
+ 7,self_attn.q_proj,0.0006362990,0.05000,5.295
54
+ 7,self_attn.o_proj,0.0000061320,0.05000,1.056
55
+ 7,mlp.up_proj,0.0004317052,0.05000,2.434
56
+ 7,mlp.gate_proj,0.0005475797,0.05000,2.443
57
+ 7,mlp.down_proj,0.0000111637,0.05000,3.088
58
+ 8,self_attn.k_proj,0.0004685118,0.05000,5.168
59
+ 8,self_attn.v_proj,0.0000670339,0.05000,5.199
60
+ 8,self_attn.q_proj,0.0007847853,0.05000,5.229
61
+ 8,self_attn.o_proj,0.0000080611,0.05000,1.108
62
+ 8,mlp.up_proj,0.0004457112,0.05000,2.385
63
+ 8,mlp.gate_proj,0.0005756153,0.05000,2.414
64
+ 8,mlp.down_proj,0.0000119709,0.05000,3.063
65
+ 9,self_attn.q_proj,0.0007281643,0.05000,5.169
66
+ 9,self_attn.v_proj,0.0000788263,0.05000,5.243
67
+ 9,self_attn.k_proj,0.0004247850,0.05000,5.248
68
+ 9,self_attn.o_proj,0.0000077604,0.05000,1.016
69
+ 9,mlp.gate_proj,0.0005563830,0.05000,2.452
70
+ 9,mlp.up_proj,0.0004468315,0.05000,2.474
71
+ 9,mlp.down_proj,0.0000119611,0.05000,2.982
72
+ 10,self_attn.v_proj,0.0000621950,0.05000,5.226
73
+ 10,self_attn.k_proj,0.0004504600,0.05000,5.274
74
+ 10,self_attn.q_proj,0.0007390235,0.05000,5.337
75
+ 10,self_attn.o_proj,0.0000074142,0.05000,1.079
76
+ 10,mlp.gate_proj,0.0005601441,0.05000,2.303
77
+ 10,mlp.up_proj,0.0004797459,0.05000,2.322
78
+ 10,mlp.down_proj,0.0000133788,0.05000,3.071
79
+ 11,self_attn.v_proj,0.0000770151,0.05000,5.048
80
+ 11,self_attn.k_proj,0.0003612847,0.05000,5.134
81
+ 11,self_attn.q_proj,0.0006474613,0.05000,5.161
82
+ 11,self_attn.o_proj,0.0000095606,0.05000,1.116
83
+ 11,mlp.up_proj,0.0005172904,0.05000,2.391
84
+ 11,mlp.gate_proj,0.0005825480,0.05000,2.425
85
+ 11,mlp.down_proj,0.0000147894,0.05000,3.155
86
+ 12,self_attn.k_proj,0.0005030657,0.05000,5.040
87
+ 12,self_attn.q_proj,0.0008600825,0.05000,5.172
88
+ 12,self_attn.v_proj,0.0000796377,0.05000,5.186
89
+ 12,self_attn.o_proj,0.0000104103,0.05000,1.071
90
+ 12,mlp.up_proj,0.0005417673,0.05000,2.423
91
+ 12,mlp.gate_proj,0.0006048538,0.05000,2.449
92
+ 12,mlp.down_proj,0.0000159788,0.05000,3.090
93
+ 13,self_attn.k_proj,0.0005679255,0.05000,5.098
94
+ 13,self_attn.q_proj,0.0009026564,0.05000,5.188
95
+ 13,self_attn.v_proj,0.0000920754,0.05000,5.205
96
+ 13,self_attn.o_proj,0.0000111667,0.05000,1.057
97
+ 13,mlp.up_proj,0.0005947397,0.05000,2.364
98
+ 13,mlp.gate_proj,0.0007042694,0.05000,2.385
99
+ 13,mlp.down_proj,0.0000207118,0.05000,3.023
100
+ 14,self_attn.v_proj,0.0001053973,0.05000,5.222
101
+ 14,self_attn.k_proj,0.0004498990,0.05000,5.278
102
+ 14,self_attn.q_proj,0.0009584807,0.05000,5.306
103
+ 14,self_attn.o_proj,0.0000123116,0.05000,1.090
104
+ 14,mlp.up_proj,0.0006279950,0.05000,2.357
105
+ 14,mlp.gate_proj,0.0007503058,0.05000,2.383
106
+ 14,mlp.down_proj,0.0000250014,0.05000,3.074
107
+ 15,self_attn.k_proj,0.0004797868,0.05000,4.990
108
+ 15,self_attn.v_proj,0.0001005991,0.05000,5.166
109
+ 15,self_attn.q_proj,0.0009365002,0.05000,5.185
110
+ 15,self_attn.o_proj,0.0000076168,0.05000,1.067
111
+ 15,mlp.up_proj,0.0006121564,0.05000,2.409
112
+ 15,mlp.gate_proj,0.0007879107,0.05000,2.440
113
+ 15,mlp.down_proj,0.0000245487,0.05000,3.107
114
+ 16,self_attn.v_proj,0.0001098983,0.05000,5.208
115
+ 16,self_attn.q_proj,0.0009610252,0.05000,5.322
116
+ 16,self_attn.k_proj,0.0005322082,0.05000,5.361
117
+ 16,self_attn.o_proj,0.0000051471,0.05000,1.175
118
+ 16,mlp.up_proj,0.0005994866,0.05000,2.891
119
+ 16,mlp.gate_proj,0.0007955586,0.05000,2.940
120
+ 16,mlp.down_proj,0.0000232563,0.05000,3.234
121
+ 17,self_attn.v_proj,0.0001056392,0.05000,5.383
122
+ 17,self_attn.k_proj,0.0004881229,0.05000,5.419
123
+ 17,self_attn.q_proj,0.0009143897,0.05000,5.493
124
+ 17,self_attn.o_proj,0.0000044561,0.05000,1.173
125
+ 17,mlp.gate_proj,0.0008321888,0.05000,2.826
126
+ 17,mlp.up_proj,0.0006171612,0.05000,2.839
127
+ 17,mlp.down_proj,0.0000246102,0.05000,3.241
128
+ 18,self_attn.v_proj,0.0001294144,0.05000,5.402
129
+ 18,self_attn.q_proj,0.0010036130,0.05000,5.491
130
+ 18,self_attn.k_proj,0.0005490085,0.05000,5.539
131
+ 18,self_attn.o_proj,0.0000047686,0.05000,1.202
132
+ 18,mlp.up_proj,0.0006625156,0.05000,2.816
133
+ 18,mlp.gate_proj,0.0008736592,0.05000,2.858
134
+ 18,mlp.down_proj,0.0000259963,0.05000,3.274
135
+ 19,self_attn.q_proj,0.0009206464,0.05000,5.306
136
+ 19,self_attn.v_proj,0.0001300041,0.05000,5.346
137
+ 19,self_attn.k_proj,0.0005263201,0.05000,5.421
138
+ 19,self_attn.o_proj,0.0000081053,0.05000,1.206
139
+ 19,mlp.up_proj,0.0007115615,0.05000,2.476
140
+ 19,mlp.gate_proj,0.0009267995,0.05000,2.501
141
+ 19,mlp.down_proj,0.0000313765,0.05000,3.005
142
+ 20,self_attn.k_proj,0.0005501762,0.05000,5.269
143
+ 20,self_attn.q_proj,0.0009316968,0.05000,5.310
144
+ 20,self_attn.v_proj,0.0001533229,0.05000,5.339
145
+ 20,self_attn.o_proj,0.0000053971,0.05000,1.074
146
+ 20,mlp.gate_proj,0.0008958201,0.05000,2.386
147
+ 20,mlp.up_proj,0.0007214489,0.05000,2.403
148
+ 20,mlp.down_proj,0.0000298560,0.05000,3.048
149
+ 21,self_attn.k_proj,0.0005322746,0.05000,5.115
150
+ 21,self_attn.v_proj,0.0001965262,0.05000,5.151
151
+ 21,self_attn.q_proj,0.0009163225,0.05000,5.171
152
+ 21,self_attn.o_proj,0.0000058034,0.05000,1.071
153
+ 21,mlp.up_proj,0.0007544485,0.05000,2.394
154
+ 21,mlp.gate_proj,0.0009466076,0.05000,2.413
155
+ 21,mlp.down_proj,0.0000325444,0.05000,2.984
156
+ 22,self_attn.v_proj,0.0001977625,0.05000,5.113
157
+ 22,self_attn.q_proj,0.0009122455,0.05000,5.193
158
+ 22,self_attn.k_proj,0.0005100145,0.05000,5.229
159
+ 22,self_attn.o_proj,0.0000058460,0.05000,1.057
160
+ 22,mlp.gate_proj,0.0010337803,0.05000,2.355
161
+ 22,mlp.up_proj,0.0008206790,0.05000,2.376
162
+ 22,mlp.down_proj,0.0000370742,0.05000,2.997
163
+ 23,self_attn.v_proj,0.0001945850,0.05000,5.120
164
+ 23,self_attn.k_proj,0.0005421275,0.05000,5.187
165
+ 23,self_attn.q_proj,0.0009014102,0.05000,5.207
166
+ 23,self_attn.o_proj,0.0000118679,0.05000,1.071
167
+ 23,mlp.gate_proj,0.0011874684,0.05000,2.391
168
+ 23,mlp.up_proj,0.0009013988,0.05000,2.412
169
+ 23,mlp.down_proj,0.0000456437,0.05000,3.112
170
+ 24,self_attn.k_proj,0.0006013965,0.05000,5.117
171
+ 24,self_attn.q_proj,0.0009966169,0.05000,5.178
172
+ 24,self_attn.v_proj,0.0002814000,0.05000,5.192
173
+ 24,self_attn.o_proj,0.0000167604,0.05000,1.067
174
+ 24,mlp.up_proj,0.0009939129,0.05000,2.419
175
+ 24,mlp.gate_proj,0.0013279702,0.05000,2.446
176
+ 24,mlp.down_proj,0.0000607813,0.05000,3.056
177
+ 25,self_attn.k_proj,0.0004979556,0.05000,5.164
178
+ 25,self_attn.q_proj,0.0009840556,0.05000,5.252
179
+ 25,self_attn.v_proj,0.0002535410,0.05000,5.264
180
+ 25,self_attn.o_proj,0.0000184899,0.05000,1.043
181
+ 25,mlp.gate_proj,0.0014792334,0.05000,2.438
182
+ 25,mlp.up_proj,0.0011075135,0.05000,2.447
183
+ 25,mlp.down_proj,0.0000877130,0.05000,3.067
184
+ 26,self_attn.v_proj,0.0003436589,0.05000,5.014
185
+ 26,self_attn.q_proj,0.0008804976,0.05000,5.161
186
+ 26,self_attn.k_proj,0.0005433848,0.05000,5.189
187
+ 26,self_attn.o_proj,0.0000359726,0.05000,1.044
188
+ 26,mlp.up_proj,0.0011477621,0.05000,2.349
189
+ 26,mlp.gate_proj,0.0015504545,0.05000,2.368
190
+ 26,mlp.down_proj,0.0001262205,0.05000,3.025
191
+ 27,self_attn.v_proj,0.0002239405,0.05000,5.051
192
+ 27,self_attn.k_proj,0.0003883892,0.05000,5.105
193
+ 27,self_attn.q_proj,0.0007127493,0.05000,5.130
194
+ 27,self_attn.o_proj,0.0000817960,0.05000,1.054
195
+ 27,mlp.up_proj,0.0012877010,0.05000,2.463
196
+ 27,mlp.gate_proj,0.0015451877,0.05000,2.483
197
+ 27,mlp.down_proj,0.0004653256,0.05000,3.056
quantize_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": false,
5
+ "lm_head": false,
6
+ "quant_method": "gptq",
7
+ "checkpoint_format": "gptq",
8
+ "pack_dtype": "int32",
9
+ "meta": {
10
+ "quantizer": [
11
+ "gptqmodel:5.7.0"
12
+ ],
13
+ "uri": "https://github.com/modelcloud/gptqmodel",
14
+ "damp_percent": 0.05,
15
+ "damp_auto_increment": 0.01,
16
+ "static_groups": false,
17
+ "true_sequential": true,
18
+ "mse": 0.0,
19
+ "gptaq": null,
20
+ "act_group_aware": true,
21
+ "failsafe": {
22
+ "strategy": "rtn",
23
+ "threshold": "0.5%",
24
+ "smooth": {
25
+ "type": "mad",
26
+ "group_size_threshold": 128,
27
+ "k": 2.75
28
+ }
29
+ },
30
+ "offload_to_disk": true,
31
+ "offload_to_disk_path": "./gptqmodel_offload/znzwasrn-gqcllywg/",
32
+ "pack_impl": "cpu",
33
+ "mock_quantization": false,
34
+ "gc_mode": "interval",
35
+ "wait_for_submodule_finalizers": false,
36
+ "auto_forward_data_parallel": true,
37
+ "hessian": {
38
+ "chunk_size": null,
39
+ "chunk_bytes": null,
40
+ "staging_dtype": "float32"
41
+ },
42
+ "vram_strategy": "exclusive"
43
+ },
44
+ "sym": true,
45
+ "format": "gptq"
46
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c85066e7642934ed09b44155e6566b0b5dab2637fb9433439ba5c9c7f8b50d3
3
+ size 17210018
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "tokenizer_class": "TokenizersBackend"
13
+ }