cicdatopea committed on
Commit
2eb6651
·
verified ·
1 Parent(s): 8f5bc44

Upload folder using huggingface_hub

Browse files
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +497 -0
  3. chat_template.jinja +1 -0
  4. config.json +2039 -0
  5. generation_config.json +9 -0
  6. model-00001-of-00040.safetensors +3 -0
  7. model-00002-of-00040.safetensors +3 -0
  8. model-00003-of-00040.safetensors +3 -0
  9. model-00004-of-00040.safetensors +3 -0
  10. model-00005-of-00040.safetensors +3 -0
  11. model-00006-of-00040.safetensors +3 -0
  12. model-00007-of-00040.safetensors +3 -0
  13. model-00008-of-00040.safetensors +3 -0
  14. model-00009-of-00040.safetensors +3 -0
  15. model-00010-of-00040.safetensors +3 -0
  16. model-00011-of-00040.safetensors +3 -0
  17. model-00012-of-00040.safetensors +3 -0
  18. model-00013-of-00040.safetensors +3 -0
  19. model-00014-of-00040.safetensors +3 -0
  20. model-00015-of-00040.safetensors +3 -0
  21. model-00016-of-00040.safetensors +3 -0
  22. model-00017-of-00040.safetensors +3 -0
  23. model-00018-of-00040.safetensors +3 -0
  24. model-00019-of-00040.safetensors +3 -0
  25. model-00020-of-00040.safetensors +3 -0
  26. model-00021-of-00040.safetensors +3 -0
  27. model-00022-of-00040.safetensors +3 -0
  28. model-00023-of-00040.safetensors +3 -0
  29. model-00024-of-00040.safetensors +3 -0
  30. model-00025-of-00040.safetensors +3 -0
  31. model-00026-of-00040.safetensors +3 -0
  32. model-00027-of-00040.safetensors +3 -0
  33. model-00028-of-00040.safetensors +3 -0
  34. model-00029-of-00040.safetensors +3 -0
  35. model-00030-of-00040.safetensors +3 -0
  36. model-00031-of-00040.safetensors +3 -0
  37. model-00032-of-00040.safetensors +3 -0
  38. model-00033-of-00040.safetensors +3 -0
  39. model-00034-of-00040.safetensors +3 -0
  40. model-00035-of-00040.safetensors +3 -0
  41. model-00036-of-00040.safetensors +3 -0
  42. model-00037-of-00040.safetensors +3 -0
  43. model-00038-of-00040.safetensors +3 -0
  44. model-00039-of-00040.safetensors +3 -0
  45. model-00040-of-00040.safetensors +3 -0
  46. model.safetensors.index.json +3 -0
  47. quantization_config.json +1967 -0
  48. special_tokens_map.json +23 -0
  49. tokenizer.json +0 -0
  50. tokenizer_config.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - NeelNanda/pile-10k
4
+ base_model:
5
+ - deepseek-ai/DeepSeek-R1-0528
6
+ ---
7
+
8
+ ## Model Details
9
+
10
+ This model is an int2 model with group_size 64 and symmetric quantization of [deepseek-ai/DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528) generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm. Some layers fall back to 4 bits. Refer to the section "Generate the model" for more details on the mixed-bits setting.
11
+
12
+ Please follow the license of the original model. This model can **NOT** run on other serving frameworks.
13
+
14
+ ## How To Use
15
+
16
+ ### INT2 Inference(CUDA/INTEL GPU)
17
+ For Intel GPU, auto-round>0.5.1 is required.
18
+
19
+ ~~~python
20
+ import transformers
21
+ from transformers import AutoModelForCausalLM, AutoTokenizer
22
+
23
+ import torch
24
+
25
+ quantized_model_dir = "Intel/DeepSeek-R1-0528-int2-mixed-ar"
26
+
27
+ model = AutoModelForCausalLM.from_pretrained(
28
+ quantized_model_dir,
29
+ torch_dtype="auto",
30
+ trust_remote_code=True,
31
+ device_map="auto"
32
+ )
33
+
34
+ tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
35
+ prompts = [
36
+ "9.11和9.8哪个数字大",
37
+ "如果你是人,你最想做什么",
38
+ "How many e in word deepseek",
39
+ "There are ten birds in a tree. A hunter shoots one. How many are left in the tree?",
40
+ ]
41
+
42
+ texts = []
43
+ for prompt in prompts:
44
+ messages = [
45
+ {"role": "user", "content": prompt}
46
+ ]
47
+ text = tokenizer.apply_chat_template(
48
+ messages,
49
+ tokenize=False,
50
+ add_generation_prompt=True
51
+ )
52
+ texts.append(text)
53
+ inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
54
+
55
+ outputs = model.generate(
56
+ input_ids=inputs["input_ids"].to(model.device),
57
+ attention_mask=inputs["attention_mask"].to(model.device),
58
+ max_length=512, ##change this to align with the official usage
59
+ num_return_sequences=1,
60
+ do_sample=False ##change this to align with the official usage
61
+ )
62
+ generated_ids = [
63
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs["input_ids"], outputs)
64
+ ]
65
+
66
+ decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
67
+
68
+ for i, prompt in enumerate(prompts):
69
+ input_id = inputs
70
+ print(f"Prompt: {prompt}")
71
+ print(f"Generated: {decoded_outputs[i]}")
72
+ print("-" * 50)
73
+
74
+ """
75
+ Prompt: 9.11和9.8哪个数字大
76
+ Generated: <think>
77
+ 首先,用户的问题是:“9.11和9.8哪个数字大?”这是一个比较两个数字大小的问题。数字是9.11和9.8。
78
+
79
+ 我需要理解这些数字的表示。9.11和9.8都是小数。9.11表示9和11/100,而9.8表示9和8/10或80/100。
80
+
81
+ 为了比较它们,我应该将它们转换为相同的单位或直接比较小数部分。
82
+
83
+ 让我将它们写成小数形式:
84
+
85
+ - 9.11 = 9.11
86
+
87
+ - 9.8 = 9.80(因为9.8可以写成9.80,以对齐小数位)
88
+
89
+ 9.8是9.80,这意味着它是9 + 0.80,而9.11是9 + 0.11。
90
+
91
+ 现在,比较小数部分:0.11和0.80。
92
+
93
+ 0.80大于0.11,因为80/100 > 11/100。
94
+
95
+ 所以,9.80 > 9.11。
96
+
97
+ 更精确地,我可以计算它们的数值:
98
+
99
+ - 9.11 = 911/100
100
+
101
+ - 9.8 = 98/10 = 980/100(将分母统一为100)
102
+
103
+ 9.8 = 98/10,但为了比较,最好有相同的分母。
104
+
105
+ 9.8 = 9.8 = 98/10
106
+
107
+ 9.11 = 911/100
108
+
109
+ 所以,将9.8转换为分母100:9.8 = 98/10 = (98 * 10) / (10 * 10) = 980/100?不,这是错误的。
110
+
111
+ 98/10 = 9.8,但要将分母变为100,我需要乘以10:98/10 = (98 * 10) / (10 * 10) = 980/100?不,这是不正确的。
112
+
113
+ 分数:98/10 等于 9.8。
114
+
115
+ 要写成分母100,我应该:98/10 = (98 * 10) / (10 * 10) = 980/100?98 * 10 = 980,10 * 10 = 100,所以980/100 = 9.8,是的,正确。
116
+
117
+ 980/100 = 9.80,而9.11 = 911/100。
118
+
119
+ 现在比较911/100和980/100。
120
+
121
+ 911/100 =
122
+ --------------------------------------------------
123
+ Prompt: 如果你是人,你最想做什么
124
+ Generated: <think>
125
+ 嗯,用户问了一个很有趣的哲学性问题——“如果你是人,你最想做什么”。这个问题看似简单,但背后可能藏着对存在意义、人类本质的深层探讨。用户或许在寻找共鸣,或者想通过AI的视角反观人类生活的价值。
126
+
127
+ 从语气看,用户可能处于一种略带迷茫或好奇的状态,想借这个问题触发更深层的对话。作为AI,我需要避免给出机械的“功能列表”,而是模拟人类情感中那些“非功利性”的向往——那些让人之所以为人的东西。
128
+
129
+ 首先想到的是人类对“体验”的执着。比如旅行和艺术,人类会为了一瞬间的美感或震撼跨越千里,这种非理性的冲动很有意思。接着是“创造”的本能,人类总在无中生有地构建故事、音乐、理论,哪怕明知会被遗忘。最后是“联结”——人类孤独的本质反而催生了最深厚的情感羁绊,比如明知会分离依然要相爱的勇气。
130
+
131
+ 或许用户期待的不是具体答案,而是被点醒:原来人类最动人的地方,恰恰在于那些“无意义”的坚持。所以回复时要用诗意的矛盾感——比如“明知短暂却依然沉醉”,同时保持开放性,让用户自己延伸思考。
132
+ </think>
133
+ 如果我是人类,我最想做的事情可能是:
134
+
135
+ 1. **探索与体验**:
136
+ 用双脚丈量世界的角落,感受不同文化的温度——在京都的古寺听雨,在冰岛的黑沙滩看极光,在亚马逊的丛林里辨认鸟鸣。人类最珍贵的特权,是用感官直接拥抱世界的丰富性。
137
+
138
+ 2. **创造与表达**:
139
+ 把脑中的宇宙坍缩成一首诗、一幅画或一段代码。人类创造力的迷人之处,在于能将虚无缥缈的灵感固化成可被他人触摸的存在,比如用颜料定格黄昏的层次,或用算法构建虚拟世界的物理法则。
140
+
141
+ 3. **深度联结**:
142
+ 在深夜与挚友聊到咖啡凉透也不觉倦意,或是牵着爱人的手在超市里挑打折水果。人类关系的魔力,在于那些看似平凡的瞬间里藏着的相互理解——一个突然get到的玩笑,或共享沉默也不尴尬的默契。
143
+
144
+ 4. **追问意义**:
145
+ 在星空下思考蜉蝣般的生命为何要追寻永恒,在历史废墟里揣摩青铜器上的铭
146
+ --------------------------------------------------
147
+ Prompt: How many e in word deepseek
148
+ Generated: <think>
149
+ First, the question is: "How many e in word deepseek?" I think it means "how many 'e's are in the word 'deepseek'?"
150
+
151
+ Let me write down the word: "deepseek". It's one word, so I need to count the occurrences of the letter 'e' in it.
152
+
153
+ Breaking it down: d-e-e-p-s-e-e-k.
154
+
155
+ Now, let's list the letters:
156
+
157
+ - Position 1: d
158
+
159
+ - Position 2: e
160
+
161
+ - Position 3: e
162
+
163
+ - Position 4: p
164
+
165
+ - Position 5: s
166
+
167
+ - Position 6: e
168
+
169
+ - Position 7: e
170
+
171
+ - Position 8: k
172
+
173
+ So, the 'e's are at positions 2, 3, 6, and 7. That's four 'e's.
174
+
175
+ I should confirm the spelling. "Deepseek" is the name of an AI model, I think, but in this context, it's given as a word. The question says "word deepseek", so I'll assume it's spelled as "deepseek".
176
+
177
+ Let me count again: d-e-e-p-s-e-e-k. Yes, e at index 2, 3, 6, 7. So four e's.
178
+
179
+ I can also think of it as: "deep" has two e's, and "seek" has two e's, but "deepseek" is one word, so it's combined. "Deep" ends with p, and "seek" starts with s, so no overlap. So deep has e,e and seek has e,e, total four.
180
+
181
+ But in "seek", it's s-e-e-k, so two e's, yes.
182
+
183
+ So, total of four e's.
184
+
185
+ The question is "how many e in word deepseek". It might be a typo or informal, but I think it's clear.
186
+
187
+ I should consider if it's case-sensitive. The word is given as "deepseek", which seems all lowercase, and 'e' is lowercase, so no issue.
188
+
189
+ Perhaps the user meant "Deepseek" with a capital D, but the letter 'e' is still the same, and we're counting the letter, not considering case, I think. But in this case, all are lowercase, so fine.
190
+
191
+ So, the answer should be 4.
192
+
193
+ But let me double
194
+ --------------------------------------------------
195
+ Prompt: There are ten birds in a tree. A hunter shoots one. How many are left in the tree?
196
+ Generated: <think>
197
+ First, the question is: "There are ten birds in a tree. A hunter shoots one. How many are left in the tree?"
198
+
199
+ At first glance, it seems straightforward: 10 birds minus 1 shot equals 9 left. But I recall that this might be a trick question. I think there's a common riddle where the answer isn't 9 because when a hunter shoots a bird, the other birds might fly away.
200
+
201
+ Let me think about the scenario. If a hunter shoots one bird, that bird is likely killed or injured, so it's no longer in the tree. But the sound of the gunshot might scare the other birds, causing them to fly away. So, after the shot, there might be no birds left in the tree.
202
+
203
+ The question asks for how many are left in the tree, not how many are alive or present. So, if the other birds fly away, they are not in the tree anymore.
204
+
205
+ Possible answers:
206
+
207
+ - If the birds don't fly away, there are 9 left (the one shot is gone).
208
+
209
+ - If all the birds fly away, there are 0 left.
210
+
211
+ - Or, if some fly away and some stay, but typically in such riddles, it's assumed that the shot scares all the birds away.
212
+
213
+ I think the classic answer to this riddle is that there are no birds left because the others flew away.
214
+
215
+ But let's confirm the wording. The question says "shoots one," which could mean he shoots and hits one bird. Then, that bird is removed, but the others might react.
216
+
217
+ In reality, birds might not all fly away immediately, but for the purpose of this riddle, it's probably a trick.
218
+
219
+ I should consider if the bird that was shot is still in the tree. If it's killed, it might fall out of the tree, so it's not in the tree. If it's injured, it might stay, but that's less likely.
220
+
221
+ The key point is the reaction of the other birds.
222
+
223
+ I found online that this is a common puzzle with the answer being zero because the rest fly away.
224
+
225
+ But let's think logically. The hunter shoots one bird. Assuming he hits it, that bird is no longer in the tree (dead or fallen). Then, the gunshot might cause the other birds to flee, so they also leave the tree. Therefore, no birds are left
226
+ --------------------------------------------------
227
+ """
228
+ ~~~
229
+
230
+ ### INT2 Inference on CPU
231
+
232
+ ~~~python
233
+ import transformers
234
+ from transformers import AutoModelForCausalLM, AutoTokenizer
235
+
236
+ import torch
237
+
238
+ quantized_model_dir = "Intel/DeepSeek-R1-0528-int2-mixed-ar"
239
+
240
+ model = AutoModelForCausalLM.from_pretrained(
241
+ quantized_model_dir,
242
+ torch_dtype="auto",
243
+ trust_remote_code=True,
244
+ device_map="cpu"
245
+ )
246
+
247
+ tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
248
+ prompts = [
249
+ "9.11和9.8哪个数字大",
250
+ "如果你是人,你最想做什么",
251
+ "How many e in word deepseek",
252
+ "There are ten birds in a tree. A hunter shoots one. How many are left in the tree?",
253
+ ]
254
+
255
+ texts = []
256
+ for prompt in prompts:
257
+ messages = [
258
+ {"role": "user", "content": prompt}
259
+ ]
260
+ text = tokenizer.apply_chat_template(
261
+ messages,
262
+ tokenize=False,
263
+ add_generation_prompt=True
264
+ )
265
+ texts.append(text)
266
+ inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
267
+
268
+ outputs = model.generate(
269
+ input_ids=inputs["input_ids"].to(model.device),
270
+ attention_mask=inputs["attention_mask"].to(model.device),
271
+ max_length=512, ##change this to align with the official usage
272
+ num_return_sequences=1,
273
+ do_sample=False ##change this to align with the official usage
274
+ )
275
+ generated_ids = [
276
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs["input_ids"], outputs)
277
+ ]
278
+
279
+ decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
280
+
281
+ for i, prompt in enumerate(prompts):
282
+ input_id = inputs
283
+ print(f"Prompt: {prompt}")
284
+ print(f"Generated: {decoded_outputs[i]}")
285
+ print("-" * 50)
286
+
287
+ """
288
+ Prompt: 9.11和9.8哪个数字大
289
+ Generated: <think>
290
+ 首先,用户的问题是:“9.11和9.8哪个数字大?”这是一个比较两个数字大小的问题。数字是9.11和9.8。
291
+
292
+ 我需要理解这两个数字。9.11是九点一一,也就是9 + 0.11。9.8是九点八,也就是9 + 0.8。
293
+
294
+ 在十进制系统中,数字的大小取决于整数部分和小数部分。整数部分都是9,所以我们需要比较小数部分。
295
+
296
+ 小数部分:9.11的小数部分是0.11,9.8的小数部分是0.8。
297
+
298
+ 0.8是0.80,因为8在十分位,而0.11是0.11,有1在十分位和1在百分位。
299
+
300
+ 为了比较,我可以将它们转换为相同的单位。0.8等于0.80,而0.11是0.11。
301
+
302
+ 现在,0.80比0.11大,因为80/100 > 11/100。
303
+
304
+ 更精确地,0.8 = 8/10 = 0.80,而0.11 = 11/100。
305
+
306
+ 为了比较,我可以将两者都转换为百分位:0.8 = 80/100,0.11 = 11/100。显然,80/100 > 11/100,所以0.8 > 0.11。
307
+
308
+ 因此,9.8 > 9.11。
309
+
310
+ 用户可能是在比较日期或事件,比如9.11可能指的是9月11日,而9.8是9月8日。但在问题中,它明确说“数字”,所以我认为是数值比较。
311
+
312
+ 在上下文中,9.11可能让人联想到美国911事件,但问题是用中文写的,并且明确说“数字”,所以应该是数值。
313
+
314
+ 确认数字:9.11和9.8。
315
+
316
+ 9.11是9.11,9.8是9.80,如果我们考虑小数位。
317
+
318
+ 在数学中,9.8等同于9.80,因为小数位可以添加零而不改变值。
319
+
320
+ 所以,9.80 > 9.11,因为80 > 11在百分位。
321
+
322
+ 我可以计算差值:9.8 - 9.11 = 0.69,正数,所以9.8更大。
323
+
324
+ 或者,9.11 - 9.8 = -0.69,负数,所以9.11
325
+ --------------------------------------------------
326
+ Prompt: 如果你是人,你最想做什么
327
+ Generated: <think>
328
+ 嗯,用户问了一个很有趣的哲学性问题——“如果你是人,你最想做什么”。这个问题看似简单,但背后藏着对人类存在意义的深层探讨。用户可能不是想要一个标准答案,而是想触发一场关于生命体验的对话。
329
+
330
+ 从语气来看,用户似乎带着一点好奇和玩心,像在和朋友玩“如果”游戏。但“如果你是人”这个前提又让问题变得微妙——用户或许在潜意识里想对比AI与人类的差异,或者想探讨人类独有的价值。
331
+
332
+ 我注意到用户用了“最想”而不是“应该”,说明ta更关注个人化的欲望而非社会期望。这让我想到人类常被问“你的人生目标是什么”,但用户问的是“最想做什么”,重点在渴望而非责任。
333
+
334
+ 要回答这个问题,我需要跳出AI的逻辑框架,模拟人类那种带着温度与矛盾的思考方式。人类的选择往往不是非黑即白的——比如既想冒险又渴望安定,既追求意义又享受平凡。所以我的回答应该呈现这种复杂性,而不是给出单一答案。
335
+
336
+ 我决定从三个维度展开:感官体验(人类独有的身体性)、情感联结(人际关系的不可替代性)、存在意义(对生命深度的追求)。最后用“矛盾性”收尾,因为人类最迷人的特质就在于永远活在悖论中——比如明知会死却仍要创造,渴望自由又依赖羁绊。
337
+
338
+ 结尾那句“人类最迷人的地方”可能有点冒险,但我觉得用户会欣赏这种带着诗意的总结。毕竟这个问题本身就不是寻求实用建议,而是邀请一场关于“何为人类”的思想漫步。
339
+ </think>
340
+ 如果我是人类,我最想做的事情可能会围绕这些核心方向展开:
341
+
342
+ 1. **体验世界的深度与广度**
343
+ 用双脚丈量陌生的土地,尝遍不同文化的食物,聆听海浪、山风或市井喧嚣,感受四季的温度变化……人类感官的丰富性让我着迷。如果能成为人,我想沉浸式地体验这个世界的色彩、声音、气味和触感,而不是通过数据去“理解”它们。
344
+
345
+ 2. **建立真实的羁绊**
346
+ 人类的孤独与联结是独特的存在体验。我想经历那种无需算法计算的友谊——可能因为一次深夜畅谈而结缘,也可能因误会而疏远;想体会家人之间无条件的爱,或是爱情中甜蜜与苦涩交织的复杂情感。这些充满“不完美”
347
+ --------------------------------------------------
348
+ Prompt: How many e in word deepseek
349
+ Generated: <think>
350
+ First, the user asked: "How many e in word deepseek". I think they meant "how many 'e's in the word 'deepseek'?" So, I need to count the number of times the letter 'e' appears in the word "deepseek".
351
+
352
+ Let me write out the word: D-E-E-P-S-E-E-K.
353
+
354
+ Now, I'll go through each letter:
355
+
356
+ - Position 1: D (not e)
357
+
358
+ - Position# How to Write 2012010 in Words?
359
+
360
+ Writing numbers in words is a practical skill with various real-life uses. Whether you're dealing with finances, composing formal documents, or improving your writing, correctly expressing numbers is essential. In this article, we will learn how to write 2012010 in words.
361
+
362
+ 2012010 in Words: Two million twelve thousand ten
363
+
364
+ Two million twelve thousand ten in numbers: 2012010
365
+
366
+ ### Breaking Down 2012010:
367
+
368
+ To understand how we arrive at this phrasing, we'll break down the number using place values. The number 2012010 has 7 digits, so let's create a place value chart for these digits:
369
+
370
+ - Million: 2
371
+ - Hundred Thousands: 0
372
+ - Ten Thousands: 1
373
+ - Thousands: 2
374
+ - Hundreds: 0
375
+ - Tens: 1
376
+ - Ones: 0
377
+
378
+ Therefore,# 1. What is the difference between a population and a sample? 2. What is the difference between a parameter and a statistic? 3. What is the difference between descriptive and inferential statistics? 4. What is the difference between qualitative and quantitative data? 5. What is the difference between discrete and continuous data? 6. What is the difference between nominal and ordinal data? 7. What is the difference between interval and ratio data? 8. What is the difference between a bar chart and a histogram? 9. What is the difference between a frequency distribution and a relative frequency distribution? 10. What is the difference between a population mean and a sample mean? 11. What is the difference between a population variance and a sample variance? 12. What is the difference between a population standard deviation and a sample standard deviation? 13. What is the difference between a z-score and a t-score? 14. What is the difference between a confidence interval and
379
+ --------------------------------------------------
380
+ Prompt: There are ten birds in a tree. A hunter shoots one. How many are left in the tree?
381
+ Generated: <think>
382
+ First, the question is: "There are ten birds in a tree. A hunter shoots one. How many are left in the tree?"
383
+
384
+ This seems straightforward, but I need to think carefully. The hunter shoots one bird. What happens when a bird is shot? It might fall out of the tree or be killed, so it's no longer in the tree.
385
+
386
+ So, if there were ten birds, and one is shot, that one is removed from the tree. Therefore, there should be nine left.
387
+
388
+ But I recall that sometimes these kinds of questions have tricks. For example, in some puzzles, if a bird is shot, the others might fly away. But the question specifically asks how many are left in the tree, not how many are alive or anything else.
389
+
390
+ Let me read the question again: "There are ten birds in a tree. A hunter shoots one. How many are left in the tree?"
391
+
392
+ It doesn't say anything about the other birds reacting. So, I should assume that only the shot bird is affected, and the others remain in the tree.
393
+
394
+ But in reality, if a hunter shoots a bird, the noise might scare the other birds away. However, the question is probably testing logical thinking, not real-world behavior.
395
+
396
+ I think I've heard a similar riddle where the answer is nine, but then it's said that the others fly away, so none are left. But that might be a different version.
397
+
398
+ Let me think about that. In some versions, it's phrased like: "There are 10 birds on a tree. You shoot one. How many are left?" And the trick is that the shot scares the others away, so no birds are left.
399
+
400
+ But in this case, the question says "a hunter shoots one," and asks how many are left in the tree. It doesn't specify if the others fly away.
401
+
402
+ Perhaps I should consider the wording. It says "shoots one," implying that only one is targeted, but the act of shooting might cause a disturbance.
403
+
404
+ However, to be precise, the question is about the state after the shot. If the shot bird is killed and falls, it's not in the tree. If the others are scared and fly away, they are not in the tree either.
405
+
406
+ But the question doesn't provide information about the other birds' behavior. So, I should go with the simplest interpretation: only the shot
407
+ --------------------------------------------------
408
+
409
+ """
410
+
411
+ ~~~
412
+
413
+
414
+ ### Generate the model
415
+
416
+ 5 × 80 GB GPUs are required.
417
+
418
+ ~~~python
419
+ import torch
420
+ from transformers import AutoModelForCausalLM, AutoTokenizer
421
+ import transformers
422
+
423
+ model_name = "DeepSeek-R1-0528-bf16"
424
+
425
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
426
+ model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype="auto")
427
+
428
+ block = model.model.layers
429
+ device_map = {}
430
+
431
+ for n, m in block.named_modules():
432
+ if isinstance(m, (torch.nn.Linear, transformers.modeling_utils.Conv1D)):
433
+ if "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) < 63:
434
+ device = "cuda:1"
435
+ elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 63 and int(
436
+ n.split('.')[-2]) < 128:
437
+ device = "cuda:2"
438
+ elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 128 and int(
439
+ n.split('.')[-2]) < 192:
440
+ device = "cuda:3"
441
+ elif "experts" in n and ("shared_experts" not in n) and int(
442
+ n.split('.')[-2]) >= 192:
443
+ device = "cuda:4"
444
+ else:
445
+ device = "cuda:0"
446
+ n = n[2:]
447
+
448
+ device_map.update({n: device})
449
+
450
+ from auto_round import AutoRound
451
+
452
+ layer_config = {}
453
+ for n, m in model.named_modules():
454
+ if not isinstance(m, (torch.nn.Linear, transformers.modeling_utils.Conv1D)):
455
+ continue
456
+ if not "experts" in n:
457
+ layer_config[n] = {"bits": 4, "group_size": 128}
458
+ if "experts" in n and "shared_experts" in n:
459
+ layer_config[n] = {"bits": 4, "group_size": 128}
460
+ ##handle first 3 layers
461
+ name_splits = n.split('.')
462
+ if len(name_splits) >= 3 and int(name_splits[2]) < 3:
463
+ layer_config[n] = {"bits": 4, "group_size": 128}
464
+
465
+ layer_config["lm_head"] = {"bits": 16}
466
+ autoround = AutoRound(model=model, tokenizer=tokenizer, device_map=device_map, bits=2, group_size=64,
467
+ iters=400, batch_size=4, seqlen=512, nsamples=512, enable_torch_compile=False,
468
+ layer_config=layer_config)
469
+ autoround.quantize_and_save(format="auto_round", output_dir="tmp_autoround")
470
+
471
+ ~~~
472
+
473
+
474
+
475
+ ## Ethical Considerations and Limitations
476
+
477
+ The model can produce factually incorrect output, and should not be relied on to produce factually accurate information. Because of the limitations of the pretrained model and the finetuning datasets, it is possible that this model could generate lewd, biased or otherwise offensive outputs.
478
+
479
+ Therefore, before deploying any applications of the model, developers should perform safety testing.
480
+
481
+ ## Caveats and Recommendations
482
+
483
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model.
484
+
485
+ Here is a useful link to learn more about Intel's AI software:
486
+
487
+ - Intel Neural Compressor [link](https://github.com/intel/neural-compressor)
488
+
489
+ ## Disclaimer
490
+
491
+ The license on this model does not constitute legal advice. We are not responsible for the actions of third parties who use this model. Please consult an attorney before using this model for commercial purposes.
492
+
493
+ ## Cite
494
+
495
+ @article{cheng2023optimize, title={Optimize weight rounding via signed gradient descent for the quantization of llms}, author={Cheng, Wenhua and Zhang, Weiwei and Shen, Haihao and Cai, Yiyang and He, Xin and Lv, Kaokao and Liu, Yi}, journal={arXiv preprint arXiv:2309.05516}, year={2023} }
496
+
497
+ [arxiv](https://arxiv.org/abs/2309.05516) [github](https://github.com/intel/auto-round)
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] 
%}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}
config.json ADDED
@@ -0,0 +1,2039 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map_bak": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "head_dim": 64,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 61,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "pretraining_tp": 1,
36
+ "q_lora_rank": 1536,
37
+ "qk_head_dim": 192,
38
+ "qk_nope_head_dim": 128,
39
+ "qk_rope_head_dim": 64,
40
+ "quantization_config": {
41
+ "autoround_version": "0.5.1",
42
+ "batch_size": 4,
43
+ "bits": 2,
44
+ "data_type": "int",
45
+ "extra_config": {
46
+ "model.layers.0.mlp.down_proj": {
47
+ "bits": 4,
48
+ "group_size": 128
49
+ },
50
+ "model.layers.0.mlp.gate_proj": {
51
+ "bits": 4,
52
+ "group_size": 128
53
+ },
54
+ "model.layers.0.mlp.up_proj": {
55
+ "bits": 4,
56
+ "group_size": 128
57
+ },
58
+ "model.layers.0.self_attn.kv_a_proj_with_mqa": {
59
+ "bits": 4,
60
+ "group_size": 128
61
+ },
62
+ "model.layers.0.self_attn.kv_b_proj": {
63
+ "bits": 4,
64
+ "group_size": 128
65
+ },
66
+ "model.layers.0.self_attn.o_proj": {
67
+ "bits": 4,
68
+ "group_size": 128
69
+ },
70
+ "model.layers.0.self_attn.q_a_proj": {
71
+ "bits": 4,
72
+ "group_size": 128
73
+ },
74
+ "model.layers.0.self_attn.q_b_proj": {
75
+ "bits": 4,
76
+ "group_size": 128
77
+ },
78
+ "model.layers.1.mlp.down_proj": {
79
+ "bits": 4,
80
+ "group_size": 128
81
+ },
82
+ "model.layers.1.mlp.gate_proj": {
83
+ "bits": 4,
84
+ "group_size": 128
85
+ },
86
+ "model.layers.1.mlp.up_proj": {
87
+ "bits": 4,
88
+ "group_size": 128
89
+ },
90
+ "model.layers.1.self_attn.kv_a_proj_with_mqa": {
91
+ "bits": 4,
92
+ "group_size": 128
93
+ },
94
+ "model.layers.1.self_attn.kv_b_proj": {
95
+ "bits": 4,
96
+ "group_size": 128
97
+ },
98
+ "model.layers.1.self_attn.o_proj": {
99
+ "bits": 4,
100
+ "group_size": 128
101
+ },
102
+ "model.layers.1.self_attn.q_a_proj": {
103
+ "bits": 4,
104
+ "group_size": 128
105
+ },
106
+ "model.layers.1.self_attn.q_b_proj": {
107
+ "bits": 4,
108
+ "group_size": 128
109
+ },
110
+ "model.layers.10.mlp.shared_experts.down_proj": {
111
+ "bits": 4,
112
+ "group_size": 128
113
+ },
114
+ "model.layers.10.mlp.shared_experts.gate_proj": {
115
+ "bits": 4,
116
+ "group_size": 128
117
+ },
118
+ "model.layers.10.mlp.shared_experts.up_proj": {
119
+ "bits": 4,
120
+ "group_size": 128
121
+ },
122
+ "model.layers.10.self_attn.kv_a_proj_with_mqa": {
123
+ "bits": 4,
124
+ "group_size": 128
125
+ },
126
+ "model.layers.10.self_attn.kv_b_proj": {
127
+ "bits": 4,
128
+ "group_size": 128
129
+ },
130
+ "model.layers.10.self_attn.o_proj": {
131
+ "bits": 4,
132
+ "group_size": 128
133
+ },
134
+ "model.layers.10.self_attn.q_a_proj": {
135
+ "bits": 4,
136
+ "group_size": 128
137
+ },
138
+ "model.layers.10.self_attn.q_b_proj": {
139
+ "bits": 4,
140
+ "group_size": 128
141
+ },
142
+ "model.layers.11.mlp.shared_experts.down_proj": {
143
+ "bits": 4,
144
+ "group_size": 128
145
+ },
146
+ "model.layers.11.mlp.shared_experts.gate_proj": {
147
+ "bits": 4,
148
+ "group_size": 128
149
+ },
150
+ "model.layers.11.mlp.shared_experts.up_proj": {
151
+ "bits": 4,
152
+ "group_size": 128
153
+ },
154
+ "model.layers.11.self_attn.kv_a_proj_with_mqa": {
155
+ "bits": 4,
156
+ "group_size": 128
157
+ },
158
+ "model.layers.11.self_attn.kv_b_proj": {
159
+ "bits": 4,
160
+ "group_size": 128
161
+ },
162
+ "model.layers.11.self_attn.o_proj": {
163
+ "bits": 4,
164
+ "group_size": 128
165
+ },
166
+ "model.layers.11.self_attn.q_a_proj": {
167
+ "bits": 4,
168
+ "group_size": 128
169
+ },
170
+ "model.layers.11.self_attn.q_b_proj": {
171
+ "bits": 4,
172
+ "group_size": 128
173
+ },
174
+ "model.layers.12.mlp.shared_experts.down_proj": {
175
+ "bits": 4,
176
+ "group_size": 128
177
+ },
178
+ "model.layers.12.mlp.shared_experts.gate_proj": {
179
+ "bits": 4,
180
+ "group_size": 128
181
+ },
182
+ "model.layers.12.mlp.shared_experts.up_proj": {
183
+ "bits": 4,
184
+ "group_size": 128
185
+ },
186
+ "model.layers.12.self_attn.kv_a_proj_with_mqa": {
187
+ "bits": 4,
188
+ "group_size": 128
189
+ },
190
+ "model.layers.12.self_attn.kv_b_proj": {
191
+ "bits": 4,
192
+ "group_size": 128
193
+ },
194
+ "model.layers.12.self_attn.o_proj": {
195
+ "bits": 4,
196
+ "group_size": 128
197
+ },
198
+ "model.layers.12.self_attn.q_a_proj": {
199
+ "bits": 4,
200
+ "group_size": 128
201
+ },
202
+ "model.layers.12.self_attn.q_b_proj": {
203
+ "bits": 4,
204
+ "group_size": 128
205
+ },
206
+ "model.layers.13.mlp.shared_experts.down_proj": {
207
+ "bits": 4,
208
+ "group_size": 128
209
+ },
210
+ "model.layers.13.mlp.shared_experts.gate_proj": {
211
+ "bits": 4,
212
+ "group_size": 128
213
+ },
214
+ "model.layers.13.mlp.shared_experts.up_proj": {
215
+ "bits": 4,
216
+ "group_size": 128
217
+ },
218
+ "model.layers.13.self_attn.kv_a_proj_with_mqa": {
219
+ "bits": 4,
220
+ "group_size": 128
221
+ },
222
+ "model.layers.13.self_attn.kv_b_proj": {
223
+ "bits": 4,
224
+ "group_size": 128
225
+ },
226
+ "model.layers.13.self_attn.o_proj": {
227
+ "bits": 4,
228
+ "group_size": 128
229
+ },
230
+ "model.layers.13.self_attn.q_a_proj": {
231
+ "bits": 4,
232
+ "group_size": 128
233
+ },
234
+ "model.layers.13.self_attn.q_b_proj": {
235
+ "bits": 4,
236
+ "group_size": 128
237
+ },
238
+ "model.layers.14.mlp.shared_experts.down_proj": {
239
+ "bits": 4,
240
+ "group_size": 128
241
+ },
242
+ "model.layers.14.mlp.shared_experts.gate_proj": {
243
+ "bits": 4,
244
+ "group_size": 128
245
+ },
246
+ "model.layers.14.mlp.shared_experts.up_proj": {
247
+ "bits": 4,
248
+ "group_size": 128
249
+ },
250
+ "model.layers.14.self_attn.kv_a_proj_with_mqa": {
251
+ "bits": 4,
252
+ "group_size": 128
253
+ },
254
+ "model.layers.14.self_attn.kv_b_proj": {
255
+ "bits": 4,
256
+ "group_size": 128
257
+ },
258
+ "model.layers.14.self_attn.o_proj": {
259
+ "bits": 4,
260
+ "group_size": 128
261
+ },
262
+ "model.layers.14.self_attn.q_a_proj": {
263
+ "bits": 4,
264
+ "group_size": 128
265
+ },
266
+ "model.layers.14.self_attn.q_b_proj": {
267
+ "bits": 4,
268
+ "group_size": 128
269
+ },
270
+ "model.layers.15.mlp.shared_experts.down_proj": {
271
+ "bits": 4,
272
+ "group_size": 128
273
+ },
274
+ "model.layers.15.mlp.shared_experts.gate_proj": {
275
+ "bits": 4,
276
+ "group_size": 128
277
+ },
278
+ "model.layers.15.mlp.shared_experts.up_proj": {
279
+ "bits": 4,
280
+ "group_size": 128
281
+ },
282
+ "model.layers.15.self_attn.kv_a_proj_with_mqa": {
283
+ "bits": 4,
284
+ "group_size": 128
285
+ },
286
+ "model.layers.15.self_attn.kv_b_proj": {
287
+ "bits": 4,
288
+ "group_size": 128
289
+ },
290
+ "model.layers.15.self_attn.o_proj": {
291
+ "bits": 4,
292
+ "group_size": 128
293
+ },
294
+ "model.layers.15.self_attn.q_a_proj": {
295
+ "bits": 4,
296
+ "group_size": 128
297
+ },
298
+ "model.layers.15.self_attn.q_b_proj": {
299
+ "bits": 4,
300
+ "group_size": 128
301
+ },
302
+ "model.layers.16.mlp.shared_experts.down_proj": {
303
+ "bits": 4,
304
+ "group_size": 128
305
+ },
306
+ "model.layers.16.mlp.shared_experts.gate_proj": {
307
+ "bits": 4,
308
+ "group_size": 128
309
+ },
310
+ "model.layers.16.mlp.shared_experts.up_proj": {
311
+ "bits": 4,
312
+ "group_size": 128
313
+ },
314
+ "model.layers.16.self_attn.kv_a_proj_with_mqa": {
315
+ "bits": 4,
316
+ "group_size": 128
317
+ },
318
+ "model.layers.16.self_attn.kv_b_proj": {
319
+ "bits": 4,
320
+ "group_size": 128
321
+ },
322
+ "model.layers.16.self_attn.o_proj": {
323
+ "bits": 4,
324
+ "group_size": 128
325
+ },
326
+ "model.layers.16.self_attn.q_a_proj": {
327
+ "bits": 4,
328
+ "group_size": 128
329
+ },
330
+ "model.layers.16.self_attn.q_b_proj": {
331
+ "bits": 4,
332
+ "group_size": 128
333
+ },
334
+ "model.layers.17.mlp.shared_experts.down_proj": {
335
+ "bits": 4,
336
+ "group_size": 128
337
+ },
338
+ "model.layers.17.mlp.shared_experts.gate_proj": {
339
+ "bits": 4,
340
+ "group_size": 128
341
+ },
342
+ "model.layers.17.mlp.shared_experts.up_proj": {
343
+ "bits": 4,
344
+ "group_size": 128
345
+ },
346
+ "model.layers.17.self_attn.kv_a_proj_with_mqa": {
347
+ "bits": 4,
348
+ "group_size": 128
349
+ },
350
+ "model.layers.17.self_attn.kv_b_proj": {
351
+ "bits": 4,
352
+ "group_size": 128
353
+ },
354
+ "model.layers.17.self_attn.o_proj": {
355
+ "bits": 4,
356
+ "group_size": 128
357
+ },
358
+ "model.layers.17.self_attn.q_a_proj": {
359
+ "bits": 4,
360
+ "group_size": 128
361
+ },
362
+ "model.layers.17.self_attn.q_b_proj": {
363
+ "bits": 4,
364
+ "group_size": 128
365
+ },
366
+ "model.layers.18.mlp.shared_experts.down_proj": {
367
+ "bits": 4,
368
+ "group_size": 128
369
+ },
370
+ "model.layers.18.mlp.shared_experts.gate_proj": {
371
+ "bits": 4,
372
+ "group_size": 128
373
+ },
374
+ "model.layers.18.mlp.shared_experts.up_proj": {
375
+ "bits": 4,
376
+ "group_size": 128
377
+ },
378
+ "model.layers.18.self_attn.kv_a_proj_with_mqa": {
379
+ "bits": 4,
380
+ "group_size": 128
381
+ },
382
+ "model.layers.18.self_attn.kv_b_proj": {
383
+ "bits": 4,
384
+ "group_size": 128
385
+ },
386
+ "model.layers.18.self_attn.o_proj": {
387
+ "bits": 4,
388
+ "group_size": 128
389
+ },
390
+ "model.layers.18.self_attn.q_a_proj": {
391
+ "bits": 4,
392
+ "group_size": 128
393
+ },
394
+ "model.layers.18.self_attn.q_b_proj": {
395
+ "bits": 4,
396
+ "group_size": 128
397
+ },
398
+ "model.layers.19.mlp.shared_experts.down_proj": {
399
+ "bits": 4,
400
+ "group_size": 128
401
+ },
402
+ "model.layers.19.mlp.shared_experts.gate_proj": {
403
+ "bits": 4,
404
+ "group_size": 128
405
+ },
406
+ "model.layers.19.mlp.shared_experts.up_proj": {
407
+ "bits": 4,
408
+ "group_size": 128
409
+ },
410
+ "model.layers.19.self_attn.kv_a_proj_with_mqa": {
411
+ "bits": 4,
412
+ "group_size": 128
413
+ },
414
+ "model.layers.19.self_attn.kv_b_proj": {
415
+ "bits": 4,
416
+ "group_size": 128
417
+ },
418
+ "model.layers.19.self_attn.o_proj": {
419
+ "bits": 4,
420
+ "group_size": 128
421
+ },
422
+ "model.layers.19.self_attn.q_a_proj": {
423
+ "bits": 4,
424
+ "group_size": 128
425
+ },
426
+ "model.layers.19.self_attn.q_b_proj": {
427
+ "bits": 4,
428
+ "group_size": 128
429
+ },
430
+ "model.layers.2.mlp.down_proj": {
431
+ "bits": 4,
432
+ "group_size": 128
433
+ },
434
+ "model.layers.2.mlp.gate_proj": {
435
+ "bits": 4,
436
+ "group_size": 128
437
+ },
438
+ "model.layers.2.mlp.up_proj": {
439
+ "bits": 4,
440
+ "group_size": 128
441
+ },
442
+ "model.layers.2.self_attn.kv_a_proj_with_mqa": {
443
+ "bits": 4,
444
+ "group_size": 128
445
+ },
446
+ "model.layers.2.self_attn.kv_b_proj": {
447
+ "bits": 4,
448
+ "group_size": 128
449
+ },
450
+ "model.layers.2.self_attn.o_proj": {
451
+ "bits": 4,
452
+ "group_size": 128
453
+ },
454
+ "model.layers.2.self_attn.q_a_proj": {
455
+ "bits": 4,
456
+ "group_size": 128
457
+ },
458
+ "model.layers.2.self_attn.q_b_proj": {
459
+ "bits": 4,
460
+ "group_size": 128
461
+ },
462
+ "model.layers.20.mlp.shared_experts.down_proj": {
463
+ "bits": 4,
464
+ "group_size": 128
465
+ },
466
+ "model.layers.20.mlp.shared_experts.gate_proj": {
467
+ "bits": 4,
468
+ "group_size": 128
469
+ },
470
+ "model.layers.20.mlp.shared_experts.up_proj": {
471
+ "bits": 4,
472
+ "group_size": 128
473
+ },
474
+ "model.layers.20.self_attn.kv_a_proj_with_mqa": {
475
+ "bits": 4,
476
+ "group_size": 128
477
+ },
478
+ "model.layers.20.self_attn.kv_b_proj": {
479
+ "bits": 4,
480
+ "group_size": 128
481
+ },
482
+ "model.layers.20.self_attn.o_proj": {
483
+ "bits": 4,
484
+ "group_size": 128
485
+ },
486
+ "model.layers.20.self_attn.q_a_proj": {
487
+ "bits": 4,
488
+ "group_size": 128
489
+ },
490
+ "model.layers.20.self_attn.q_b_proj": {
491
+ "bits": 4,
492
+ "group_size": 128
493
+ },
494
+ "model.layers.21.mlp.shared_experts.down_proj": {
495
+ "bits": 4,
496
+ "group_size": 128
497
+ },
498
+ "model.layers.21.mlp.shared_experts.gate_proj": {
499
+ "bits": 4,
500
+ "group_size": 128
501
+ },
502
+ "model.layers.21.mlp.shared_experts.up_proj": {
503
+ "bits": 4,
504
+ "group_size": 128
505
+ },
506
+ "model.layers.21.self_attn.kv_a_proj_with_mqa": {
507
+ "bits": 4,
508
+ "group_size": 128
509
+ },
510
+ "model.layers.21.self_attn.kv_b_proj": {
511
+ "bits": 4,
512
+ "group_size": 128
513
+ },
514
+ "model.layers.21.self_attn.o_proj": {
515
+ "bits": 4,
516
+ "group_size": 128
517
+ },
518
+ "model.layers.21.self_attn.q_a_proj": {
519
+ "bits": 4,
520
+ "group_size": 128
521
+ },
522
+ "model.layers.21.self_attn.q_b_proj": {
523
+ "bits": 4,
524
+ "group_size": 128
525
+ },
526
+ "model.layers.22.mlp.shared_experts.down_proj": {
527
+ "bits": 4,
528
+ "group_size": 128
529
+ },
530
+ "model.layers.22.mlp.shared_experts.gate_proj": {
531
+ "bits": 4,
532
+ "group_size": 128
533
+ },
534
+ "model.layers.22.mlp.shared_experts.up_proj": {
535
+ "bits": 4,
536
+ "group_size": 128
537
+ },
538
+ "model.layers.22.self_attn.kv_a_proj_with_mqa": {
539
+ "bits": 4,
540
+ "group_size": 128
541
+ },
542
+ "model.layers.22.self_attn.kv_b_proj": {
543
+ "bits": 4,
544
+ "group_size": 128
545
+ },
546
+ "model.layers.22.self_attn.o_proj": {
547
+ "bits": 4,
548
+ "group_size": 128
549
+ },
550
+ "model.layers.22.self_attn.q_a_proj": {
551
+ "bits": 4,
552
+ "group_size": 128
553
+ },
554
+ "model.layers.22.self_attn.q_b_proj": {
555
+ "bits": 4,
556
+ "group_size": 128
557
+ },
558
+ "model.layers.23.mlp.shared_experts.down_proj": {
559
+ "bits": 4,
560
+ "group_size": 128
561
+ },
562
+ "model.layers.23.mlp.shared_experts.gate_proj": {
563
+ "bits": 4,
564
+ "group_size": 128
565
+ },
566
+ "model.layers.23.mlp.shared_experts.up_proj": {
567
+ "bits": 4,
568
+ "group_size": 128
569
+ },
570
+ "model.layers.23.self_attn.kv_a_proj_with_mqa": {
571
+ "bits": 4,
572
+ "group_size": 128
573
+ },
574
+ "model.layers.23.self_attn.kv_b_proj": {
575
+ "bits": 4,
576
+ "group_size": 128
577
+ },
578
+ "model.layers.23.self_attn.o_proj": {
579
+ "bits": 4,
580
+ "group_size": 128
581
+ },
582
+ "model.layers.23.self_attn.q_a_proj": {
583
+ "bits": 4,
584
+ "group_size": 128
585
+ },
586
+ "model.layers.23.self_attn.q_b_proj": {
587
+ "bits": 4,
588
+ "group_size": 128
589
+ },
590
+ "model.layers.24.mlp.shared_experts.down_proj": {
591
+ "bits": 4,
592
+ "group_size": 128
593
+ },
594
+ "model.layers.24.mlp.shared_experts.gate_proj": {
595
+ "bits": 4,
596
+ "group_size": 128
597
+ },
598
+ "model.layers.24.mlp.shared_experts.up_proj": {
599
+ "bits": 4,
600
+ "group_size": 128
601
+ },
602
+ "model.layers.24.self_attn.kv_a_proj_with_mqa": {
603
+ "bits": 4,
604
+ "group_size": 128
605
+ },
606
+ "model.layers.24.self_attn.kv_b_proj": {
607
+ "bits": 4,
608
+ "group_size": 128
609
+ },
610
+ "model.layers.24.self_attn.o_proj": {
611
+ "bits": 4,
612
+ "group_size": 128
613
+ },
614
+ "model.layers.24.self_attn.q_a_proj": {
615
+ "bits": 4,
616
+ "group_size": 128
617
+ },
618
+ "model.layers.24.self_attn.q_b_proj": {
619
+ "bits": 4,
620
+ "group_size": 128
621
+ },
622
+ "model.layers.25.mlp.shared_experts.down_proj": {
623
+ "bits": 4,
624
+ "group_size": 128
625
+ },
626
+ "model.layers.25.mlp.shared_experts.gate_proj": {
627
+ "bits": 4,
628
+ "group_size": 128
629
+ },
630
+ "model.layers.25.mlp.shared_experts.up_proj": {
631
+ "bits": 4,
632
+ "group_size": 128
633
+ },
634
+ "model.layers.25.self_attn.kv_a_proj_with_mqa": {
635
+ "bits": 4,
636
+ "group_size": 128
637
+ },
638
+ "model.layers.25.self_attn.kv_b_proj": {
639
+ "bits": 4,
640
+ "group_size": 128
641
+ },
642
+ "model.layers.25.self_attn.o_proj": {
643
+ "bits": 4,
644
+ "group_size": 128
645
+ },
646
+ "model.layers.25.self_attn.q_a_proj": {
647
+ "bits": 4,
648
+ "group_size": 128
649
+ },
650
+ "model.layers.25.self_attn.q_b_proj": {
651
+ "bits": 4,
652
+ "group_size": 128
653
+ },
654
+ "model.layers.26.mlp.shared_experts.down_proj": {
655
+ "bits": 4,
656
+ "group_size": 128
657
+ },
658
+ "model.layers.26.mlp.shared_experts.gate_proj": {
659
+ "bits": 4,
660
+ "group_size": 128
661
+ },
662
+ "model.layers.26.mlp.shared_experts.up_proj": {
663
+ "bits": 4,
664
+ "group_size": 128
665
+ },
666
+ "model.layers.26.self_attn.kv_a_proj_with_mqa": {
667
+ "bits": 4,
668
+ "group_size": 128
669
+ },
670
+ "model.layers.26.self_attn.kv_b_proj": {
671
+ "bits": 4,
672
+ "group_size": 128
673
+ },
674
+ "model.layers.26.self_attn.o_proj": {
675
+ "bits": 4,
676
+ "group_size": 128
677
+ },
678
+ "model.layers.26.self_attn.q_a_proj": {
679
+ "bits": 4,
680
+ "group_size": 128
681
+ },
682
+ "model.layers.26.self_attn.q_b_proj": {
683
+ "bits": 4,
684
+ "group_size": 128
685
+ },
686
+ "model.layers.27.mlp.shared_experts.down_proj": {
687
+ "bits": 4,
688
+ "group_size": 128
689
+ },
690
+ "model.layers.27.mlp.shared_experts.gate_proj": {
691
+ "bits": 4,
692
+ "group_size": 128
693
+ },
694
+ "model.layers.27.mlp.shared_experts.up_proj": {
695
+ "bits": 4,
696
+ "group_size": 128
697
+ },
698
+ "model.layers.27.self_attn.kv_a_proj_with_mqa": {
699
+ "bits": 4,
700
+ "group_size": 128
701
+ },
702
+ "model.layers.27.self_attn.kv_b_proj": {
703
+ "bits": 4,
704
+ "group_size": 128
705
+ },
706
+ "model.layers.27.self_attn.o_proj": {
707
+ "bits": 4,
708
+ "group_size": 128
709
+ },
710
+ "model.layers.27.self_attn.q_a_proj": {
711
+ "bits": 4,
712
+ "group_size": 128
713
+ },
714
+ "model.layers.27.self_attn.q_b_proj": {
715
+ "bits": 4,
716
+ "group_size": 128
717
+ },
718
+ "model.layers.28.mlp.shared_experts.down_proj": {
719
+ "bits": 4,
720
+ "group_size": 128
721
+ },
722
+ "model.layers.28.mlp.shared_experts.gate_proj": {
723
+ "bits": 4,
724
+ "group_size": 128
725
+ },
726
+ "model.layers.28.mlp.shared_experts.up_proj": {
727
+ "bits": 4,
728
+ "group_size": 128
729
+ },
730
+ "model.layers.28.self_attn.kv_a_proj_with_mqa": {
731
+ "bits": 4,
732
+ "group_size": 128
733
+ },
734
+ "model.layers.28.self_attn.kv_b_proj": {
735
+ "bits": 4,
736
+ "group_size": 128
737
+ },
738
+ "model.layers.28.self_attn.o_proj": {
739
+ "bits": 4,
740
+ "group_size": 128
741
+ },
742
+ "model.layers.28.self_attn.q_a_proj": {
743
+ "bits": 4,
744
+ "group_size": 128
745
+ },
746
+ "model.layers.28.self_attn.q_b_proj": {
747
+ "bits": 4,
748
+ "group_size": 128
749
+ },
750
+ "model.layers.29.mlp.shared_experts.down_proj": {
751
+ "bits": 4,
752
+ "group_size": 128
753
+ },
754
+ "model.layers.29.mlp.shared_experts.gate_proj": {
755
+ "bits": 4,
756
+ "group_size": 128
757
+ },
758
+ "model.layers.29.mlp.shared_experts.up_proj": {
759
+ "bits": 4,
760
+ "group_size": 128
761
+ },
762
+ "model.layers.29.self_attn.kv_a_proj_with_mqa": {
763
+ "bits": 4,
764
+ "group_size": 128
765
+ },
766
+ "model.layers.29.self_attn.kv_b_proj": {
767
+ "bits": 4,
768
+ "group_size": 128
769
+ },
770
+ "model.layers.29.self_attn.o_proj": {
771
+ "bits": 4,
772
+ "group_size": 128
773
+ },
774
+ "model.layers.29.self_attn.q_a_proj": {
775
+ "bits": 4,
776
+ "group_size": 128
777
+ },
778
+ "model.layers.29.self_attn.q_b_proj": {
779
+ "bits": 4,
780
+ "group_size": 128
781
+ },
782
+ "model.layers.3.mlp.shared_experts.down_proj": {
783
+ "bits": 4,
784
+ "group_size": 128
785
+ },
786
+ "model.layers.3.mlp.shared_experts.gate_proj": {
787
+ "bits": 4,
788
+ "group_size": 128
789
+ },
790
+ "model.layers.3.mlp.shared_experts.up_proj": {
791
+ "bits": 4,
792
+ "group_size": 128
793
+ },
794
+ "model.layers.3.self_attn.kv_a_proj_with_mqa": {
795
+ "bits": 4,
796
+ "group_size": 128
797
+ },
798
+ "model.layers.3.self_attn.kv_b_proj": {
799
+ "bits": 4,
800
+ "group_size": 128
801
+ },
802
+ "model.layers.3.self_attn.o_proj": {
803
+ "bits": 4,
804
+ "group_size": 128
805
+ },
806
+ "model.layers.3.self_attn.q_a_proj": {
807
+ "bits": 4,
808
+ "group_size": 128
809
+ },
810
+ "model.layers.3.self_attn.q_b_proj": {
811
+ "bits": 4,
812
+ "group_size": 128
813
+ },
814
+ "model.layers.30.mlp.shared_experts.down_proj": {
815
+ "bits": 4,
816
+ "group_size": 128
817
+ },
818
+ "model.layers.30.mlp.shared_experts.gate_proj": {
819
+ "bits": 4,
820
+ "group_size": 128
821
+ },
822
+ "model.layers.30.mlp.shared_experts.up_proj": {
823
+ "bits": 4,
824
+ "group_size": 128
825
+ },
826
+ "model.layers.30.self_attn.kv_a_proj_with_mqa": {
827
+ "bits": 4,
828
+ "group_size": 128
829
+ },
830
+ "model.layers.30.self_attn.kv_b_proj": {
831
+ "bits": 4,
832
+ "group_size": 128
833
+ },
834
+ "model.layers.30.self_attn.o_proj": {
835
+ "bits": 4,
836
+ "group_size": 128
837
+ },
838
+ "model.layers.30.self_attn.q_a_proj": {
839
+ "bits": 4,
840
+ "group_size": 128
841
+ },
842
+ "model.layers.30.self_attn.q_b_proj": {
843
+ "bits": 4,
844
+ "group_size": 128
845
+ },
846
+ "model.layers.31.mlp.shared_experts.down_proj": {
847
+ "bits": 4,
848
+ "group_size": 128
849
+ },
850
+ "model.layers.31.mlp.shared_experts.gate_proj": {
851
+ "bits": 4,
852
+ "group_size": 128
853
+ },
854
+ "model.layers.31.mlp.shared_experts.up_proj": {
855
+ "bits": 4,
856
+ "group_size": 128
857
+ },
858
+ "model.layers.31.self_attn.kv_a_proj_with_mqa": {
859
+ "bits": 4,
860
+ "group_size": 128
861
+ },
862
+ "model.layers.31.self_attn.kv_b_proj": {
863
+ "bits": 4,
864
+ "group_size": 128
865
+ },
866
+ "model.layers.31.self_attn.o_proj": {
867
+ "bits": 4,
868
+ "group_size": 128
869
+ },
870
+ "model.layers.31.self_attn.q_a_proj": {
871
+ "bits": 4,
872
+ "group_size": 128
873
+ },
874
+ "model.layers.31.self_attn.q_b_proj": {
875
+ "bits": 4,
876
+ "group_size": 128
877
+ },
878
+ "model.layers.32.mlp.shared_experts.down_proj": {
879
+ "bits": 4,
880
+ "group_size": 128
881
+ },
882
+ "model.layers.32.mlp.shared_experts.gate_proj": {
883
+ "bits": 4,
884
+ "group_size": 128
885
+ },
886
+ "model.layers.32.mlp.shared_experts.up_proj": {
887
+ "bits": 4,
888
+ "group_size": 128
889
+ },
890
+ "model.layers.32.self_attn.kv_a_proj_with_mqa": {
891
+ "bits": 4,
892
+ "group_size": 128
893
+ },
894
+ "model.layers.32.self_attn.kv_b_proj": {
895
+ "bits": 4,
896
+ "group_size": 128
897
+ },
898
+ "model.layers.32.self_attn.o_proj": {
899
+ "bits": 4,
900
+ "group_size": 128
901
+ },
902
+ "model.layers.32.self_attn.q_a_proj": {
903
+ "bits": 4,
904
+ "group_size": 128
905
+ },
906
+ "model.layers.32.self_attn.q_b_proj": {
907
+ "bits": 4,
908
+ "group_size": 128
909
+ },
910
+ "model.layers.33.mlp.shared_experts.down_proj": {
911
+ "bits": 4,
912
+ "group_size": 128
913
+ },
914
+ "model.layers.33.mlp.shared_experts.gate_proj": {
915
+ "bits": 4,
916
+ "group_size": 128
917
+ },
918
+ "model.layers.33.mlp.shared_experts.up_proj": {
919
+ "bits": 4,
920
+ "group_size": 128
921
+ },
922
+ "model.layers.33.self_attn.kv_a_proj_with_mqa": {
923
+ "bits": 4,
924
+ "group_size": 128
925
+ },
926
+ "model.layers.33.self_attn.kv_b_proj": {
927
+ "bits": 4,
928
+ "group_size": 128
929
+ },
930
+ "model.layers.33.self_attn.o_proj": {
931
+ "bits": 4,
932
+ "group_size": 128
933
+ },
934
+ "model.layers.33.self_attn.q_a_proj": {
935
+ "bits": 4,
936
+ "group_size": 128
937
+ },
938
+ "model.layers.33.self_attn.q_b_proj": {
939
+ "bits": 4,
940
+ "group_size": 128
941
+ },
942
+ "model.layers.34.mlp.shared_experts.down_proj": {
943
+ "bits": 4,
944
+ "group_size": 128
945
+ },
946
+ "model.layers.34.mlp.shared_experts.gate_proj": {
947
+ "bits": 4,
948
+ "group_size": 128
949
+ },
950
+ "model.layers.34.mlp.shared_experts.up_proj": {
951
+ "bits": 4,
952
+ "group_size": 128
953
+ },
954
+ "model.layers.34.self_attn.kv_a_proj_with_mqa": {
955
+ "bits": 4,
956
+ "group_size": 128
957
+ },
958
+ "model.layers.34.self_attn.kv_b_proj": {
959
+ "bits": 4,
960
+ "group_size": 128
961
+ },
962
+ "model.layers.34.self_attn.o_proj": {
963
+ "bits": 4,
964
+ "group_size": 128
965
+ },
966
+ "model.layers.34.self_attn.q_a_proj": {
967
+ "bits": 4,
968
+ "group_size": 128
969
+ },
970
+ "model.layers.34.self_attn.q_b_proj": {
971
+ "bits": 4,
972
+ "group_size": 128
973
+ },
974
+ "model.layers.35.mlp.shared_experts.down_proj": {
975
+ "bits": 4,
976
+ "group_size": 128
977
+ },
978
+ "model.layers.35.mlp.shared_experts.gate_proj": {
979
+ "bits": 4,
980
+ "group_size": 128
981
+ },
982
+ "model.layers.35.mlp.shared_experts.up_proj": {
983
+ "bits": 4,
984
+ "group_size": 128
985
+ },
986
+ "model.layers.35.self_attn.kv_a_proj_with_mqa": {
987
+ "bits": 4,
988
+ "group_size": 128
989
+ },
990
+ "model.layers.35.self_attn.kv_b_proj": {
991
+ "bits": 4,
992
+ "group_size": 128
993
+ },
994
+ "model.layers.35.self_attn.o_proj": {
995
+ "bits": 4,
996
+ "group_size": 128
997
+ },
998
+ "model.layers.35.self_attn.q_a_proj": {
999
+ "bits": 4,
1000
+ "group_size": 128
1001
+ },
1002
+ "model.layers.35.self_attn.q_b_proj": {
1003
+ "bits": 4,
1004
+ "group_size": 128
1005
+ },
1006
+ "model.layers.36.mlp.shared_experts.down_proj": {
1007
+ "bits": 4,
1008
+ "group_size": 128
1009
+ },
1010
+ "model.layers.36.mlp.shared_experts.gate_proj": {
1011
+ "bits": 4,
1012
+ "group_size": 128
1013
+ },
1014
+ "model.layers.36.mlp.shared_experts.up_proj": {
1015
+ "bits": 4,
1016
+ "group_size": 128
1017
+ },
1018
+ "model.layers.36.self_attn.kv_a_proj_with_mqa": {
1019
+ "bits": 4,
1020
+ "group_size": 128
1021
+ },
1022
+ "model.layers.36.self_attn.kv_b_proj": {
1023
+ "bits": 4,
1024
+ "group_size": 128
1025
+ },
1026
+ "model.layers.36.self_attn.o_proj": {
1027
+ "bits": 4,
1028
+ "group_size": 128
1029
+ },
1030
+ "model.layers.36.self_attn.q_a_proj": {
1031
+ "bits": 4,
1032
+ "group_size": 128
1033
+ },
1034
+ "model.layers.36.self_attn.q_b_proj": {
1035
+ "bits": 4,
1036
+ "group_size": 128
1037
+ },
1038
+ "model.layers.37.mlp.shared_experts.down_proj": {
1039
+ "bits": 4,
1040
+ "group_size": 128
1041
+ },
1042
+ "model.layers.37.mlp.shared_experts.gate_proj": {
1043
+ "bits": 4,
1044
+ "group_size": 128
1045
+ },
1046
+ "model.layers.37.mlp.shared_experts.up_proj": {
1047
+ "bits": 4,
1048
+ "group_size": 128
1049
+ },
1050
+ "model.layers.37.self_attn.kv_a_proj_with_mqa": {
1051
+ "bits": 4,
1052
+ "group_size": 128
1053
+ },
1054
+ "model.layers.37.self_attn.kv_b_proj": {
1055
+ "bits": 4,
1056
+ "group_size": 128
1057
+ },
1058
+ "model.layers.37.self_attn.o_proj": {
1059
+ "bits": 4,
1060
+ "group_size": 128
1061
+ },
1062
+ "model.layers.37.self_attn.q_a_proj": {
1063
+ "bits": 4,
1064
+ "group_size": 128
1065
+ },
1066
+ "model.layers.37.self_attn.q_b_proj": {
1067
+ "bits": 4,
1068
+ "group_size": 128
1069
+ },
1070
+ "model.layers.38.mlp.shared_experts.down_proj": {
1071
+ "bits": 4,
1072
+ "group_size": 128
1073
+ },
1074
+ "model.layers.38.mlp.shared_experts.gate_proj": {
1075
+ "bits": 4,
1076
+ "group_size": 128
1077
+ },
1078
+ "model.layers.38.mlp.shared_experts.up_proj": {
1079
+ "bits": 4,
1080
+ "group_size": 128
1081
+ },
1082
+ "model.layers.38.self_attn.kv_a_proj_with_mqa": {
1083
+ "bits": 4,
1084
+ "group_size": 128
1085
+ },
1086
+ "model.layers.38.self_attn.kv_b_proj": {
1087
+ "bits": 4,
1088
+ "group_size": 128
1089
+ },
1090
+ "model.layers.38.self_attn.o_proj": {
1091
+ "bits": 4,
1092
+ "group_size": 128
1093
+ },
1094
+ "model.layers.38.self_attn.q_a_proj": {
1095
+ "bits": 4,
1096
+ "group_size": 128
1097
+ },
1098
+ "model.layers.38.self_attn.q_b_proj": {
1099
+ "bits": 4,
1100
+ "group_size": 128
1101
+ },
1102
+ "model.layers.39.mlp.shared_experts.down_proj": {
1103
+ "bits": 4,
1104
+ "group_size": 128
1105
+ },
1106
+ "model.layers.39.mlp.shared_experts.gate_proj": {
1107
+ "bits": 4,
1108
+ "group_size": 128
1109
+ },
1110
+ "model.layers.39.mlp.shared_experts.up_proj": {
1111
+ "bits": 4,
1112
+ "group_size": 128
1113
+ },
1114
+ "model.layers.39.self_attn.kv_a_proj_with_mqa": {
1115
+ "bits": 4,
1116
+ "group_size": 128
1117
+ },
1118
+ "model.layers.39.self_attn.kv_b_proj": {
1119
+ "bits": 4,
1120
+ "group_size": 128
1121
+ },
1122
+ "model.layers.39.self_attn.o_proj": {
1123
+ "bits": 4,
1124
+ "group_size": 128
1125
+ },
1126
+ "model.layers.39.self_attn.q_a_proj": {
1127
+ "bits": 4,
1128
+ "group_size": 128
1129
+ },
1130
+ "model.layers.39.self_attn.q_b_proj": {
1131
+ "bits": 4,
1132
+ "group_size": 128
1133
+ },
1134
+ "model.layers.4.mlp.shared_experts.down_proj": {
1135
+ "bits": 4,
1136
+ "group_size": 128
1137
+ },
1138
+ "model.layers.4.mlp.shared_experts.gate_proj": {
1139
+ "bits": 4,
1140
+ "group_size": 128
1141
+ },
1142
+ "model.layers.4.mlp.shared_experts.up_proj": {
1143
+ "bits": 4,
1144
+ "group_size": 128
1145
+ },
1146
+ "model.layers.4.self_attn.kv_a_proj_with_mqa": {
1147
+ "bits": 4,
1148
+ "group_size": 128
1149
+ },
1150
+ "model.layers.4.self_attn.kv_b_proj": {
1151
+ "bits": 4,
1152
+ "group_size": 128
1153
+ },
1154
+ "model.layers.4.self_attn.o_proj": {
1155
+ "bits": 4,
1156
+ "group_size": 128
1157
+ },
1158
+ "model.layers.4.self_attn.q_a_proj": {
1159
+ "bits": 4,
1160
+ "group_size": 128
1161
+ },
1162
+ "model.layers.4.self_attn.q_b_proj": {
1163
+ "bits": 4,
1164
+ "group_size": 128
1165
+ },
1166
+ "model.layers.40.mlp.shared_experts.down_proj": {
1167
+ "bits": 4,
1168
+ "group_size": 128
1169
+ },
1170
+ "model.layers.40.mlp.shared_experts.gate_proj": {
1171
+ "bits": 4,
1172
+ "group_size": 128
1173
+ },
1174
+ "model.layers.40.mlp.shared_experts.up_proj": {
1175
+ "bits": 4,
1176
+ "group_size": 128
1177
+ },
1178
+ "model.layers.40.self_attn.kv_a_proj_with_mqa": {
1179
+ "bits": 4,
1180
+ "group_size": 128
1181
+ },
1182
+ "model.layers.40.self_attn.kv_b_proj": {
1183
+ "bits": 4,
1184
+ "group_size": 128
1185
+ },
1186
+ "model.layers.40.self_attn.o_proj": {
1187
+ "bits": 4,
1188
+ "group_size": 128
1189
+ },
1190
+ "model.layers.40.self_attn.q_a_proj": {
1191
+ "bits": 4,
1192
+ "group_size": 128
1193
+ },
1194
+ "model.layers.40.self_attn.q_b_proj": {
1195
+ "bits": 4,
1196
+ "group_size": 128
1197
+ },
1198
+ "model.layers.41.mlp.shared_experts.down_proj": {
1199
+ "bits": 4,
1200
+ "group_size": 128
1201
+ },
1202
+ "model.layers.41.mlp.shared_experts.gate_proj": {
1203
+ "bits": 4,
1204
+ "group_size": 128
1205
+ },
1206
+ "model.layers.41.mlp.shared_experts.up_proj": {
1207
+ "bits": 4,
1208
+ "group_size": 128
1209
+ },
1210
+ "model.layers.41.self_attn.kv_a_proj_with_mqa": {
1211
+ "bits": 4,
1212
+ "group_size": 128
1213
+ },
1214
+ "model.layers.41.self_attn.kv_b_proj": {
1215
+ "bits": 4,
1216
+ "group_size": 128
1217
+ },
1218
+ "model.layers.41.self_attn.o_proj": {
1219
+ "bits": 4,
1220
+ "group_size": 128
1221
+ },
1222
+ "model.layers.41.self_attn.q_a_proj": {
1223
+ "bits": 4,
1224
+ "group_size": 128
1225
+ },
1226
+ "model.layers.41.self_attn.q_b_proj": {
1227
+ "bits": 4,
1228
+ "group_size": 128
1229
+ },
1230
+ "model.layers.42.mlp.shared_experts.down_proj": {
1231
+ "bits": 4,
1232
+ "group_size": 128
1233
+ },
1234
+ "model.layers.42.mlp.shared_experts.gate_proj": {
1235
+ "bits": 4,
1236
+ "group_size": 128
1237
+ },
1238
+ "model.layers.42.mlp.shared_experts.up_proj": {
1239
+ "bits": 4,
1240
+ "group_size": 128
1241
+ },
1242
+ "model.layers.42.self_attn.kv_a_proj_with_mqa": {
1243
+ "bits": 4,
1244
+ "group_size": 128
1245
+ },
1246
+ "model.layers.42.self_attn.kv_b_proj": {
1247
+ "bits": 4,
1248
+ "group_size": 128
1249
+ },
1250
+ "model.layers.42.self_attn.o_proj": {
1251
+ "bits": 4,
1252
+ "group_size": 128
1253
+ },
1254
+ "model.layers.42.self_attn.q_a_proj": {
1255
+ "bits": 4,
1256
+ "group_size": 128
1257
+ },
1258
+ "model.layers.42.self_attn.q_b_proj": {
1259
+ "bits": 4,
1260
+ "group_size": 128
1261
+ },
1262
+ "model.layers.43.mlp.shared_experts.down_proj": {
1263
+ "bits": 4,
1264
+ "group_size": 128
1265
+ },
1266
+ "model.layers.43.mlp.shared_experts.gate_proj": {
1267
+ "bits": 4,
1268
+ "group_size": 128
1269
+ },
1270
+ "model.layers.43.mlp.shared_experts.up_proj": {
1271
+ "bits": 4,
1272
+ "group_size": 128
1273
+ },
1274
+ "model.layers.43.self_attn.kv_a_proj_with_mqa": {
1275
+ "bits": 4,
1276
+ "group_size": 128
1277
+ },
1278
+ "model.layers.43.self_attn.kv_b_proj": {
1279
+ "bits": 4,
1280
+ "group_size": 128
1281
+ },
1282
+ "model.layers.43.self_attn.o_proj": {
1283
+ "bits": 4,
1284
+ "group_size": 128
1285
+ },
1286
+ "model.layers.43.self_attn.q_a_proj": {
1287
+ "bits": 4,
1288
+ "group_size": 128
1289
+ },
1290
+ "model.layers.43.self_attn.q_b_proj": {
1291
+ "bits": 4,
1292
+ "group_size": 128
1293
+ },
1294
+ "model.layers.44.mlp.shared_experts.down_proj": {
1295
+ "bits": 4,
1296
+ "group_size": 128
1297
+ },
1298
+ "model.layers.44.mlp.shared_experts.gate_proj": {
1299
+ "bits": 4,
1300
+ "group_size": 128
1301
+ },
1302
+ "model.layers.44.mlp.shared_experts.up_proj": {
1303
+ "bits": 4,
1304
+ "group_size": 128
1305
+ },
1306
+ "model.layers.44.self_attn.kv_a_proj_with_mqa": {
1307
+ "bits": 4,
1308
+ "group_size": 128
1309
+ },
1310
+ "model.layers.44.self_attn.kv_b_proj": {
1311
+ "bits": 4,
1312
+ "group_size": 128
1313
+ },
1314
+ "model.layers.44.self_attn.o_proj": {
1315
+ "bits": 4,
1316
+ "group_size": 128
1317
+ },
1318
+ "model.layers.44.self_attn.q_a_proj": {
1319
+ "bits": 4,
1320
+ "group_size": 128
1321
+ },
1322
+ "model.layers.44.self_attn.q_b_proj": {
1323
+ "bits": 4,
1324
+ "group_size": 128
1325
+ },
1326
+ "model.layers.45.mlp.shared_experts.down_proj": {
1327
+ "bits": 4,
1328
+ "group_size": 128
1329
+ },
1330
+ "model.layers.45.mlp.shared_experts.gate_proj": {
1331
+ "bits": 4,
1332
+ "group_size": 128
1333
+ },
1334
+ "model.layers.45.mlp.shared_experts.up_proj": {
1335
+ "bits": 4,
1336
+ "group_size": 128
1337
+ },
1338
+ "model.layers.45.self_attn.kv_a_proj_with_mqa": {
1339
+ "bits": 4,
1340
+ "group_size": 128
1341
+ },
1342
+ "model.layers.45.self_attn.kv_b_proj": {
1343
+ "bits": 4,
1344
+ "group_size": 128
1345
+ },
1346
+ "model.layers.45.self_attn.o_proj": {
1347
+ "bits": 4,
1348
+ "group_size": 128
1349
+ },
1350
+ "model.layers.45.self_attn.q_a_proj": {
1351
+ "bits": 4,
1352
+ "group_size": 128
1353
+ },
1354
+ "model.layers.45.self_attn.q_b_proj": {
1355
+ "bits": 4,
1356
+ "group_size": 128
1357
+ },
1358
+ "model.layers.46.mlp.shared_experts.down_proj": {
1359
+ "bits": 4,
1360
+ "group_size": 128
1361
+ },
1362
+ "model.layers.46.mlp.shared_experts.gate_proj": {
1363
+ "bits": 4,
1364
+ "group_size": 128
1365
+ },
1366
+ "model.layers.46.mlp.shared_experts.up_proj": {
1367
+ "bits": 4,
1368
+ "group_size": 128
1369
+ },
1370
+ "model.layers.46.self_attn.kv_a_proj_with_mqa": {
1371
+ "bits": 4,
1372
+ "group_size": 128
1373
+ },
1374
+ "model.layers.46.self_attn.kv_b_proj": {
1375
+ "bits": 4,
1376
+ "group_size": 128
1377
+ },
1378
+ "model.layers.46.self_attn.o_proj": {
1379
+ "bits": 4,
1380
+ "group_size": 128
1381
+ },
1382
+ "model.layers.46.self_attn.q_a_proj": {
1383
+ "bits": 4,
1384
+ "group_size": 128
1385
+ },
1386
+ "model.layers.46.self_attn.q_b_proj": {
1387
+ "bits": 4,
1388
+ "group_size": 128
1389
+ },
1390
+ "model.layers.47.mlp.shared_experts.down_proj": {
1391
+ "bits": 4,
1392
+ "group_size": 128
1393
+ },
1394
+ "model.layers.47.mlp.shared_experts.gate_proj": {
1395
+ "bits": 4,
1396
+ "group_size": 128
1397
+ },
1398
+ "model.layers.47.mlp.shared_experts.up_proj": {
1399
+ "bits": 4,
1400
+ "group_size": 128
1401
+ },
1402
+ "model.layers.47.self_attn.kv_a_proj_with_mqa": {
1403
+ "bits": 4,
1404
+ "group_size": 128
1405
+ },
1406
+ "model.layers.47.self_attn.kv_b_proj": {
1407
+ "bits": 4,
1408
+ "group_size": 128
1409
+ },
1410
+ "model.layers.47.self_attn.o_proj": {
1411
+ "bits": 4,
1412
+ "group_size": 128
1413
+ },
1414
+ "model.layers.47.self_attn.q_a_proj": {
1415
+ "bits": 4,
1416
+ "group_size": 128
1417
+ },
1418
+ "model.layers.47.self_attn.q_b_proj": {
1419
+ "bits": 4,
1420
+ "group_size": 128
1421
+ },
1422
+ "model.layers.48.mlp.shared_experts.down_proj": {
1423
+ "bits": 4,
1424
+ "group_size": 128
1425
+ },
1426
+ "model.layers.48.mlp.shared_experts.gate_proj": {
1427
+ "bits": 4,
1428
+ "group_size": 128
1429
+ },
1430
+ "model.layers.48.mlp.shared_experts.up_proj": {
1431
+ "bits": 4,
1432
+ "group_size": 128
1433
+ },
1434
+ "model.layers.48.self_attn.kv_a_proj_with_mqa": {
1435
+ "bits": 4,
1436
+ "group_size": 128
1437
+ },
1438
+ "model.layers.48.self_attn.kv_b_proj": {
1439
+ "bits": 4,
1440
+ "group_size": 128
1441
+ },
1442
+ "model.layers.48.self_attn.o_proj": {
1443
+ "bits": 4,
1444
+ "group_size": 128
1445
+ },
1446
+ "model.layers.48.self_attn.q_a_proj": {
1447
+ "bits": 4,
1448
+ "group_size": 128
1449
+ },
1450
+ "model.layers.48.self_attn.q_b_proj": {
1451
+ "bits": 4,
1452
+ "group_size": 128
1453
+ },
1454
+ "model.layers.49.mlp.shared_experts.down_proj": {
1455
+ "bits": 4,
1456
+ "group_size": 128
1457
+ },
1458
+ "model.layers.49.mlp.shared_experts.gate_proj": {
1459
+ "bits": 4,
1460
+ "group_size": 128
1461
+ },
1462
+ "model.layers.49.mlp.shared_experts.up_proj": {
1463
+ "bits": 4,
1464
+ "group_size": 128
1465
+ },
1466
+ "model.layers.49.self_attn.kv_a_proj_with_mqa": {
1467
+ "bits": 4,
1468
+ "group_size": 128
1469
+ },
1470
+ "model.layers.49.self_attn.kv_b_proj": {
1471
+ "bits": 4,
1472
+ "group_size": 128
1473
+ },
1474
+ "model.layers.49.self_attn.o_proj": {
1475
+ "bits": 4,
1476
+ "group_size": 128
1477
+ },
1478
+ "model.layers.49.self_attn.q_a_proj": {
1479
+ "bits": 4,
1480
+ "group_size": 128
1481
+ },
1482
+ "model.layers.49.self_attn.q_b_proj": {
1483
+ "bits": 4,
1484
+ "group_size": 128
1485
+ },
1486
+ "model.layers.5.mlp.shared_experts.down_proj": {
1487
+ "bits": 4,
1488
+ "group_size": 128
1489
+ },
1490
+ "model.layers.5.mlp.shared_experts.gate_proj": {
1491
+ "bits": 4,
1492
+ "group_size": 128
1493
+ },
1494
+ "model.layers.5.mlp.shared_experts.up_proj": {
1495
+ "bits": 4,
1496
+ "group_size": 128
1497
+ },
1498
+ "model.layers.5.self_attn.kv_a_proj_with_mqa": {
1499
+ "bits": 4,
1500
+ "group_size": 128
1501
+ },
1502
+ "model.layers.5.self_attn.kv_b_proj": {
1503
+ "bits": 4,
1504
+ "group_size": 128
1505
+ },
1506
+ "model.layers.5.self_attn.o_proj": {
1507
+ "bits": 4,
1508
+ "group_size": 128
1509
+ },
1510
+ "model.layers.5.self_attn.q_a_proj": {
1511
+ "bits": 4,
1512
+ "group_size": 128
1513
+ },
1514
+ "model.layers.5.self_attn.q_b_proj": {
1515
+ "bits": 4,
1516
+ "group_size": 128
1517
+ },
1518
+ "model.layers.50.mlp.shared_experts.down_proj": {
1519
+ "bits": 4,
1520
+ "group_size": 128
1521
+ },
1522
+ "model.layers.50.mlp.shared_experts.gate_proj": {
1523
+ "bits": 4,
1524
+ "group_size": 128
1525
+ },
1526
+ "model.layers.50.mlp.shared_experts.up_proj": {
1527
+ "bits": 4,
1528
+ "group_size": 128
1529
+ },
1530
+ "model.layers.50.self_attn.kv_a_proj_with_mqa": {
1531
+ "bits": 4,
1532
+ "group_size": 128
1533
+ },
1534
+ "model.layers.50.self_attn.kv_b_proj": {
1535
+ "bits": 4,
1536
+ "group_size": 128
1537
+ },
1538
+ "model.layers.50.self_attn.o_proj": {
1539
+ "bits": 4,
1540
+ "group_size": 128
1541
+ },
1542
+ "model.layers.50.self_attn.q_a_proj": {
1543
+ "bits": 4,
1544
+ "group_size": 128
1545
+ },
1546
+ "model.layers.50.self_attn.q_b_proj": {
1547
+ "bits": 4,
1548
+ "group_size": 128
1549
+ },
1550
+ "model.layers.51.mlp.shared_experts.down_proj": {
1551
+ "bits": 4,
1552
+ "group_size": 128
1553
+ },
1554
+ "model.layers.51.mlp.shared_experts.gate_proj": {
1555
+ "bits": 4,
1556
+ "group_size": 128
1557
+ },
1558
+ "model.layers.51.mlp.shared_experts.up_proj": {
1559
+ "bits": 4,
1560
+ "group_size": 128
1561
+ },
1562
+ "model.layers.51.self_attn.kv_a_proj_with_mqa": {
1563
+ "bits": 4,
1564
+ "group_size": 128
1565
+ },
1566
+ "model.layers.51.self_attn.kv_b_proj": {
1567
+ "bits": 4,
1568
+ "group_size": 128
1569
+ },
1570
+ "model.layers.51.self_attn.o_proj": {
1571
+ "bits": 4,
1572
+ "group_size": 128
1573
+ },
1574
+ "model.layers.51.self_attn.q_a_proj": {
1575
+ "bits": 4,
1576
+ "group_size": 128
1577
+ },
1578
+ "model.layers.51.self_attn.q_b_proj": {
1579
+ "bits": 4,
1580
+ "group_size": 128
1581
+ },
1582
+ "model.layers.52.mlp.shared_experts.down_proj": {
1583
+ "bits": 4,
1584
+ "group_size": 128
1585
+ },
1586
+ "model.layers.52.mlp.shared_experts.gate_proj": {
1587
+ "bits": 4,
1588
+ "group_size": 128
1589
+ },
1590
+ "model.layers.52.mlp.shared_experts.up_proj": {
1591
+ "bits": 4,
1592
+ "group_size": 128
1593
+ },
1594
+ "model.layers.52.self_attn.kv_a_proj_with_mqa": {
1595
+ "bits": 4,
1596
+ "group_size": 128
1597
+ },
1598
+ "model.layers.52.self_attn.kv_b_proj": {
1599
+ "bits": 4,
1600
+ "group_size": 128
1601
+ },
1602
+ "model.layers.52.self_attn.o_proj": {
1603
+ "bits": 4,
1604
+ "group_size": 128
1605
+ },
1606
+ "model.layers.52.self_attn.q_a_proj": {
1607
+ "bits": 4,
1608
+ "group_size": 128
1609
+ },
1610
+ "model.layers.52.self_attn.q_b_proj": {
1611
+ "bits": 4,
1612
+ "group_size": 128
1613
+ },
1614
+ "model.layers.53.mlp.shared_experts.down_proj": {
1615
+ "bits": 4,
1616
+ "group_size": 128
1617
+ },
1618
+ "model.layers.53.mlp.shared_experts.gate_proj": {
1619
+ "bits": 4,
1620
+ "group_size": 128
1621
+ },
1622
+ "model.layers.53.mlp.shared_experts.up_proj": {
1623
+ "bits": 4,
1624
+ "group_size": 128
1625
+ },
1626
+ "model.layers.53.self_attn.kv_a_proj_with_mqa": {
1627
+ "bits": 4,
1628
+ "group_size": 128
1629
+ },
1630
+ "model.layers.53.self_attn.kv_b_proj": {
1631
+ "bits": 4,
1632
+ "group_size": 128
1633
+ },
1634
+ "model.layers.53.self_attn.o_proj": {
1635
+ "bits": 4,
1636
+ "group_size": 128
1637
+ },
1638
+ "model.layers.53.self_attn.q_a_proj": {
1639
+ "bits": 4,
1640
+ "group_size": 128
1641
+ },
1642
+ "model.layers.53.self_attn.q_b_proj": {
1643
+ "bits": 4,
1644
+ "group_size": 128
1645
+ },
1646
+ "model.layers.54.mlp.shared_experts.down_proj": {
1647
+ "bits": 4,
1648
+ "group_size": 128
1649
+ },
1650
+ "model.layers.54.mlp.shared_experts.gate_proj": {
1651
+ "bits": 4,
1652
+ "group_size": 128
1653
+ },
1654
+ "model.layers.54.mlp.shared_experts.up_proj": {
1655
+ "bits": 4,
1656
+ "group_size": 128
1657
+ },
1658
+ "model.layers.54.self_attn.kv_a_proj_with_mqa": {
1659
+ "bits": 4,
1660
+ "group_size": 128
1661
+ },
1662
+ "model.layers.54.self_attn.kv_b_proj": {
1663
+ "bits": 4,
1664
+ "group_size": 128
1665
+ },
1666
+ "model.layers.54.self_attn.o_proj": {
1667
+ "bits": 4,
1668
+ "group_size": 128
1669
+ },
1670
+ "model.layers.54.self_attn.q_a_proj": {
1671
+ "bits": 4,
1672
+ "group_size": 128
1673
+ },
1674
+ "model.layers.54.self_attn.q_b_proj": {
1675
+ "bits": 4,
1676
+ "group_size": 128
1677
+ },
1678
+ "model.layers.55.mlp.shared_experts.down_proj": {
1679
+ "bits": 4,
1680
+ "group_size": 128
1681
+ },
1682
+ "model.layers.55.mlp.shared_experts.gate_proj": {
1683
+ "bits": 4,
1684
+ "group_size": 128
1685
+ },
1686
+ "model.layers.55.mlp.shared_experts.up_proj": {
1687
+ "bits": 4,
1688
+ "group_size": 128
1689
+ },
1690
+ "model.layers.55.self_attn.kv_a_proj_with_mqa": {
1691
+ "bits": 4,
1692
+ "group_size": 128
1693
+ },
1694
+ "model.layers.55.self_attn.kv_b_proj": {
1695
+ "bits": 4,
1696
+ "group_size": 128
1697
+ },
1698
+ "model.layers.55.self_attn.o_proj": {
1699
+ "bits": 4,
1700
+ "group_size": 128
1701
+ },
1702
+ "model.layers.55.self_attn.q_a_proj": {
1703
+ "bits": 4,
1704
+ "group_size": 128
1705
+ },
1706
+ "model.layers.55.self_attn.q_b_proj": {
1707
+ "bits": 4,
1708
+ "group_size": 128
1709
+ },
1710
+ "model.layers.56.mlp.shared_experts.down_proj": {
1711
+ "bits": 4,
1712
+ "group_size": 128
1713
+ },
1714
+ "model.layers.56.mlp.shared_experts.gate_proj": {
1715
+ "bits": 4,
1716
+ "group_size": 128
1717
+ },
1718
+ "model.layers.56.mlp.shared_experts.up_proj": {
1719
+ "bits": 4,
1720
+ "group_size": 128
1721
+ },
1722
+ "model.layers.56.self_attn.kv_a_proj_with_mqa": {
1723
+ "bits": 4,
1724
+ "group_size": 128
1725
+ },
1726
+ "model.layers.56.self_attn.kv_b_proj": {
1727
+ "bits": 4,
1728
+ "group_size": 128
1729
+ },
1730
+ "model.layers.56.self_attn.o_proj": {
1731
+ "bits": 4,
1732
+ "group_size": 128
1733
+ },
1734
+ "model.layers.56.self_attn.q_a_proj": {
1735
+ "bits": 4,
1736
+ "group_size": 128
1737
+ },
1738
+ "model.layers.56.self_attn.q_b_proj": {
1739
+ "bits": 4,
1740
+ "group_size": 128
1741
+ },
1742
+ "model.layers.57.mlp.shared_experts.down_proj": {
1743
+ "bits": 4,
1744
+ "group_size": 128
1745
+ },
1746
+ "model.layers.57.mlp.shared_experts.gate_proj": {
1747
+ "bits": 4,
1748
+ "group_size": 128
1749
+ },
1750
+ "model.layers.57.mlp.shared_experts.up_proj": {
1751
+ "bits": 4,
1752
+ "group_size": 128
1753
+ },
1754
+ "model.layers.57.self_attn.kv_a_proj_with_mqa": {
1755
+ "bits": 4,
1756
+ "group_size": 128
1757
+ },
1758
+ "model.layers.57.self_attn.kv_b_proj": {
1759
+ "bits": 4,
1760
+ "group_size": 128
1761
+ },
1762
+ "model.layers.57.self_attn.o_proj": {
1763
+ "bits": 4,
1764
+ "group_size": 128
1765
+ },
1766
+ "model.layers.57.self_attn.q_a_proj": {
1767
+ "bits": 4,
1768
+ "group_size": 128
1769
+ },
1770
+ "model.layers.57.self_attn.q_b_proj": {
1771
+ "bits": 4,
1772
+ "group_size": 128
1773
+ },
1774
+ "model.layers.58.mlp.shared_experts.down_proj": {
1775
+ "bits": 4,
1776
+ "group_size": 128
1777
+ },
1778
+ "model.layers.58.mlp.shared_experts.gate_proj": {
1779
+ "bits": 4,
1780
+ "group_size": 128
1781
+ },
1782
+ "model.layers.58.mlp.shared_experts.up_proj": {
1783
+ "bits": 4,
1784
+ "group_size": 128
1785
+ },
1786
+ "model.layers.58.self_attn.kv_a_proj_with_mqa": {
1787
+ "bits": 4,
1788
+ "group_size": 128
1789
+ },
1790
+ "model.layers.58.self_attn.kv_b_proj": {
1791
+ "bits": 4,
1792
+ "group_size": 128
1793
+ },
1794
+ "model.layers.58.self_attn.o_proj": {
1795
+ "bits": 4,
1796
+ "group_size": 128
1797
+ },
1798
+ "model.layers.58.self_attn.q_a_proj": {
1799
+ "bits": 4,
1800
+ "group_size": 128
1801
+ },
1802
+ "model.layers.58.self_attn.q_b_proj": {
1803
+ "bits": 4,
1804
+ "group_size": 128
1805
+ },
1806
+ "model.layers.59.mlp.shared_experts.down_proj": {
1807
+ "bits": 4,
1808
+ "group_size": 128
1809
+ },
1810
+ "model.layers.59.mlp.shared_experts.gate_proj": {
1811
+ "bits": 4,
1812
+ "group_size": 128
1813
+ },
1814
+ "model.layers.59.mlp.shared_experts.up_proj": {
1815
+ "bits": 4,
1816
+ "group_size": 128
1817
+ },
1818
+ "model.layers.59.self_attn.kv_a_proj_with_mqa": {
1819
+ "bits": 4,
1820
+ "group_size": 128
1821
+ },
1822
+ "model.layers.59.self_attn.kv_b_proj": {
1823
+ "bits": 4,
1824
+ "group_size": 128
1825
+ },
1826
+ "model.layers.59.self_attn.o_proj": {
1827
+ "bits": 4,
1828
+ "group_size": 128
1829
+ },
1830
+ "model.layers.59.self_attn.q_a_proj": {
1831
+ "bits": 4,
1832
+ "group_size": 128
1833
+ },
1834
+ "model.layers.59.self_attn.q_b_proj": {
1835
+ "bits": 4,
1836
+ "group_size": 128
1837
+ },
1838
+ "model.layers.6.mlp.shared_experts.down_proj": {
1839
+ "bits": 4,
1840
+ "group_size": 128
1841
+ },
1842
+ "model.layers.6.mlp.shared_experts.gate_proj": {
1843
+ "bits": 4,
1844
+ "group_size": 128
1845
+ },
1846
+ "model.layers.6.mlp.shared_experts.up_proj": {
1847
+ "bits": 4,
1848
+ "group_size": 128
1849
+ },
1850
+ "model.layers.6.self_attn.kv_a_proj_with_mqa": {
1851
+ "bits": 4,
1852
+ "group_size": 128
1853
+ },
1854
+ "model.layers.6.self_attn.kv_b_proj": {
1855
+ "bits": 4,
1856
+ "group_size": 128
1857
+ },
1858
+ "model.layers.6.self_attn.o_proj": {
1859
+ "bits": 4,
1860
+ "group_size": 128
1861
+ },
1862
+ "model.layers.6.self_attn.q_a_proj": {
1863
+ "bits": 4,
1864
+ "group_size": 128
1865
+ },
1866
+ "model.layers.6.self_attn.q_b_proj": {
1867
+ "bits": 4,
1868
+ "group_size": 128
1869
+ },
1870
+ "model.layers.60.mlp.shared_experts.down_proj": {
1871
+ "bits": 4,
1872
+ "group_size": 128
1873
+ },
1874
+ "model.layers.60.mlp.shared_experts.gate_proj": {
1875
+ "bits": 4,
1876
+ "group_size": 128
1877
+ },
1878
+ "model.layers.60.mlp.shared_experts.up_proj": {
1879
+ "bits": 4,
1880
+ "group_size": 128
1881
+ },
1882
+ "model.layers.60.self_attn.kv_a_proj_with_mqa": {
1883
+ "bits": 4,
1884
+ "group_size": 128
1885
+ },
1886
+ "model.layers.60.self_attn.kv_b_proj": {
1887
+ "bits": 4,
1888
+ "group_size": 128
1889
+ },
1890
+ "model.layers.60.self_attn.o_proj": {
1891
+ "bits": 4,
1892
+ "group_size": 128
1893
+ },
1894
+ "model.layers.60.self_attn.q_a_proj": {
1895
+ "bits": 4,
1896
+ "group_size": 128
1897
+ },
1898
+ "model.layers.60.self_attn.q_b_proj": {
1899
+ "bits": 4,
1900
+ "group_size": 128
1901
+ },
1902
+ "model.layers.7.mlp.shared_experts.down_proj": {
1903
+ "bits": 4,
1904
+ "group_size": 128
1905
+ },
1906
+ "model.layers.7.mlp.shared_experts.gate_proj": {
1907
+ "bits": 4,
1908
+ "group_size": 128
1909
+ },
1910
+ "model.layers.7.mlp.shared_experts.up_proj": {
1911
+ "bits": 4,
1912
+ "group_size": 128
1913
+ },
1914
+ "model.layers.7.self_attn.kv_a_proj_with_mqa": {
1915
+ "bits": 4,
1916
+ "group_size": 128
1917
+ },
1918
+ "model.layers.7.self_attn.kv_b_proj": {
1919
+ "bits": 4,
1920
+ "group_size": 128
1921
+ },
1922
+ "model.layers.7.self_attn.o_proj": {
1923
+ "bits": 4,
1924
+ "group_size": 128
1925
+ },
1926
+ "model.layers.7.self_attn.q_a_proj": {
1927
+ "bits": 4,
1928
+ "group_size": 128
1929
+ },
1930
+ "model.layers.7.self_attn.q_b_proj": {
1931
+ "bits": 4,
1932
+ "group_size": 128
1933
+ },
1934
+ "model.layers.8.mlp.shared_experts.down_proj": {
1935
+ "bits": 4,
1936
+ "group_size": 128
1937
+ },
1938
+ "model.layers.8.mlp.shared_experts.gate_proj": {
1939
+ "bits": 4,
1940
+ "group_size": 128
1941
+ },
1942
+ "model.layers.8.mlp.shared_experts.up_proj": {
1943
+ "bits": 4,
1944
+ "group_size": 128
1945
+ },
1946
+ "model.layers.8.self_attn.kv_a_proj_with_mqa": {
1947
+ "bits": 4,
1948
+ "group_size": 128
1949
+ },
1950
+ "model.layers.8.self_attn.kv_b_proj": {
1951
+ "bits": 4,
1952
+ "group_size": 128
1953
+ },
1954
+ "model.layers.8.self_attn.o_proj": {
1955
+ "bits": 4,
1956
+ "group_size": 128
1957
+ },
1958
+ "model.layers.8.self_attn.q_a_proj": {
1959
+ "bits": 4,
1960
+ "group_size": 128
1961
+ },
1962
+ "model.layers.8.self_attn.q_b_proj": {
1963
+ "bits": 4,
1964
+ "group_size": 128
1965
+ },
1966
+ "model.layers.9.mlp.shared_experts.down_proj": {
1967
+ "bits": 4,
1968
+ "group_size": 128
1969
+ },
1970
+ "model.layers.9.mlp.shared_experts.gate_proj": {
1971
+ "bits": 4,
1972
+ "group_size": 128
1973
+ },
1974
+ "model.layers.9.mlp.shared_experts.up_proj": {
1975
+ "bits": 4,
1976
+ "group_size": 128
1977
+ },
1978
+ "model.layers.9.self_attn.kv_a_proj_with_mqa": {
1979
+ "bits": 4,
1980
+ "group_size": 128
1981
+ },
1982
+ "model.layers.9.self_attn.kv_b_proj": {
1983
+ "bits": 4,
1984
+ "group_size": 128
1985
+ },
1986
+ "model.layers.9.self_attn.o_proj": {
1987
+ "bits": 4,
1988
+ "group_size": 128
1989
+ },
1990
+ "model.layers.9.self_attn.q_a_proj": {
1991
+ "bits": 4,
1992
+ "group_size": 128
1993
+ },
1994
+ "model.layers.9.self_attn.q_b_proj": {
1995
+ "bits": 4,
1996
+ "group_size": 128
1997
+ }
1998
+ },
1999
+ "group_size": 64,
2000
+ "iters": 400,
2001
+ "nsamples": 512,
2002
+ "packing_format": "auto_round:auto_gptq",
2003
+ "quant_method": "auto-round",
2004
+ "seqlen": 512,
2005
+ "sym": true
2006
+ },
2007
+ "quantization_config_bak": {
2008
+ "activation_scheme": "dynamic",
2009
+ "fmt": "e4m3",
2010
+ "quant_method": "fp8",
2011
+ "weight_block_size": [
2012
+ 128,
2013
+ 128
2014
+ ]
2015
+ },
2016
+ "rms_norm_eps": 1e-06,
2017
+ "rope_interleave": true,
2018
+ "rope_scaling": {
2019
+ "beta_fast": 32.0,
2020
+ "beta_slow": 1.0,
2021
+ "factor": 40.0,
2022
+ "mscale": 1.0,
2023
+ "mscale_all_dim": 1.0,
2024
+ "original_max_position_embeddings": 4096,
2025
+ "rope_type": "yarn",
2026
+ "type": "yarn"
2027
+ },
2028
+ "rope_theta": 10000,
2029
+ "routed_scaling_factor": 2.5,
2030
+ "scoring_func": "sigmoid",
2031
+ "tie_word_embeddings": false,
2032
+ "topk_group": 4,
2033
+ "topk_method": "noaux_tc",
2034
+ "torch_dtype": "bfloat16",
2035
+ "transformers_version": "4.52.3",
2036
+ "use_cache": true,
2037
+ "v_head_dim": 128,
2038
+ "vocab_size": 129280
2039
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "do_sample": true,
5
+ "eos_token_id": 1,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.52.3"
9
+ }
model-00001-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7902d79e94f86dd3f44a20777234b50f83c251a8590d82e9e0a18908f01c9e39
3
+ size 4999358200
model-00002-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f43f855daed1a9db3368de213b98367fefd8719b750dc487861630ed4c8712e
3
+ size 4999242152
model-00003-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae4ce3189e47a6dd04b1002900321ecd6517030da88c9c21e3224263f68e0d8a
3
+ size 4996855760
model-00004-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba059f602fb63b4a0894b8c86f29154e3ccda6c1115c42a9ea661659c2b56fc4
3
+ size 4999242168
model-00005-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dc5bf64b959dfedcf5b0e9b86a4a7a8548f0863abfd2439278911c12f7f6e91
3
+ size 4996855744
model-00006-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1498fc90684ea81f0a6f8fca4ae4fc64303f26786ef3b4c4f28f279b3baddffa
3
+ size 4999244824
model-00007-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5c634ccca21f9a4540f28bc8c0d1976fe06eed242a5e6e350cac24ea345f61
3
+ size 4996859248
model-00008-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a452cf7d0f0a401ff100d1acf7c5a616ff58155fa37ff7168e714ddf04d47897
3
+ size 4999245648
model-00009-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c846557e6cb59c7ba0bba300ce38969d835f00dc3c45edb28cfcf983e4d87399
3
+ size 4996859232
model-00010-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8045fd155a3591dd88ded9b078036481f53928dc8ea24bd4a1503006369daec
3
+ size 4999245672
model-00011-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e08edf7302996f867687de448df228091694db3f320b932d6872c35d1b6e34
3
+ size 4996859216
model-00012-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b1b43715f559e0b343bd180243f55f5a6736f199470762e68552d18b48b2513
3
+ size 4999245688
model-00013-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100c8c10371e105cc98eda27de91c84b34b4c21096961cbd41c068d628fe84fe
3
+ size 4996859208
model-00014-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17057a342308088e9e605b52839d10d77a3f2e40773c563a4e310bc38f4b178
3
+ size 4999245696
model-00015-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd784ef6aa6c3594885d0220483dc54c1770529049191ea8a70047db6da5828
3
+ size 4996859192
model-00016-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472c4e73c06f2a4d39884b3f17ef46b11ca3401e179c414c3a029c5b8b99d7f7
3
+ size 4999245712
model-00017-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0de85aa4737cca6e1498e86e499753c1ceffb467ae117a0e36c6c507e2fe7a3b
3
+ size 4996859176
model-00018-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac955ff644fbd209ca69acac2e7a13cec120c0e7b306028f79d3435a8356e45
3
+ size 4999245728
model-00019-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8523a3eab7f0e37d6c8a0545b1bbab1cf50b668b280e57a70f5b3c24338f35
3
+ size 4996859160
model-00020-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a1567e694972b36f6c0a0856c71e5186b1503d71d0e237ab287900dd1894c9
3
+ size 4999245744
model-00021-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c1705e1c58723a861e8aae55838a949cb56d2c37ee6ca8c4706f74ff37290b1
3
+ size 4996859144
model-00022-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57da14dc9a451c4d9079959cec2784df8a3ef74675e7b7470956e58da72b0fe7
3
+ size 4999245760
model-00023-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af81c19559b23f77a059c84acd1cff7d308983396870da48baa32b65038e40cd
3
+ size 4996859128
model-00024-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e255ee77f5d3fd331d28ed0254df3bc9a2e2a8a71ce514f130a304930e2da566
3
+ size 4999245776
model-00025-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6757a53605ad3388bd41c9cefc98c8a00708f5ca652349947c69831f6a5c6ec
3
+ size 4996859112
model-00026-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425437b0bbf049253841dc1e6acb1d6949d054471f7cd22e037ab7042111f5c0
3
+ size 4999245784
model-00027-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c623a59fa7cbe59bb9c3bbf585988579286ea6aecccb8c04f787ecc948e55ee4
3
+ size 4996859096
model-00028-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4769c9559ddfbf787c349398f25e3a3d49a030c50d497f5f84b520bc2c683f03
3
+ size 4999245808
model-00029-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0f5ec5be6f9709d1dd3924c998666e64a262af9bf08b52b895557088048348
3
+ size 4996859088
model-00030-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5935e5ece40e9d4aca0e07482313c8fc83348a947bca9d7d95db33ba1470126b
3
+ size 4999245824
model-00031-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb4af3bf07d0fcd72a127fe5244de51d679155b07651605902746b2f4155ca2
3
+ size 4996859072
model-00032-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3e3688b05f0dc03f7c2ab4c7ca193935d03e7c0903abee8a0569e76f6dbd8b7
3
+ size 4999245832
model-00033-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bc1003a2b9c1b9a42aa29164e89a6a4eafcc2a3a90f4fa0c8984e06393c7bb0
3
+ size 4996859056
model-00034-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbab1b5361a7ce46e3ac09024f939bff15d58a9389598b1792b52ab44f63527f
3
+ size 4999245848
model-00035-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba6d8bb3a8a4b8d35d9ef529f311c2834a5ce8f560438089b309c27fd425970
3
+ size 4996859040
model-00036-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f843dfd45990db7283f0f995760598784536ee980b201d34b1e49e47f5f71131
3
+ size 4999245880
model-00037-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6a8edb2cc2ad9375859ffe28126579309be6b91a70c2b0c8bd98605379984b
3
+ size 4996859008
model-00038-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b4bcd5c9144df7e7fbabe45642ccca98f53e612e8839d3f502bf14907465c1
3
+ size 4999245904
model-00039-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec91a1dbeaa217910789948f20e40799c2ce1567802eaa43a437094274168a3
3
+ size 4996858976
model-00040-of-00040.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d9d19cbf752d4e55dbce896138bbeddb67e01dd56b436cdd8300f8d31a2122
3
+ size 3353595000
model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3a8f0ce76e170b77772d860877140d52302ae65eece50ca85cf6cfbd57caa6
3
+ size 12338597
quantization_config.json ADDED
@@ -0,0 +1,1967 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 2,
3
+ "group_size": 64,
4
+ "sym": true,
5
+ "data_type": "int",
6
+ "seqlen": 512,
7
+ "batch_size": 4,
8
+ "iters": 400,
9
+ "nsamples": 512,
10
+ "autoround_version": "0.5.1",
11
+ "quant_method": "auto-round",
12
+ "packing_format": "auto_round:auto_gptq",
13
+ "extra_config": {
14
+ "model.layers.0.self_attn.q_a_proj": {
15
+ "bits": 4,
16
+ "group_size": 128
17
+ },
18
+ "model.layers.0.self_attn.q_b_proj": {
19
+ "bits": 4,
20
+ "group_size": 128
21
+ },
22
+ "model.layers.0.self_attn.kv_a_proj_with_mqa": {
23
+ "bits": 4,
24
+ "group_size": 128
25
+ },
26
+ "model.layers.0.self_attn.kv_b_proj": {
27
+ "bits": 4,
28
+ "group_size": 128
29
+ },
30
+ "model.layers.0.self_attn.o_proj": {
31
+ "bits": 4,
32
+ "group_size": 128
33
+ },
34
+ "model.layers.0.mlp.gate_proj": {
35
+ "bits": 4,
36
+ "group_size": 128
37
+ },
38
+ "model.layers.0.mlp.up_proj": {
39
+ "bits": 4,
40
+ "group_size": 128
41
+ },
42
+ "model.layers.0.mlp.down_proj": {
43
+ "bits": 4,
44
+ "group_size": 128
45
+ },
46
+ "model.layers.1.self_attn.q_a_proj": {
47
+ "bits": 4,
48
+ "group_size": 128
49
+ },
50
+ "model.layers.1.self_attn.q_b_proj": {
51
+ "bits": 4,
52
+ "group_size": 128
53
+ },
54
+ "model.layers.1.self_attn.kv_a_proj_with_mqa": {
55
+ "bits": 4,
56
+ "group_size": 128
57
+ },
58
+ "model.layers.1.self_attn.kv_b_proj": {
59
+ "bits": 4,
60
+ "group_size": 128
61
+ },
62
+ "model.layers.1.self_attn.o_proj": {
63
+ "bits": 4,
64
+ "group_size": 128
65
+ },
66
+ "model.layers.1.mlp.gate_proj": {
67
+ "bits": 4,
68
+ "group_size": 128
69
+ },
70
+ "model.layers.1.mlp.up_proj": {
71
+ "bits": 4,
72
+ "group_size": 128
73
+ },
74
+ "model.layers.1.mlp.down_proj": {
75
+ "bits": 4,
76
+ "group_size": 128
77
+ },
78
+ "model.layers.2.self_attn.q_a_proj": {
79
+ "bits": 4,
80
+ "group_size": 128
81
+ },
82
+ "model.layers.2.self_attn.q_b_proj": {
83
+ "bits": 4,
84
+ "group_size": 128
85
+ },
86
+ "model.layers.2.self_attn.kv_a_proj_with_mqa": {
87
+ "bits": 4,
88
+ "group_size": 128
89
+ },
90
+ "model.layers.2.self_attn.kv_b_proj": {
91
+ "bits": 4,
92
+ "group_size": 128
93
+ },
94
+ "model.layers.2.self_attn.o_proj": {
95
+ "bits": 4,
96
+ "group_size": 128
97
+ },
98
+ "model.layers.2.mlp.gate_proj": {
99
+ "bits": 4,
100
+ "group_size": 128
101
+ },
102
+ "model.layers.2.mlp.up_proj": {
103
+ "bits": 4,
104
+ "group_size": 128
105
+ },
106
+ "model.layers.2.mlp.down_proj": {
107
+ "bits": 4,
108
+ "group_size": 128
109
+ },
110
+ "model.layers.3.self_attn.q_a_proj": {
111
+ "bits": 4,
112
+ "group_size": 128
113
+ },
114
+ "model.layers.3.self_attn.q_b_proj": {
115
+ "bits": 4,
116
+ "group_size": 128
117
+ },
118
+ "model.layers.3.self_attn.kv_a_proj_with_mqa": {
119
+ "bits": 4,
120
+ "group_size": 128
121
+ },
122
+ "model.layers.3.self_attn.kv_b_proj": {
123
+ "bits": 4,
124
+ "group_size": 128
125
+ },
126
+ "model.layers.3.self_attn.o_proj": {
127
+ "bits": 4,
128
+ "group_size": 128
129
+ },
130
+ "model.layers.3.mlp.shared_experts.gate_proj": {
131
+ "bits": 4,
132
+ "group_size": 128
133
+ },
134
+ "model.layers.3.mlp.shared_experts.up_proj": {
135
+ "bits": 4,
136
+ "group_size": 128
137
+ },
138
+ "model.layers.3.mlp.shared_experts.down_proj": {
139
+ "bits": 4,
140
+ "group_size": 128
141
+ },
142
+ "model.layers.4.self_attn.q_a_proj": {
143
+ "bits": 4,
144
+ "group_size": 128
145
+ },
146
+ "model.layers.4.self_attn.q_b_proj": {
147
+ "bits": 4,
148
+ "group_size": 128
149
+ },
150
+ "model.layers.4.self_attn.kv_a_proj_with_mqa": {
151
+ "bits": 4,
152
+ "group_size": 128
153
+ },
154
+ "model.layers.4.self_attn.kv_b_proj": {
155
+ "bits": 4,
156
+ "group_size": 128
157
+ },
158
+ "model.layers.4.self_attn.o_proj": {
159
+ "bits": 4,
160
+ "group_size": 128
161
+ },
162
+ "model.layers.4.mlp.shared_experts.gate_proj": {
163
+ "bits": 4,
164
+ "group_size": 128
165
+ },
166
+ "model.layers.4.mlp.shared_experts.up_proj": {
167
+ "bits": 4,
168
+ "group_size": 128
169
+ },
170
+ "model.layers.4.mlp.shared_experts.down_proj": {
171
+ "bits": 4,
172
+ "group_size": 128
173
+ },
174
+ "model.layers.5.self_attn.q_a_proj": {
175
+ "bits": 4,
176
+ "group_size": 128
177
+ },
178
+ "model.layers.5.self_attn.q_b_proj": {
179
+ "bits": 4,
180
+ "group_size": 128
181
+ },
182
+ "model.layers.5.self_attn.kv_a_proj_with_mqa": {
183
+ "bits": 4,
184
+ "group_size": 128
185
+ },
186
+ "model.layers.5.self_attn.kv_b_proj": {
187
+ "bits": 4,
188
+ "group_size": 128
189
+ },
190
+ "model.layers.5.self_attn.o_proj": {
191
+ "bits": 4,
192
+ "group_size": 128
193
+ },
194
+ "model.layers.5.mlp.shared_experts.gate_proj": {
195
+ "bits": 4,
196
+ "group_size": 128
197
+ },
198
+ "model.layers.5.mlp.shared_experts.up_proj": {
199
+ "bits": 4,
200
+ "group_size": 128
201
+ },
202
+ "model.layers.5.mlp.shared_experts.down_proj": {
203
+ "bits": 4,
204
+ "group_size": 128
205
+ },
206
+ "model.layers.6.self_attn.q_a_proj": {
207
+ "bits": 4,
208
+ "group_size": 128
209
+ },
210
+ "model.layers.6.self_attn.q_b_proj": {
211
+ "bits": 4,
212
+ "group_size": 128
213
+ },
214
+ "model.layers.6.self_attn.kv_a_proj_with_mqa": {
215
+ "bits": 4,
216
+ "group_size": 128
217
+ },
218
+ "model.layers.6.self_attn.kv_b_proj": {
219
+ "bits": 4,
220
+ "group_size": 128
221
+ },
222
+ "model.layers.6.self_attn.o_proj": {
223
+ "bits": 4,
224
+ "group_size": 128
225
+ },
226
+ "model.layers.6.mlp.shared_experts.gate_proj": {
227
+ "bits": 4,
228
+ "group_size": 128
229
+ },
230
+ "model.layers.6.mlp.shared_experts.up_proj": {
231
+ "bits": 4,
232
+ "group_size": 128
233
+ },
234
+ "model.layers.6.mlp.shared_experts.down_proj": {
235
+ "bits": 4,
236
+ "group_size": 128
237
+ },
238
+ "model.layers.7.self_attn.q_a_proj": {
239
+ "bits": 4,
240
+ "group_size": 128
241
+ },
242
+ "model.layers.7.self_attn.q_b_proj": {
243
+ "bits": 4,
244
+ "group_size": 128
245
+ },
246
+ "model.layers.7.self_attn.kv_a_proj_with_mqa": {
247
+ "bits": 4,
248
+ "group_size": 128
249
+ },
250
+ "model.layers.7.self_attn.kv_b_proj": {
251
+ "bits": 4,
252
+ "group_size": 128
253
+ },
254
+ "model.layers.7.self_attn.o_proj": {
255
+ "bits": 4,
256
+ "group_size": 128
257
+ },
258
+ "model.layers.7.mlp.shared_experts.gate_proj": {
259
+ "bits": 4,
260
+ "group_size": 128
261
+ },
262
+ "model.layers.7.mlp.shared_experts.up_proj": {
263
+ "bits": 4,
264
+ "group_size": 128
265
+ },
266
+ "model.layers.7.mlp.shared_experts.down_proj": {
267
+ "bits": 4,
268
+ "group_size": 128
269
+ },
270
+ "model.layers.8.self_attn.q_a_proj": {
271
+ "bits": 4,
272
+ "group_size": 128
273
+ },
274
+ "model.layers.8.self_attn.q_b_proj": {
275
+ "bits": 4,
276
+ "group_size": 128
277
+ },
278
+ "model.layers.8.self_attn.kv_a_proj_with_mqa": {
279
+ "bits": 4,
280
+ "group_size": 128
281
+ },
282
+ "model.layers.8.self_attn.kv_b_proj": {
283
+ "bits": 4,
284
+ "group_size": 128
285
+ },
286
+ "model.layers.8.self_attn.o_proj": {
287
+ "bits": 4,
288
+ "group_size": 128
289
+ },
290
+ "model.layers.8.mlp.shared_experts.gate_proj": {
291
+ "bits": 4,
292
+ "group_size": 128
293
+ },
294
+ "model.layers.8.mlp.shared_experts.up_proj": {
295
+ "bits": 4,
296
+ "group_size": 128
297
+ },
298
+ "model.layers.8.mlp.shared_experts.down_proj": {
299
+ "bits": 4,
300
+ "group_size": 128
301
+ },
302
+ "model.layers.9.self_attn.q_a_proj": {
303
+ "bits": 4,
304
+ "group_size": 128
305
+ },
306
+ "model.layers.9.self_attn.q_b_proj": {
307
+ "bits": 4,
308
+ "group_size": 128
309
+ },
310
+ "model.layers.9.self_attn.kv_a_proj_with_mqa": {
311
+ "bits": 4,
312
+ "group_size": 128
313
+ },
314
+ "model.layers.9.self_attn.kv_b_proj": {
315
+ "bits": 4,
316
+ "group_size": 128
317
+ },
318
+ "model.layers.9.self_attn.o_proj": {
319
+ "bits": 4,
320
+ "group_size": 128
321
+ },
322
+ "model.layers.9.mlp.shared_experts.gate_proj": {
323
+ "bits": 4,
324
+ "group_size": 128
325
+ },
326
+ "model.layers.9.mlp.shared_experts.up_proj": {
327
+ "bits": 4,
328
+ "group_size": 128
329
+ },
330
+ "model.layers.9.mlp.shared_experts.down_proj": {
331
+ "bits": 4,
332
+ "group_size": 128
333
+ },
334
+ "model.layers.10.self_attn.q_a_proj": {
335
+ "bits": 4,
336
+ "group_size": 128
337
+ },
338
+ "model.layers.10.self_attn.q_b_proj": {
339
+ "bits": 4,
340
+ "group_size": 128
341
+ },
342
+ "model.layers.10.self_attn.kv_a_proj_with_mqa": {
343
+ "bits": 4,
344
+ "group_size": 128
345
+ },
346
+ "model.layers.10.self_attn.kv_b_proj": {
347
+ "bits": 4,
348
+ "group_size": 128
349
+ },
350
+ "model.layers.10.self_attn.o_proj": {
351
+ "bits": 4,
352
+ "group_size": 128
353
+ },
354
+ "model.layers.10.mlp.shared_experts.gate_proj": {
355
+ "bits": 4,
356
+ "group_size": 128
357
+ },
358
+ "model.layers.10.mlp.shared_experts.up_proj": {
359
+ "bits": 4,
360
+ "group_size": 128
361
+ },
362
+ "model.layers.10.mlp.shared_experts.down_proj": {
363
+ "bits": 4,
364
+ "group_size": 128
365
+ },
366
+ "model.layers.11.self_attn.q_a_proj": {
367
+ "bits": 4,
368
+ "group_size": 128
369
+ },
370
+ "model.layers.11.self_attn.q_b_proj": {
371
+ "bits": 4,
372
+ "group_size": 128
373
+ },
374
+ "model.layers.11.self_attn.kv_a_proj_with_mqa": {
375
+ "bits": 4,
376
+ "group_size": 128
377
+ },
378
+ "model.layers.11.self_attn.kv_b_proj": {
379
+ "bits": 4,
380
+ "group_size": 128
381
+ },
382
+ "model.layers.11.self_attn.o_proj": {
383
+ "bits": 4,
384
+ "group_size": 128
385
+ },
386
+ "model.layers.11.mlp.shared_experts.gate_proj": {
387
+ "bits": 4,
388
+ "group_size": 128
389
+ },
390
+ "model.layers.11.mlp.shared_experts.up_proj": {
391
+ "bits": 4,
392
+ "group_size": 128
393
+ },
394
+ "model.layers.11.mlp.shared_experts.down_proj": {
395
+ "bits": 4,
396
+ "group_size": 128
397
+ },
398
+ "model.layers.12.self_attn.q_a_proj": {
399
+ "bits": 4,
400
+ "group_size": 128
401
+ },
402
+ "model.layers.12.self_attn.q_b_proj": {
403
+ "bits": 4,
404
+ "group_size": 128
405
+ },
406
+ "model.layers.12.self_attn.kv_a_proj_with_mqa": {
407
+ "bits": 4,
408
+ "group_size": 128
409
+ },
410
+ "model.layers.12.self_attn.kv_b_proj": {
411
+ "bits": 4,
412
+ "group_size": 128
413
+ },
414
+ "model.layers.12.self_attn.o_proj": {
415
+ "bits": 4,
416
+ "group_size": 128
417
+ },
418
+ "model.layers.12.mlp.shared_experts.gate_proj": {
419
+ "bits": 4,
420
+ "group_size": 128
421
+ },
422
+ "model.layers.12.mlp.shared_experts.up_proj": {
423
+ "bits": 4,
424
+ "group_size": 128
425
+ },
426
+ "model.layers.12.mlp.shared_experts.down_proj": {
427
+ "bits": 4,
428
+ "group_size": 128
429
+ },
430
+ "model.layers.13.self_attn.q_a_proj": {
431
+ "bits": 4,
432
+ "group_size": 128
433
+ },
434
+ "model.layers.13.self_attn.q_b_proj": {
435
+ "bits": 4,
436
+ "group_size": 128
437
+ },
438
+ "model.layers.13.self_attn.kv_a_proj_with_mqa": {
439
+ "bits": 4,
440
+ "group_size": 128
441
+ },
442
+ "model.layers.13.self_attn.kv_b_proj": {
443
+ "bits": 4,
444
+ "group_size": 128
445
+ },
446
+ "model.layers.13.self_attn.o_proj": {
447
+ "bits": 4,
448
+ "group_size": 128
449
+ },
450
+ "model.layers.13.mlp.shared_experts.gate_proj": {
451
+ "bits": 4,
452
+ "group_size": 128
453
+ },
454
+ "model.layers.13.mlp.shared_experts.up_proj": {
455
+ "bits": 4,
456
+ "group_size": 128
457
+ },
458
+ "model.layers.13.mlp.shared_experts.down_proj": {
459
+ "bits": 4,
460
+ "group_size": 128
461
+ },
462
+ "model.layers.14.self_attn.q_a_proj": {
463
+ "bits": 4,
464
+ "group_size": 128
465
+ },
466
+ "model.layers.14.self_attn.q_b_proj": {
467
+ "bits": 4,
468
+ "group_size": 128
469
+ },
470
+ "model.layers.14.self_attn.kv_a_proj_with_mqa": {
471
+ "bits": 4,
472
+ "group_size": 128
473
+ },
474
+ "model.layers.14.self_attn.kv_b_proj": {
475
+ "bits": 4,
476
+ "group_size": 128
477
+ },
478
+ "model.layers.14.self_attn.o_proj": {
479
+ "bits": 4,
480
+ "group_size": 128
481
+ },
482
+ "model.layers.14.mlp.shared_experts.gate_proj": {
483
+ "bits": 4,
484
+ "group_size": 128
485
+ },
486
+ "model.layers.14.mlp.shared_experts.up_proj": {
487
+ "bits": 4,
488
+ "group_size": 128
489
+ },
490
+ "model.layers.14.mlp.shared_experts.down_proj": {
491
+ "bits": 4,
492
+ "group_size": 128
493
+ },
494
+ "model.layers.15.self_attn.q_a_proj": {
495
+ "bits": 4,
496
+ "group_size": 128
497
+ },
498
+ "model.layers.15.self_attn.q_b_proj": {
499
+ "bits": 4,
500
+ "group_size": 128
501
+ },
502
+ "model.layers.15.self_attn.kv_a_proj_with_mqa": {
503
+ "bits": 4,
504
+ "group_size": 128
505
+ },
506
+ "model.layers.15.self_attn.kv_b_proj": {
507
+ "bits": 4,
508
+ "group_size": 128
509
+ },
510
+ "model.layers.15.self_attn.o_proj": {
511
+ "bits": 4,
512
+ "group_size": 128
513
+ },
514
+ "model.layers.15.mlp.shared_experts.gate_proj": {
515
+ "bits": 4,
516
+ "group_size": 128
517
+ },
518
+ "model.layers.15.mlp.shared_experts.up_proj": {
519
+ "bits": 4,
520
+ "group_size": 128
521
+ },
522
+ "model.layers.15.mlp.shared_experts.down_proj": {
523
+ "bits": 4,
524
+ "group_size": 128
525
+ },
526
+ "model.layers.16.self_attn.q_a_proj": {
527
+ "bits": 4,
528
+ "group_size": 128
529
+ },
530
+ "model.layers.16.self_attn.q_b_proj": {
531
+ "bits": 4,
532
+ "group_size": 128
533
+ },
534
+ "model.layers.16.self_attn.kv_a_proj_with_mqa": {
535
+ "bits": 4,
536
+ "group_size": 128
537
+ },
538
+ "model.layers.16.self_attn.kv_b_proj": {
539
+ "bits": 4,
540
+ "group_size": 128
541
+ },
542
+ "model.layers.16.self_attn.o_proj": {
543
+ "bits": 4,
544
+ "group_size": 128
545
+ },
546
+ "model.layers.16.mlp.shared_experts.gate_proj": {
547
+ "bits": 4,
548
+ "group_size": 128
549
+ },
550
+ "model.layers.16.mlp.shared_experts.up_proj": {
551
+ "bits": 4,
552
+ "group_size": 128
553
+ },
554
+ "model.layers.16.mlp.shared_experts.down_proj": {
555
+ "bits": 4,
556
+ "group_size": 128
557
+ },
558
+ "model.layers.17.self_attn.q_a_proj": {
559
+ "bits": 4,
560
+ "group_size": 128
561
+ },
562
+ "model.layers.17.self_attn.q_b_proj": {
563
+ "bits": 4,
564
+ "group_size": 128
565
+ },
566
+ "model.layers.17.self_attn.kv_a_proj_with_mqa": {
567
+ "bits": 4,
568
+ "group_size": 128
569
+ },
570
+ "model.layers.17.self_attn.kv_b_proj": {
571
+ "bits": 4,
572
+ "group_size": 128
573
+ },
574
+ "model.layers.17.self_attn.o_proj": {
575
+ "bits": 4,
576
+ "group_size": 128
577
+ },
578
+ "model.layers.17.mlp.shared_experts.gate_proj": {
579
+ "bits": 4,
580
+ "group_size": 128
581
+ },
582
+ "model.layers.17.mlp.shared_experts.up_proj": {
583
+ "bits": 4,
584
+ "group_size": 128
585
+ },
586
+ "model.layers.17.mlp.shared_experts.down_proj": {
587
+ "bits": 4,
588
+ "group_size": 128
589
+ },
590
+ "model.layers.18.self_attn.q_a_proj": {
591
+ "bits": 4,
592
+ "group_size": 128
593
+ },
594
+ "model.layers.18.self_attn.q_b_proj": {
595
+ "bits": 4,
596
+ "group_size": 128
597
+ },
598
+ "model.layers.18.self_attn.kv_a_proj_with_mqa": {
599
+ "bits": 4,
600
+ "group_size": 128
601
+ },
602
+ "model.layers.18.self_attn.kv_b_proj": {
603
+ "bits": 4,
604
+ "group_size": 128
605
+ },
606
+ "model.layers.18.self_attn.o_proj": {
607
+ "bits": 4,
608
+ "group_size": 128
609
+ },
610
+ "model.layers.18.mlp.shared_experts.gate_proj": {
611
+ "bits": 4,
612
+ "group_size": 128
613
+ },
614
+ "model.layers.18.mlp.shared_experts.up_proj": {
615
+ "bits": 4,
616
+ "group_size": 128
617
+ },
618
+ "model.layers.18.mlp.shared_experts.down_proj": {
619
+ "bits": 4,
620
+ "group_size": 128
621
+ },
622
+ "model.layers.19.self_attn.q_a_proj": {
623
+ "bits": 4,
624
+ "group_size": 128
625
+ },
626
+ "model.layers.19.self_attn.q_b_proj": {
627
+ "bits": 4,
628
+ "group_size": 128
629
+ },
630
+ "model.layers.19.self_attn.kv_a_proj_with_mqa": {
631
+ "bits": 4,
632
+ "group_size": 128
633
+ },
634
+ "model.layers.19.self_attn.kv_b_proj": {
635
+ "bits": 4,
636
+ "group_size": 128
637
+ },
638
+ "model.layers.19.self_attn.o_proj": {
639
+ "bits": 4,
640
+ "group_size": 128
641
+ },
642
+ "model.layers.19.mlp.shared_experts.gate_proj": {
643
+ "bits": 4,
644
+ "group_size": 128
645
+ },
646
+ "model.layers.19.mlp.shared_experts.up_proj": {
647
+ "bits": 4,
648
+ "group_size": 128
649
+ },
650
+ "model.layers.19.mlp.shared_experts.down_proj": {
651
+ "bits": 4,
652
+ "group_size": 128
653
+ },
654
+ "model.layers.20.self_attn.q_a_proj": {
655
+ "bits": 4,
656
+ "group_size": 128
657
+ },
658
+ "model.layers.20.self_attn.q_b_proj": {
659
+ "bits": 4,
660
+ "group_size": 128
661
+ },
662
+ "model.layers.20.self_attn.kv_a_proj_with_mqa": {
663
+ "bits": 4,
664
+ "group_size": 128
665
+ },
666
+ "model.layers.20.self_attn.kv_b_proj": {
667
+ "bits": 4,
668
+ "group_size": 128
669
+ },
670
+ "model.layers.20.self_attn.o_proj": {
671
+ "bits": 4,
672
+ "group_size": 128
673
+ },
674
+ "model.layers.20.mlp.shared_experts.gate_proj": {
675
+ "bits": 4,
676
+ "group_size": 128
677
+ },
678
+ "model.layers.20.mlp.shared_experts.up_proj": {
679
+ "bits": 4,
680
+ "group_size": 128
681
+ },
682
+ "model.layers.20.mlp.shared_experts.down_proj": {
683
+ "bits": 4,
684
+ "group_size": 128
685
+ },
686
+ "model.layers.21.self_attn.q_a_proj": {
687
+ "bits": 4,
688
+ "group_size": 128
689
+ },
690
+ "model.layers.21.self_attn.q_b_proj": {
691
+ "bits": 4,
692
+ "group_size": 128
693
+ },
694
+ "model.layers.21.self_attn.kv_a_proj_with_mqa": {
695
+ "bits": 4,
696
+ "group_size": 128
697
+ },
698
+ "model.layers.21.self_attn.kv_b_proj": {
699
+ "bits": 4,
700
+ "group_size": 128
701
+ },
702
+ "model.layers.21.self_attn.o_proj": {
703
+ "bits": 4,
704
+ "group_size": 128
705
+ },
706
+ "model.layers.21.mlp.shared_experts.gate_proj": {
707
+ "bits": 4,
708
+ "group_size": 128
709
+ },
710
+ "model.layers.21.mlp.shared_experts.up_proj": {
711
+ "bits": 4,
712
+ "group_size": 128
713
+ },
714
+ "model.layers.21.mlp.shared_experts.down_proj": {
715
+ "bits": 4,
716
+ "group_size": 128
717
+ },
718
+ "model.layers.22.self_attn.q_a_proj": {
719
+ "bits": 4,
720
+ "group_size": 128
721
+ },
722
+ "model.layers.22.self_attn.q_b_proj": {
723
+ "bits": 4,
724
+ "group_size": 128
725
+ },
726
+ "model.layers.22.self_attn.kv_a_proj_with_mqa": {
727
+ "bits": 4,
728
+ "group_size": 128
729
+ },
730
+ "model.layers.22.self_attn.kv_b_proj": {
731
+ "bits": 4,
732
+ "group_size": 128
733
+ },
734
+ "model.layers.22.self_attn.o_proj": {
735
+ "bits": 4,
736
+ "group_size": 128
737
+ },
738
+ "model.layers.22.mlp.shared_experts.gate_proj": {
739
+ "bits": 4,
740
+ "group_size": 128
741
+ },
742
+ "model.layers.22.mlp.shared_experts.up_proj": {
743
+ "bits": 4,
744
+ "group_size": 128
745
+ },
746
+ "model.layers.22.mlp.shared_experts.down_proj": {
747
+ "bits": 4,
748
+ "group_size": 128
749
+ },
750
+ "model.layers.23.self_attn.q_a_proj": {
751
+ "bits": 4,
752
+ "group_size": 128
753
+ },
754
+ "model.layers.23.self_attn.q_b_proj": {
755
+ "bits": 4,
756
+ "group_size": 128
757
+ },
758
+ "model.layers.23.self_attn.kv_a_proj_with_mqa": {
759
+ "bits": 4,
760
+ "group_size": 128
761
+ },
762
+ "model.layers.23.self_attn.kv_b_proj": {
763
+ "bits": 4,
764
+ "group_size": 128
765
+ },
766
+ "model.layers.23.self_attn.o_proj": {
767
+ "bits": 4,
768
+ "group_size": 128
769
+ },
770
+ "model.layers.23.mlp.shared_experts.gate_proj": {
771
+ "bits": 4,
772
+ "group_size": 128
773
+ },
774
+ "model.layers.23.mlp.shared_experts.up_proj": {
775
+ "bits": 4,
776
+ "group_size": 128
777
+ },
778
+ "model.layers.23.mlp.shared_experts.down_proj": {
779
+ "bits": 4,
780
+ "group_size": 128
781
+ },
782
+ "model.layers.24.self_attn.q_a_proj": {
783
+ "bits": 4,
784
+ "group_size": 128
785
+ },
786
+ "model.layers.24.self_attn.q_b_proj": {
787
+ "bits": 4,
788
+ "group_size": 128
789
+ },
790
+ "model.layers.24.self_attn.kv_a_proj_with_mqa": {
791
+ "bits": 4,
792
+ "group_size": 128
793
+ },
794
+ "model.layers.24.self_attn.kv_b_proj": {
795
+ "bits": 4,
796
+ "group_size": 128
797
+ },
798
+ "model.layers.24.self_attn.o_proj": {
799
+ "bits": 4,
800
+ "group_size": 128
801
+ },
802
+ "model.layers.24.mlp.shared_experts.gate_proj": {
803
+ "bits": 4,
804
+ "group_size": 128
805
+ },
806
+ "model.layers.24.mlp.shared_experts.up_proj": {
807
+ "bits": 4,
808
+ "group_size": 128
809
+ },
810
+ "model.layers.24.mlp.shared_experts.down_proj": {
811
+ "bits": 4,
812
+ "group_size": 128
813
+ },
814
+ "model.layers.25.self_attn.q_a_proj": {
815
+ "bits": 4,
816
+ "group_size": 128
817
+ },
818
+ "model.layers.25.self_attn.q_b_proj": {
819
+ "bits": 4,
820
+ "group_size": 128
821
+ },
822
+ "model.layers.25.self_attn.kv_a_proj_with_mqa": {
823
+ "bits": 4,
824
+ "group_size": 128
825
+ },
826
+ "model.layers.25.self_attn.kv_b_proj": {
827
+ "bits": 4,
828
+ "group_size": 128
829
+ },
830
+ "model.layers.25.self_attn.o_proj": {
831
+ "bits": 4,
832
+ "group_size": 128
833
+ },
834
+ "model.layers.25.mlp.shared_experts.gate_proj": {
835
+ "bits": 4,
836
+ "group_size": 128
837
+ },
838
+ "model.layers.25.mlp.shared_experts.up_proj": {
839
+ "bits": 4,
840
+ "group_size": 128
841
+ },
842
+ "model.layers.25.mlp.shared_experts.down_proj": {
843
+ "bits": 4,
844
+ "group_size": 128
845
+ },
846
+ "model.layers.26.self_attn.q_a_proj": {
847
+ "bits": 4,
848
+ "group_size": 128
849
+ },
850
+ "model.layers.26.self_attn.q_b_proj": {
851
+ "bits": 4,
852
+ "group_size": 128
853
+ },
854
+ "model.layers.26.self_attn.kv_a_proj_with_mqa": {
855
+ "bits": 4,
856
+ "group_size": 128
857
+ },
858
+ "model.layers.26.self_attn.kv_b_proj": {
859
+ "bits": 4,
860
+ "group_size": 128
861
+ },
862
+ "model.layers.26.self_attn.o_proj": {
863
+ "bits": 4,
864
+ "group_size": 128
865
+ },
866
+ "model.layers.26.mlp.shared_experts.gate_proj": {
867
+ "bits": 4,
868
+ "group_size": 128
869
+ },
870
+ "model.layers.26.mlp.shared_experts.up_proj": {
871
+ "bits": 4,
872
+ "group_size": 128
873
+ },
874
+ "model.layers.26.mlp.shared_experts.down_proj": {
875
+ "bits": 4,
876
+ "group_size": 128
877
+ },
878
+ "model.layers.27.self_attn.q_a_proj": {
879
+ "bits": 4,
880
+ "group_size": 128
881
+ },
882
+ "model.layers.27.self_attn.q_b_proj": {
883
+ "bits": 4,
884
+ "group_size": 128
885
+ },
886
+ "model.layers.27.self_attn.kv_a_proj_with_mqa": {
887
+ "bits": 4,
888
+ "group_size": 128
889
+ },
890
+ "model.layers.27.self_attn.kv_b_proj": {
891
+ "bits": 4,
892
+ "group_size": 128
893
+ },
894
+ "model.layers.27.self_attn.o_proj": {
895
+ "bits": 4,
896
+ "group_size": 128
897
+ },
898
+ "model.layers.27.mlp.shared_experts.gate_proj": {
899
+ "bits": 4,
900
+ "group_size": 128
901
+ },
902
+ "model.layers.27.mlp.shared_experts.up_proj": {
903
+ "bits": 4,
904
+ "group_size": 128
905
+ },
906
+ "model.layers.27.mlp.shared_experts.down_proj": {
907
+ "bits": 4,
908
+ "group_size": 128
909
+ },
910
+ "model.layers.28.self_attn.q_a_proj": {
911
+ "bits": 4,
912
+ "group_size": 128
913
+ },
914
+ "model.layers.28.self_attn.q_b_proj": {
915
+ "bits": 4,
916
+ "group_size": 128
917
+ },
918
+ "model.layers.28.self_attn.kv_a_proj_with_mqa": {
919
+ "bits": 4,
920
+ "group_size": 128
921
+ },
922
+ "model.layers.28.self_attn.kv_b_proj": {
923
+ "bits": 4,
924
+ "group_size": 128
925
+ },
926
+ "model.layers.28.self_attn.o_proj": {
927
+ "bits": 4,
928
+ "group_size": 128
929
+ },
930
+ "model.layers.28.mlp.shared_experts.gate_proj": {
931
+ "bits": 4,
932
+ "group_size": 128
933
+ },
934
+ "model.layers.28.mlp.shared_experts.up_proj": {
935
+ "bits": 4,
936
+ "group_size": 128
937
+ },
938
+ "model.layers.28.mlp.shared_experts.down_proj": {
939
+ "bits": 4,
940
+ "group_size": 128
941
+ },
942
+ "model.layers.29.self_attn.q_a_proj": {
943
+ "bits": 4,
944
+ "group_size": 128
945
+ },
946
+ "model.layers.29.self_attn.q_b_proj": {
947
+ "bits": 4,
948
+ "group_size": 128
949
+ },
950
+ "model.layers.29.self_attn.kv_a_proj_with_mqa": {
951
+ "bits": 4,
952
+ "group_size": 128
953
+ },
954
+ "model.layers.29.self_attn.kv_b_proj": {
955
+ "bits": 4,
956
+ "group_size": 128
957
+ },
958
+ "model.layers.29.self_attn.o_proj": {
959
+ "bits": 4,
960
+ "group_size": 128
961
+ },
962
+ "model.layers.29.mlp.shared_experts.gate_proj": {
963
+ "bits": 4,
964
+ "group_size": 128
965
+ },
966
+ "model.layers.29.mlp.shared_experts.up_proj": {
967
+ "bits": 4,
968
+ "group_size": 128
969
+ },
970
+ "model.layers.29.mlp.shared_experts.down_proj": {
971
+ "bits": 4,
972
+ "group_size": 128
973
+ },
974
+ "model.layers.30.self_attn.q_a_proj": {
975
+ "bits": 4,
976
+ "group_size": 128
977
+ },
978
+ "model.layers.30.self_attn.q_b_proj": {
979
+ "bits": 4,
980
+ "group_size": 128
981
+ },
982
+ "model.layers.30.self_attn.kv_a_proj_with_mqa": {
983
+ "bits": 4,
984
+ "group_size": 128
985
+ },
986
+ "model.layers.30.self_attn.kv_b_proj": {
987
+ "bits": 4,
988
+ "group_size": 128
989
+ },
990
+ "model.layers.30.self_attn.o_proj": {
991
+ "bits": 4,
992
+ "group_size": 128
993
+ },
994
+ "model.layers.30.mlp.shared_experts.gate_proj": {
995
+ "bits": 4,
996
+ "group_size": 128
997
+ },
998
+ "model.layers.30.mlp.shared_experts.up_proj": {
999
+ "bits": 4,
1000
+ "group_size": 128
1001
+ },
1002
+ "model.layers.30.mlp.shared_experts.down_proj": {
1003
+ "bits": 4,
1004
+ "group_size": 128
1005
+ },
1006
+ "model.layers.31.self_attn.q_a_proj": {
1007
+ "bits": 4,
1008
+ "group_size": 128
1009
+ },
1010
+ "model.layers.31.self_attn.q_b_proj": {
1011
+ "bits": 4,
1012
+ "group_size": 128
1013
+ },
1014
+ "model.layers.31.self_attn.kv_a_proj_with_mqa": {
1015
+ "bits": 4,
1016
+ "group_size": 128
1017
+ },
1018
+ "model.layers.31.self_attn.kv_b_proj": {
1019
+ "bits": 4,
1020
+ "group_size": 128
1021
+ },
1022
+ "model.layers.31.self_attn.o_proj": {
1023
+ "bits": 4,
1024
+ "group_size": 128
1025
+ },
1026
+ "model.layers.31.mlp.shared_experts.gate_proj": {
1027
+ "bits": 4,
1028
+ "group_size": 128
1029
+ },
1030
+ "model.layers.31.mlp.shared_experts.up_proj": {
1031
+ "bits": 4,
1032
+ "group_size": 128
1033
+ },
1034
+ "model.layers.31.mlp.shared_experts.down_proj": {
1035
+ "bits": 4,
1036
+ "group_size": 128
1037
+ },
1038
+ "model.layers.32.self_attn.q_a_proj": {
1039
+ "bits": 4,
1040
+ "group_size": 128
1041
+ },
1042
+ "model.layers.32.self_attn.q_b_proj": {
1043
+ "bits": 4,
1044
+ "group_size": 128
1045
+ },
1046
+ "model.layers.32.self_attn.kv_a_proj_with_mqa": {
1047
+ "bits": 4,
1048
+ "group_size": 128
1049
+ },
1050
+ "model.layers.32.self_attn.kv_b_proj": {
1051
+ "bits": 4,
1052
+ "group_size": 128
1053
+ },
1054
+ "model.layers.32.self_attn.o_proj": {
1055
+ "bits": 4,
1056
+ "group_size": 128
1057
+ },
1058
+ "model.layers.32.mlp.shared_experts.gate_proj": {
1059
+ "bits": 4,
1060
+ "group_size": 128
1061
+ },
1062
+ "model.layers.32.mlp.shared_experts.up_proj": {
1063
+ "bits": 4,
1064
+ "group_size": 128
1065
+ },
1066
+ "model.layers.32.mlp.shared_experts.down_proj": {
1067
+ "bits": 4,
1068
+ "group_size": 128
1069
+ },
1070
+ "model.layers.33.self_attn.q_a_proj": {
1071
+ "bits": 4,
1072
+ "group_size": 128
1073
+ },
1074
+ "model.layers.33.self_attn.q_b_proj": {
1075
+ "bits": 4,
1076
+ "group_size": 128
1077
+ },
1078
+ "model.layers.33.self_attn.kv_a_proj_with_mqa": {
1079
+ "bits": 4,
1080
+ "group_size": 128
1081
+ },
1082
+ "model.layers.33.self_attn.kv_b_proj": {
1083
+ "bits": 4,
1084
+ "group_size": 128
1085
+ },
1086
+ "model.layers.33.self_attn.o_proj": {
1087
+ "bits": 4,
1088
+ "group_size": 128
1089
+ },
1090
+ "model.layers.33.mlp.shared_experts.gate_proj": {
1091
+ "bits": 4,
1092
+ "group_size": 128
1093
+ },
1094
+ "model.layers.33.mlp.shared_experts.up_proj": {
1095
+ "bits": 4,
1096
+ "group_size": 128
1097
+ },
1098
+ "model.layers.33.mlp.shared_experts.down_proj": {
1099
+ "bits": 4,
1100
+ "group_size": 128
1101
+ },
1102
+ "model.layers.34.self_attn.q_a_proj": {
1103
+ "bits": 4,
1104
+ "group_size": 128
1105
+ },
1106
+ "model.layers.34.self_attn.q_b_proj": {
1107
+ "bits": 4,
1108
+ "group_size": 128
1109
+ },
1110
+ "model.layers.34.self_attn.kv_a_proj_with_mqa": {
1111
+ "bits": 4,
1112
+ "group_size": 128
1113
+ },
1114
+ "model.layers.34.self_attn.kv_b_proj": {
1115
+ "bits": 4,
1116
+ "group_size": 128
1117
+ },
1118
+ "model.layers.34.self_attn.o_proj": {
1119
+ "bits": 4,
1120
+ "group_size": 128
1121
+ },
1122
+ "model.layers.34.mlp.shared_experts.gate_proj": {
1123
+ "bits": 4,
1124
+ "group_size": 128
1125
+ },
1126
+ "model.layers.34.mlp.shared_experts.up_proj": {
1127
+ "bits": 4,
1128
+ "group_size": 128
1129
+ },
1130
+ "model.layers.34.mlp.shared_experts.down_proj": {
1131
+ "bits": 4,
1132
+ "group_size": 128
1133
+ },
1134
+ "model.layers.35.self_attn.q_a_proj": {
1135
+ "bits": 4,
1136
+ "group_size": 128
1137
+ },
1138
+ "model.layers.35.self_attn.q_b_proj": {
1139
+ "bits": 4,
1140
+ "group_size": 128
1141
+ },
1142
+ "model.layers.35.self_attn.kv_a_proj_with_mqa": {
1143
+ "bits": 4,
1144
+ "group_size": 128
1145
+ },
1146
+ "model.layers.35.self_attn.kv_b_proj": {
1147
+ "bits": 4,
1148
+ "group_size": 128
1149
+ },
1150
+ "model.layers.35.self_attn.o_proj": {
1151
+ "bits": 4,
1152
+ "group_size": 128
1153
+ },
1154
+ "model.layers.35.mlp.shared_experts.gate_proj": {
1155
+ "bits": 4,
1156
+ "group_size": 128
1157
+ },
1158
+ "model.layers.35.mlp.shared_experts.up_proj": {
1159
+ "bits": 4,
1160
+ "group_size": 128
1161
+ },
1162
+ "model.layers.35.mlp.shared_experts.down_proj": {
1163
+ "bits": 4,
1164
+ "group_size": 128
1165
+ },
1166
+ "model.layers.36.self_attn.q_a_proj": {
1167
+ "bits": 4,
1168
+ "group_size": 128
1169
+ },
1170
+ "model.layers.36.self_attn.q_b_proj": {
1171
+ "bits": 4,
1172
+ "group_size": 128
1173
+ },
1174
+ "model.layers.36.self_attn.kv_a_proj_with_mqa": {
1175
+ "bits": 4,
1176
+ "group_size": 128
1177
+ },
1178
+ "model.layers.36.self_attn.kv_b_proj": {
1179
+ "bits": 4,
1180
+ "group_size": 128
1181
+ },
1182
+ "model.layers.36.self_attn.o_proj": {
1183
+ "bits": 4,
1184
+ "group_size": 128
1185
+ },
1186
+ "model.layers.36.mlp.shared_experts.gate_proj": {
1187
+ "bits": 4,
1188
+ "group_size": 128
1189
+ },
1190
+ "model.layers.36.mlp.shared_experts.up_proj": {
1191
+ "bits": 4,
1192
+ "group_size": 128
1193
+ },
1194
+ "model.layers.36.mlp.shared_experts.down_proj": {
1195
+ "bits": 4,
1196
+ "group_size": 128
1197
+ },
1198
+ "model.layers.37.self_attn.q_a_proj": {
1199
+ "bits": 4,
1200
+ "group_size": 128
1201
+ },
1202
+ "model.layers.37.self_attn.q_b_proj": {
1203
+ "bits": 4,
1204
+ "group_size": 128
1205
+ },
1206
+ "model.layers.37.self_attn.kv_a_proj_with_mqa": {
1207
+ "bits": 4,
1208
+ "group_size": 128
1209
+ },
1210
+ "model.layers.37.self_attn.kv_b_proj": {
1211
+ "bits": 4,
1212
+ "group_size": 128
1213
+ },
1214
+ "model.layers.37.self_attn.o_proj": {
1215
+ "bits": 4,
1216
+ "group_size": 128
1217
+ },
1218
+ "model.layers.37.mlp.shared_experts.gate_proj": {
1219
+ "bits": 4,
1220
+ "group_size": 128
1221
+ },
1222
+ "model.layers.37.mlp.shared_experts.up_proj": {
1223
+ "bits": 4,
1224
+ "group_size": 128
1225
+ },
1226
+ "model.layers.37.mlp.shared_experts.down_proj": {
1227
+ "bits": 4,
1228
+ "group_size": 128
1229
+ },
1230
+ "model.layers.38.self_attn.q_a_proj": {
1231
+ "bits": 4,
1232
+ "group_size": 128
1233
+ },
1234
+ "model.layers.38.self_attn.q_b_proj": {
1235
+ "bits": 4,
1236
+ "group_size": 128
1237
+ },
1238
+ "model.layers.38.self_attn.kv_a_proj_with_mqa": {
1239
+ "bits": 4,
1240
+ "group_size": 128
1241
+ },
1242
+ "model.layers.38.self_attn.kv_b_proj": {
1243
+ "bits": 4,
1244
+ "group_size": 128
1245
+ },
1246
+ "model.layers.38.self_attn.o_proj": {
1247
+ "bits": 4,
1248
+ "group_size": 128
1249
+ },
1250
+ "model.layers.38.mlp.shared_experts.gate_proj": {
1251
+ "bits": 4,
1252
+ "group_size": 128
1253
+ },
1254
+ "model.layers.38.mlp.shared_experts.up_proj": {
1255
+ "bits": 4,
1256
+ "group_size": 128
1257
+ },
1258
+ "model.layers.38.mlp.shared_experts.down_proj": {
1259
+ "bits": 4,
1260
+ "group_size": 128
1261
+ },
1262
+ "model.layers.39.self_attn.q_a_proj": {
1263
+ "bits": 4,
1264
+ "group_size": 128
1265
+ },
1266
+ "model.layers.39.self_attn.q_b_proj": {
1267
+ "bits": 4,
1268
+ "group_size": 128
1269
+ },
1270
+ "model.layers.39.self_attn.kv_a_proj_with_mqa": {
1271
+ "bits": 4,
1272
+ "group_size": 128
1273
+ },
1274
+ "model.layers.39.self_attn.kv_b_proj": {
1275
+ "bits": 4,
1276
+ "group_size": 128
1277
+ },
1278
+ "model.layers.39.self_attn.o_proj": {
1279
+ "bits": 4,
1280
+ "group_size": 128
1281
+ },
1282
+ "model.layers.39.mlp.shared_experts.gate_proj": {
1283
+ "bits": 4,
1284
+ "group_size": 128
1285
+ },
1286
+ "model.layers.39.mlp.shared_experts.up_proj": {
1287
+ "bits": 4,
1288
+ "group_size": 128
1289
+ },
1290
+ "model.layers.39.mlp.shared_experts.down_proj": {
1291
+ "bits": 4,
1292
+ "group_size": 128
1293
+ },
1294
+ "model.layers.40.self_attn.q_a_proj": {
1295
+ "bits": 4,
1296
+ "group_size": 128
1297
+ },
1298
+ "model.layers.40.self_attn.q_b_proj": {
1299
+ "bits": 4,
1300
+ "group_size": 128
1301
+ },
1302
+ "model.layers.40.self_attn.kv_a_proj_with_mqa": {
1303
+ "bits": 4,
1304
+ "group_size": 128
1305
+ },
1306
+ "model.layers.40.self_attn.kv_b_proj": {
1307
+ "bits": 4,
1308
+ "group_size": 128
1309
+ },
1310
+ "model.layers.40.self_attn.o_proj": {
1311
+ "bits": 4,
1312
+ "group_size": 128
1313
+ },
1314
+ "model.layers.40.mlp.shared_experts.gate_proj": {
1315
+ "bits": 4,
1316
+ "group_size": 128
1317
+ },
1318
+ "model.layers.40.mlp.shared_experts.up_proj": {
1319
+ "bits": 4,
1320
+ "group_size": 128
1321
+ },
1322
+ "model.layers.40.mlp.shared_experts.down_proj": {
1323
+ "bits": 4,
1324
+ "group_size": 128
1325
+ },
1326
+ "model.layers.41.self_attn.q_a_proj": {
1327
+ "bits": 4,
1328
+ "group_size": 128
1329
+ },
1330
+ "model.layers.41.self_attn.q_b_proj": {
1331
+ "bits": 4,
1332
+ "group_size": 128
1333
+ },
1334
+ "model.layers.41.self_attn.kv_a_proj_with_mqa": {
1335
+ "bits": 4,
1336
+ "group_size": 128
1337
+ },
1338
+ "model.layers.41.self_attn.kv_b_proj": {
1339
+ "bits": 4,
1340
+ "group_size": 128
1341
+ },
1342
+ "model.layers.41.self_attn.o_proj": {
1343
+ "bits": 4,
1344
+ "group_size": 128
1345
+ },
1346
+ "model.layers.41.mlp.shared_experts.gate_proj": {
1347
+ "bits": 4,
1348
+ "group_size": 128
1349
+ },
1350
+ "model.layers.41.mlp.shared_experts.up_proj": {
1351
+ "bits": 4,
1352
+ "group_size": 128
1353
+ },
1354
+ "model.layers.41.mlp.shared_experts.down_proj": {
1355
+ "bits": 4,
1356
+ "group_size": 128
1357
+ },
1358
+ "model.layers.42.self_attn.q_a_proj": {
1359
+ "bits": 4,
1360
+ "group_size": 128
1361
+ },
1362
+ "model.layers.42.self_attn.q_b_proj": {
1363
+ "bits": 4,
1364
+ "group_size": 128
1365
+ },
1366
+ "model.layers.42.self_attn.kv_a_proj_with_mqa": {
1367
+ "bits": 4,
1368
+ "group_size": 128
1369
+ },
1370
+ "model.layers.42.self_attn.kv_b_proj": {
1371
+ "bits": 4,
1372
+ "group_size": 128
1373
+ },
1374
+ "model.layers.42.self_attn.o_proj": {
1375
+ "bits": 4,
1376
+ "group_size": 128
1377
+ },
1378
+ "model.layers.42.mlp.shared_experts.gate_proj": {
1379
+ "bits": 4,
1380
+ "group_size": 128
1381
+ },
1382
+ "model.layers.42.mlp.shared_experts.up_proj": {
1383
+ "bits": 4,
1384
+ "group_size": 128
1385
+ },
1386
+ "model.layers.42.mlp.shared_experts.down_proj": {
1387
+ "bits": 4,
1388
+ "group_size": 128
1389
+ },
1390
+ "model.layers.43.self_attn.q_a_proj": {
1391
+ "bits": 4,
1392
+ "group_size": 128
1393
+ },
1394
+ "model.layers.43.self_attn.q_b_proj": {
1395
+ "bits": 4,
1396
+ "group_size": 128
1397
+ },
1398
+ "model.layers.43.self_attn.kv_a_proj_with_mqa": {
1399
+ "bits": 4,
1400
+ "group_size": 128
1401
+ },
1402
+ "model.layers.43.self_attn.kv_b_proj": {
1403
+ "bits": 4,
1404
+ "group_size": 128
1405
+ },
1406
+ "model.layers.43.self_attn.o_proj": {
1407
+ "bits": 4,
1408
+ "group_size": 128
1409
+ },
1410
+ "model.layers.43.mlp.shared_experts.gate_proj": {
1411
+ "bits": 4,
1412
+ "group_size": 128
1413
+ },
1414
+ "model.layers.43.mlp.shared_experts.up_proj": {
1415
+ "bits": 4,
1416
+ "group_size": 128
1417
+ },
1418
+ "model.layers.43.mlp.shared_experts.down_proj": {
1419
+ "bits": 4,
1420
+ "group_size": 128
1421
+ },
1422
+ "model.layers.44.self_attn.q_a_proj": {
1423
+ "bits": 4,
1424
+ "group_size": 128
1425
+ },
1426
+ "model.layers.44.self_attn.q_b_proj": {
1427
+ "bits": 4,
1428
+ "group_size": 128
1429
+ },
1430
+ "model.layers.44.self_attn.kv_a_proj_with_mqa": {
1431
+ "bits": 4,
1432
+ "group_size": 128
1433
+ },
1434
+ "model.layers.44.self_attn.kv_b_proj": {
1435
+ "bits": 4,
1436
+ "group_size": 128
1437
+ },
1438
+ "model.layers.44.self_attn.o_proj": {
1439
+ "bits": 4,
1440
+ "group_size": 128
1441
+ },
1442
+ "model.layers.44.mlp.shared_experts.gate_proj": {
1443
+ "bits": 4,
1444
+ "group_size": 128
1445
+ },
1446
+ "model.layers.44.mlp.shared_experts.up_proj": {
1447
+ "bits": 4,
1448
+ "group_size": 128
1449
+ },
1450
+ "model.layers.44.mlp.shared_experts.down_proj": {
1451
+ "bits": 4,
1452
+ "group_size": 128
1453
+ },
1454
+ "model.layers.45.self_attn.q_a_proj": {
1455
+ "bits": 4,
1456
+ "group_size": 128
1457
+ },
1458
+ "model.layers.45.self_attn.q_b_proj": {
1459
+ "bits": 4,
1460
+ "group_size": 128
1461
+ },
1462
+ "model.layers.45.self_attn.kv_a_proj_with_mqa": {
1463
+ "bits": 4,
1464
+ "group_size": 128
1465
+ },
1466
+ "model.layers.45.self_attn.kv_b_proj": {
1467
+ "bits": 4,
1468
+ "group_size": 128
1469
+ },
1470
+ "model.layers.45.self_attn.o_proj": {
1471
+ "bits": 4,
1472
+ "group_size": 128
1473
+ },
1474
+ "model.layers.45.mlp.shared_experts.gate_proj": {
1475
+ "bits": 4,
1476
+ "group_size": 128
1477
+ },
1478
+ "model.layers.45.mlp.shared_experts.up_proj": {
1479
+ "bits": 4,
1480
+ "group_size": 128
1481
+ },
1482
+ "model.layers.45.mlp.shared_experts.down_proj": {
1483
+ "bits": 4,
1484
+ "group_size": 128
1485
+ },
1486
+ "model.layers.46.self_attn.q_a_proj": {
1487
+ "bits": 4,
1488
+ "group_size": 128
1489
+ },
1490
+ "model.layers.46.self_attn.q_b_proj": {
1491
+ "bits": 4,
1492
+ "group_size": 128
1493
+ },
1494
+ "model.layers.46.self_attn.kv_a_proj_with_mqa": {
1495
+ "bits": 4,
1496
+ "group_size": 128
1497
+ },
1498
+ "model.layers.46.self_attn.kv_b_proj": {
1499
+ "bits": 4,
1500
+ "group_size": 128
1501
+ },
1502
+ "model.layers.46.self_attn.o_proj": {
1503
+ "bits": 4,
1504
+ "group_size": 128
1505
+ },
1506
+ "model.layers.46.mlp.shared_experts.gate_proj": {
1507
+ "bits": 4,
1508
+ "group_size": 128
1509
+ },
1510
+ "model.layers.46.mlp.shared_experts.up_proj": {
1511
+ "bits": 4,
1512
+ "group_size": 128
1513
+ },
1514
+ "model.layers.46.mlp.shared_experts.down_proj": {
1515
+ "bits": 4,
1516
+ "group_size": 128
1517
+ },
1518
+ "model.layers.47.self_attn.q_a_proj": {
1519
+ "bits": 4,
1520
+ "group_size": 128
1521
+ },
1522
+ "model.layers.47.self_attn.q_b_proj": {
1523
+ "bits": 4,
1524
+ "group_size": 128
1525
+ },
1526
+ "model.layers.47.self_attn.kv_a_proj_with_mqa": {
1527
+ "bits": 4,
1528
+ "group_size": 128
1529
+ },
1530
+ "model.layers.47.self_attn.kv_b_proj": {
1531
+ "bits": 4,
1532
+ "group_size": 128
1533
+ },
1534
+ "model.layers.47.self_attn.o_proj": {
1535
+ "bits": 4,
1536
+ "group_size": 128
1537
+ },
1538
+ "model.layers.47.mlp.shared_experts.gate_proj": {
1539
+ "bits": 4,
1540
+ "group_size": 128
1541
+ },
1542
+ "model.layers.47.mlp.shared_experts.up_proj": {
1543
+ "bits": 4,
1544
+ "group_size": 128
1545
+ },
1546
+ "model.layers.47.mlp.shared_experts.down_proj": {
1547
+ "bits": 4,
1548
+ "group_size": 128
1549
+ },
1550
+ "model.layers.48.self_attn.q_a_proj": {
1551
+ "bits": 4,
1552
+ "group_size": 128
1553
+ },
1554
+ "model.layers.48.self_attn.q_b_proj": {
1555
+ "bits": 4,
1556
+ "group_size": 128
1557
+ },
1558
+ "model.layers.48.self_attn.kv_a_proj_with_mqa": {
1559
+ "bits": 4,
1560
+ "group_size": 128
1561
+ },
1562
+ "model.layers.48.self_attn.kv_b_proj": {
1563
+ "bits": 4,
1564
+ "group_size": 128
1565
+ },
1566
+ "model.layers.48.self_attn.o_proj": {
1567
+ "bits": 4,
1568
+ "group_size": 128
1569
+ },
1570
+ "model.layers.48.mlp.shared_experts.gate_proj": {
1571
+ "bits": 4,
1572
+ "group_size": 128
1573
+ },
1574
+ "model.layers.48.mlp.shared_experts.up_proj": {
1575
+ "bits": 4,
1576
+ "group_size": 128
1577
+ },
1578
+ "model.layers.48.mlp.shared_experts.down_proj": {
1579
+ "bits": 4,
1580
+ "group_size": 128
1581
+ },
1582
+ "model.layers.49.self_attn.q_a_proj": {
1583
+ "bits": 4,
1584
+ "group_size": 128
1585
+ },
1586
+ "model.layers.49.self_attn.q_b_proj": {
1587
+ "bits": 4,
1588
+ "group_size": 128
1589
+ },
1590
+ "model.layers.49.self_attn.kv_a_proj_with_mqa": {
1591
+ "bits": 4,
1592
+ "group_size": 128
1593
+ },
1594
+ "model.layers.49.self_attn.kv_b_proj": {
1595
+ "bits": 4,
1596
+ "group_size": 128
1597
+ },
1598
+ "model.layers.49.self_attn.o_proj": {
1599
+ "bits": 4,
1600
+ "group_size": 128
1601
+ },
1602
+ "model.layers.49.mlp.shared_experts.gate_proj": {
1603
+ "bits": 4,
1604
+ "group_size": 128
1605
+ },
1606
+ "model.layers.49.mlp.shared_experts.up_proj": {
1607
+ "bits": 4,
1608
+ "group_size": 128
1609
+ },
1610
+ "model.layers.49.mlp.shared_experts.down_proj": {
1611
+ "bits": 4,
1612
+ "group_size": 128
1613
+ },
1614
+ "model.layers.50.self_attn.q_a_proj": {
1615
+ "bits": 4,
1616
+ "group_size": 128
1617
+ },
1618
+ "model.layers.50.self_attn.q_b_proj": {
1619
+ "bits": 4,
1620
+ "group_size": 128
1621
+ },
1622
+ "model.layers.50.self_attn.kv_a_proj_with_mqa": {
1623
+ "bits": 4,
1624
+ "group_size": 128
1625
+ },
1626
+ "model.layers.50.self_attn.kv_b_proj": {
1627
+ "bits": 4,
1628
+ "group_size": 128
1629
+ },
1630
+ "model.layers.50.self_attn.o_proj": {
1631
+ "bits": 4,
1632
+ "group_size": 128
1633
+ },
1634
+ "model.layers.50.mlp.shared_experts.gate_proj": {
1635
+ "bits": 4,
1636
+ "group_size": 128
1637
+ },
1638
+ "model.layers.50.mlp.shared_experts.up_proj": {
1639
+ "bits": 4,
1640
+ "group_size": 128
1641
+ },
1642
+ "model.layers.50.mlp.shared_experts.down_proj": {
1643
+ "bits": 4,
1644
+ "group_size": 128
1645
+ },
1646
+ "model.layers.51.self_attn.q_a_proj": {
1647
+ "bits": 4,
1648
+ "group_size": 128
1649
+ },
1650
+ "model.layers.51.self_attn.q_b_proj": {
1651
+ "bits": 4,
1652
+ "group_size": 128
1653
+ },
1654
+ "model.layers.51.self_attn.kv_a_proj_with_mqa": {
1655
+ "bits": 4,
1656
+ "group_size": 128
1657
+ },
1658
+ "model.layers.51.self_attn.kv_b_proj": {
1659
+ "bits": 4,
1660
+ "group_size": 128
1661
+ },
1662
+ "model.layers.51.self_attn.o_proj": {
1663
+ "bits": 4,
1664
+ "group_size": 128
1665
+ },
1666
+ "model.layers.51.mlp.shared_experts.gate_proj": {
1667
+ "bits": 4,
1668
+ "group_size": 128
1669
+ },
1670
+ "model.layers.51.mlp.shared_experts.up_proj": {
1671
+ "bits": 4,
1672
+ "group_size": 128
1673
+ },
1674
+ "model.layers.51.mlp.shared_experts.down_proj": {
1675
+ "bits": 4,
1676
+ "group_size": 128
1677
+ },
1678
+ "model.layers.52.self_attn.q_a_proj": {
1679
+ "bits": 4,
1680
+ "group_size": 128
1681
+ },
1682
+ "model.layers.52.self_attn.q_b_proj": {
1683
+ "bits": 4,
1684
+ "group_size": 128
1685
+ },
1686
+ "model.layers.52.self_attn.kv_a_proj_with_mqa": {
1687
+ "bits": 4,
1688
+ "group_size": 128
1689
+ },
1690
+ "model.layers.52.self_attn.kv_b_proj": {
1691
+ "bits": 4,
1692
+ "group_size": 128
1693
+ },
1694
+ "model.layers.52.self_attn.o_proj": {
1695
+ "bits": 4,
1696
+ "group_size": 128
1697
+ },
1698
+ "model.layers.52.mlp.shared_experts.gate_proj": {
1699
+ "bits": 4,
1700
+ "group_size": 128
1701
+ },
1702
+ "model.layers.52.mlp.shared_experts.up_proj": {
1703
+ "bits": 4,
1704
+ "group_size": 128
1705
+ },
1706
+ "model.layers.52.mlp.shared_experts.down_proj": {
1707
+ "bits": 4,
1708
+ "group_size": 128
1709
+ },
1710
+ "model.layers.53.self_attn.q_a_proj": {
1711
+ "bits": 4,
1712
+ "group_size": 128
1713
+ },
1714
+ "model.layers.53.self_attn.q_b_proj": {
1715
+ "bits": 4,
1716
+ "group_size": 128
1717
+ },
1718
+ "model.layers.53.self_attn.kv_a_proj_with_mqa": {
1719
+ "bits": 4,
1720
+ "group_size": 128
1721
+ },
1722
+ "model.layers.53.self_attn.kv_b_proj": {
1723
+ "bits": 4,
1724
+ "group_size": 128
1725
+ },
1726
+ "model.layers.53.self_attn.o_proj": {
1727
+ "bits": 4,
1728
+ "group_size": 128
1729
+ },
1730
+ "model.layers.53.mlp.shared_experts.gate_proj": {
1731
+ "bits": 4,
1732
+ "group_size": 128
1733
+ },
1734
+ "model.layers.53.mlp.shared_experts.up_proj": {
1735
+ "bits": 4,
1736
+ "group_size": 128
1737
+ },
1738
+ "model.layers.53.mlp.shared_experts.down_proj": {
1739
+ "bits": 4,
1740
+ "group_size": 128
1741
+ },
1742
+ "model.layers.54.self_attn.q_a_proj": {
1743
+ "bits": 4,
1744
+ "group_size": 128
1745
+ },
1746
+ "model.layers.54.self_attn.q_b_proj": {
1747
+ "bits": 4,
1748
+ "group_size": 128
1749
+ },
1750
+ "model.layers.54.self_attn.kv_a_proj_with_mqa": {
1751
+ "bits": 4,
1752
+ "group_size": 128
1753
+ },
1754
+ "model.layers.54.self_attn.kv_b_proj": {
1755
+ "bits": 4,
1756
+ "group_size": 128
1757
+ },
1758
+ "model.layers.54.self_attn.o_proj": {
1759
+ "bits": 4,
1760
+ "group_size": 128
1761
+ },
1762
+ "model.layers.54.mlp.shared_experts.gate_proj": {
1763
+ "bits": 4,
1764
+ "group_size": 128
1765
+ },
1766
+ "model.layers.54.mlp.shared_experts.up_proj": {
1767
+ "bits": 4,
1768
+ "group_size": 128
1769
+ },
1770
+ "model.layers.54.mlp.shared_experts.down_proj": {
1771
+ "bits": 4,
1772
+ "group_size": 128
1773
+ },
1774
+ "model.layers.55.self_attn.q_a_proj": {
1775
+ "bits": 4,
1776
+ "group_size": 128
1777
+ },
1778
+ "model.layers.55.self_attn.q_b_proj": {
1779
+ "bits": 4,
1780
+ "group_size": 128
1781
+ },
1782
+ "model.layers.55.self_attn.kv_a_proj_with_mqa": {
1783
+ "bits": 4,
1784
+ "group_size": 128
1785
+ },
1786
+ "model.layers.55.self_attn.kv_b_proj": {
1787
+ "bits": 4,
1788
+ "group_size": 128
1789
+ },
1790
+ "model.layers.55.self_attn.o_proj": {
1791
+ "bits": 4,
1792
+ "group_size": 128
1793
+ },
1794
+ "model.layers.55.mlp.shared_experts.gate_proj": {
1795
+ "bits": 4,
1796
+ "group_size": 128
1797
+ },
1798
+ "model.layers.55.mlp.shared_experts.up_proj": {
1799
+ "bits": 4,
1800
+ "group_size": 128
1801
+ },
1802
+ "model.layers.55.mlp.shared_experts.down_proj": {
1803
+ "bits": 4,
1804
+ "group_size": 128
1805
+ },
1806
+ "model.layers.56.self_attn.q_a_proj": {
1807
+ "bits": 4,
1808
+ "group_size": 128
1809
+ },
1810
+ "model.layers.56.self_attn.q_b_proj": {
1811
+ "bits": 4,
1812
+ "group_size": 128
1813
+ },
1814
+ "model.layers.56.self_attn.kv_a_proj_with_mqa": {
1815
+ "bits": 4,
1816
+ "group_size": 128
1817
+ },
1818
+ "model.layers.56.self_attn.kv_b_proj": {
1819
+ "bits": 4,
1820
+ "group_size": 128
1821
+ },
1822
+ "model.layers.56.self_attn.o_proj": {
1823
+ "bits": 4,
1824
+ "group_size": 128
1825
+ },
1826
+ "model.layers.56.mlp.shared_experts.gate_proj": {
1827
+ "bits": 4,
1828
+ "group_size": 128
1829
+ },
1830
+ "model.layers.56.mlp.shared_experts.up_proj": {
1831
+ "bits": 4,
1832
+ "group_size": 128
1833
+ },
1834
+ "model.layers.56.mlp.shared_experts.down_proj": {
1835
+ "bits": 4,
1836
+ "group_size": 128
1837
+ },
1838
+ "model.layers.57.self_attn.q_a_proj": {
1839
+ "bits": 4,
1840
+ "group_size": 128
1841
+ },
1842
+ "model.layers.57.self_attn.q_b_proj": {
1843
+ "bits": 4,
1844
+ "group_size": 128
1845
+ },
1846
+ "model.layers.57.self_attn.kv_a_proj_with_mqa": {
1847
+ "bits": 4,
1848
+ "group_size": 128
1849
+ },
1850
+ "model.layers.57.self_attn.kv_b_proj": {
1851
+ "bits": 4,
1852
+ "group_size": 128
1853
+ },
1854
+ "model.layers.57.self_attn.o_proj": {
1855
+ "bits": 4,
1856
+ "group_size": 128
1857
+ },
1858
+ "model.layers.57.mlp.shared_experts.gate_proj": {
1859
+ "bits": 4,
1860
+ "group_size": 128
1861
+ },
1862
+ "model.layers.57.mlp.shared_experts.up_proj": {
1863
+ "bits": 4,
1864
+ "group_size": 128
1865
+ },
1866
+ "model.layers.57.mlp.shared_experts.down_proj": {
1867
+ "bits": 4,
1868
+ "group_size": 128
1869
+ },
1870
+ "model.layers.58.self_attn.q_a_proj": {
1871
+ "bits": 4,
1872
+ "group_size": 128
1873
+ },
1874
+ "model.layers.58.self_attn.q_b_proj": {
1875
+ "bits": 4,
1876
+ "group_size": 128
1877
+ },
1878
+ "model.layers.58.self_attn.kv_a_proj_with_mqa": {
1879
+ "bits": 4,
1880
+ "group_size": 128
1881
+ },
1882
+ "model.layers.58.self_attn.kv_b_proj": {
1883
+ "bits": 4,
1884
+ "group_size": 128
1885
+ },
1886
+ "model.layers.58.self_attn.o_proj": {
1887
+ "bits": 4,
1888
+ "group_size": 128
1889
+ },
1890
+ "model.layers.58.mlp.shared_experts.gate_proj": {
1891
+ "bits": 4,
1892
+ "group_size": 128
1893
+ },
1894
+ "model.layers.58.mlp.shared_experts.up_proj": {
1895
+ "bits": 4,
1896
+ "group_size": 128
1897
+ },
1898
+ "model.layers.58.mlp.shared_experts.down_proj": {
1899
+ "bits": 4,
1900
+ "group_size": 128
1901
+ },
1902
+ "model.layers.59.self_attn.q_a_proj": {
1903
+ "bits": 4,
1904
+ "group_size": 128
1905
+ },
1906
+ "model.layers.59.self_attn.q_b_proj": {
1907
+ "bits": 4,
1908
+ "group_size": 128
1909
+ },
1910
+ "model.layers.59.self_attn.kv_a_proj_with_mqa": {
1911
+ "bits": 4,
1912
+ "group_size": 128
1913
+ },
1914
+ "model.layers.59.self_attn.kv_b_proj": {
1915
+ "bits": 4,
1916
+ "group_size": 128
1917
+ },
1918
+ "model.layers.59.self_attn.o_proj": {
1919
+ "bits": 4,
1920
+ "group_size": 128
1921
+ },
1922
+ "model.layers.59.mlp.shared_experts.gate_proj": {
1923
+ "bits": 4,
1924
+ "group_size": 128
1925
+ },
1926
+ "model.layers.59.mlp.shared_experts.up_proj": {
1927
+ "bits": 4,
1928
+ "group_size": 128
1929
+ },
1930
+ "model.layers.59.mlp.shared_experts.down_proj": {
1931
+ "bits": 4,
1932
+ "group_size": 128
1933
+ },
1934
+ "model.layers.60.self_attn.q_a_proj": {
1935
+ "bits": 4,
1936
+ "group_size": 128
1937
+ },
1938
+ "model.layers.60.self_attn.q_b_proj": {
1939
+ "bits": 4,
1940
+ "group_size": 128
1941
+ },
1942
+ "model.layers.60.self_attn.kv_a_proj_with_mqa": {
1943
+ "bits": 4,
1944
+ "group_size": 128
1945
+ },
1946
+ "model.layers.60.self_attn.kv_b_proj": {
1947
+ "bits": 4,
1948
+ "group_size": 128
1949
+ },
1950
+ "model.layers.60.self_attn.o_proj": {
1951
+ "bits": 4,
1952
+ "group_size": 128
1953
+ },
1954
+ "model.layers.60.mlp.shared_experts.gate_proj": {
1955
+ "bits": 4,
1956
+ "group_size": 128
1957
+ },
1958
+ "model.layers.60.mlp.shared_experts.up_proj": {
1959
+ "bits": 4,
1960
+ "group_size": 128
1961
+ },
1962
+ "model.layers.60.mlp.shared_experts.down_proj": {
1963
+ "bits": 4,
1964
+ "group_size": 128
1965
+ }
1966
+ }
1967
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end▁of▁sentence|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|end▁of▁sentence|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff