quantize mlp.up_proj, mlp.down_proj, shared_experts

#2
by jiaxwang - opened
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. chat_template.jinja +1 -5
  3. config.json +532 -8
  4. docs/deploy_guidance.md +82 -0
  5. figures/demo_video.mp4 +3 -0
  6. figures/kimi-logo.png +0 -0
  7. model-00001-of-000064.safetensors +2 -2
  8. model-00002-of-000064.safetensors +2 -2
  9. model-00003-of-000064.safetensors +2 -2
  10. model-00004-of-000064.safetensors +2 -2
  11. model-00005-of-000064.safetensors +2 -2
  12. model-00006-of-000064.safetensors +2 -2
  13. model-00007-of-000064.safetensors +2 -2
  14. model-00008-of-000064.safetensors +2 -2
  15. model-00009-of-000064.safetensors +2 -2
  16. model-00010-of-000064.safetensors +2 -2
  17. model-00011-of-000064.safetensors +2 -2
  18. model-00012-of-000064.safetensors +2 -2
  19. model-00013-of-000064.safetensors +2 -2
  20. model-00014-of-000064.safetensors +2 -2
  21. model-00015-of-000064.safetensors +2 -2
  22. model-00016-of-000064.safetensors +2 -2
  23. model-00017-of-000064.safetensors +2 -2
  24. model-00018-of-000064.safetensors +2 -2
  25. model-00019-of-000064.safetensors +2 -2
  26. model-00020-of-000064.safetensors +2 -2
  27. model-00021-of-000064.safetensors +2 -2
  28. model-00022-of-000064.safetensors +2 -2
  29. model-00023-of-000064.safetensors +2 -2
  30. model-00024-of-000064.safetensors +2 -2
  31. model-00025-of-000064.safetensors +2 -2
  32. model-00026-of-000064.safetensors +2 -2
  33. model-00027-of-000064.safetensors +2 -2
  34. model-00028-of-000064.safetensors +2 -2
  35. model-00029-of-000064.safetensors +2 -2
  36. model-00030-of-000064.safetensors +2 -2
  37. model-00031-of-000064.safetensors +2 -2
  38. model-00032-of-000064.safetensors +2 -2
  39. model-00033-of-000064.safetensors +2 -2
  40. model-00034-of-000064.safetensors +2 -2
  41. model-00035-of-000064.safetensors +2 -2
  42. model-00036-of-000064.safetensors +2 -2
  43. model-00037-of-000064.safetensors +2 -2
  44. model-00038-of-000064.safetensors +2 -2
  45. model-00039-of-000064.safetensors +2 -2
  46. model-00040-of-000064.safetensors +2 -2
  47. model-00041-of-000064.safetensors +2 -2
  48. model-00042-of-000064.safetensors +2 -2
  49. model-00043-of-000064.safetensors +2 -2
  50. model-00044-of-000064.safetensors +2 -2
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
37
+ figures/demo_video.mp4 filter=lfs diff=lfs merge=lfs -text
chat_template.jinja CHANGED
@@ -5,7 +5,7 @@
5
  {%- elif c is not none -%}
6
  {% for content in c -%}
7
  {% if content['type'] == 'image' or content['type'] == 'image_url' -%}
8
- <|media_start|>image<|media_content|><|media_pad|><|media_end|>
9
  {% elif content['type'] == 'video' or content['type']== 'video_url'-%}
10
  <|kimi_k25_video_placeholder|>
11
  {% else -%}
@@ -57,10 +57,6 @@
57
  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
58
  {%- endif -%}
59
  {%- endif -%}
60
-
61
- {%- if messages|length == 0 or messages[0]['role'] != 'system' -%}
62
- <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
63
- {%- endif -%}
64
 
65
  {%- for message in hist_msgs -%}
66
  {{set_roles(message)}}
 
5
  {%- elif c is not none -%}
6
  {% for content in c -%}
7
  {% if content['type'] == 'image' or content['type'] == 'image_url' -%}
8
+ <|media_begin|>image<|media_content|><|media_pad|><|media_end|>
9
  {% elif content['type'] == 'video' or content['type']== 'video_url'-%}
10
  <|kimi_k25_video_placeholder|>
11
  {% else -%}
 
57
  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
58
  {%- endif -%}
59
  {%- endif -%}
 
 
 
 
60
 
61
  {%- for message in hist_msgs -%}
62
  {{set_roles(message)}}
config.json CHANGED
@@ -193,12 +193,536 @@
193
  "target_device": null
194
  },
195
  "exclude": [
196
- "lm_head",
197
- "re:.*self_attn.*",
198
- "re:.*shared_experts.*",
199
- "re:.*mlp\\.(gate|up|gate_up|down)_proj.*",
200
- "re:mm_projector.*",
201
- "re:vision_tower.*"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  ],
203
  "algo_config": null,
204
  "softmax_quant_spec": null,
@@ -208,7 +732,7 @@
208
  "kv_cache_quant_config": {},
209
  "kv_cache_post_rope": false,
210
  "quant_mode": "eager_mode",
211
- "version": "0.11+4a34634b4a",
212
  "export": {
213
  "kv_cache_group": [],
214
  "min_kv_scale": 0.0,
@@ -217,4 +741,4 @@
217
  "weight_merge_groups": null
218
  }
219
  }
220
- }
 
193
  "target_device": null
194
  },
195
  "exclude": [
196
+ "language_model.lm_head",
197
+ "language_model.model.layers.0.self_attn.kv_a_proj_with_mqa",
198
+ "language_model.model.layers.0.self_attn.kv_b_proj",
199
+ "language_model.model.layers.0.self_attn.o_proj",
200
+ "language_model.model.layers.0.self_attn.q_a_proj",
201
+ "language_model.model.layers.0.self_attn.q_b_proj",
202
+ "language_model.model.layers.1.mlp.gate",
203
+ "language_model.model.layers.1.self_attn.kv_a_proj_with_mqa",
204
+ "language_model.model.layers.1.self_attn.kv_b_proj",
205
+ "language_model.model.layers.1.self_attn.o_proj",
206
+ "language_model.model.layers.1.self_attn.q_a_proj",
207
+ "language_model.model.layers.1.self_attn.q_b_proj",
208
+ "language_model.model.layers.10.mlp.gate",
209
+ "language_model.model.layers.10.self_attn.kv_a_proj_with_mqa",
210
+ "language_model.model.layers.10.self_attn.kv_b_proj",
211
+ "language_model.model.layers.10.self_attn.o_proj",
212
+ "language_model.model.layers.10.self_attn.q_a_proj",
213
+ "language_model.model.layers.10.self_attn.q_b_proj",
214
+ "language_model.model.layers.11.mlp.gate",
215
+ "language_model.model.layers.11.self_attn.kv_a_proj_with_mqa",
216
+ "language_model.model.layers.11.self_attn.kv_b_proj",
217
+ "language_model.model.layers.11.self_attn.o_proj",
218
+ "language_model.model.layers.11.self_attn.q_a_proj",
219
+ "language_model.model.layers.11.self_attn.q_b_proj",
220
+ "language_model.model.layers.12.mlp.gate",
221
+ "language_model.model.layers.12.self_attn.kv_a_proj_with_mqa",
222
+ "language_model.model.layers.12.self_attn.kv_b_proj",
223
+ "language_model.model.layers.12.self_attn.o_proj",
224
+ "language_model.model.layers.12.self_attn.q_a_proj",
225
+ "language_model.model.layers.12.self_attn.q_b_proj",
226
+ "language_model.model.layers.13.mlp.gate",
227
+ "language_model.model.layers.13.self_attn.kv_a_proj_with_mqa",
228
+ "language_model.model.layers.13.self_attn.kv_b_proj",
229
+ "language_model.model.layers.13.self_attn.o_proj",
230
+ "language_model.model.layers.13.self_attn.q_a_proj",
231
+ "language_model.model.layers.13.self_attn.q_b_proj",
232
+ "language_model.model.layers.14.mlp.gate",
233
+ "language_model.model.layers.14.self_attn.kv_a_proj_with_mqa",
234
+ "language_model.model.layers.14.self_attn.kv_b_proj",
235
+ "language_model.model.layers.14.self_attn.o_proj",
236
+ "language_model.model.layers.14.self_attn.q_a_proj",
237
+ "language_model.model.layers.14.self_attn.q_b_proj",
238
+ "language_model.model.layers.15.mlp.gate",
239
+ "language_model.model.layers.15.self_attn.kv_a_proj_with_mqa",
240
+ "language_model.model.layers.15.self_attn.kv_b_proj",
241
+ "language_model.model.layers.15.self_attn.o_proj",
242
+ "language_model.model.layers.15.self_attn.q_a_proj",
243
+ "language_model.model.layers.15.self_attn.q_b_proj",
244
+ "language_model.model.layers.16.mlp.gate",
245
+ "language_model.model.layers.16.self_attn.kv_a_proj_with_mqa",
246
+ "language_model.model.layers.16.self_attn.kv_b_proj",
247
+ "language_model.model.layers.16.self_attn.o_proj",
248
+ "language_model.model.layers.16.self_attn.q_a_proj",
249
+ "language_model.model.layers.16.self_attn.q_b_proj",
250
+ "language_model.model.layers.17.mlp.gate",
251
+ "language_model.model.layers.17.self_attn.kv_a_proj_with_mqa",
252
+ "language_model.model.layers.17.self_attn.kv_b_proj",
253
+ "language_model.model.layers.17.self_attn.o_proj",
254
+ "language_model.model.layers.17.self_attn.q_a_proj",
255
+ "language_model.model.layers.17.self_attn.q_b_proj",
256
+ "language_model.model.layers.18.mlp.gate",
257
+ "language_model.model.layers.18.self_attn.kv_a_proj_with_mqa",
258
+ "language_model.model.layers.18.self_attn.kv_b_proj",
259
+ "language_model.model.layers.18.self_attn.o_proj",
260
+ "language_model.model.layers.18.self_attn.q_a_proj",
261
+ "language_model.model.layers.18.self_attn.q_b_proj",
262
+ "language_model.model.layers.19.mlp.gate",
263
+ "language_model.model.layers.19.self_attn.kv_a_proj_with_mqa",
264
+ "language_model.model.layers.19.self_attn.kv_b_proj",
265
+ "language_model.model.layers.19.self_attn.o_proj",
266
+ "language_model.model.layers.19.self_attn.q_a_proj",
267
+ "language_model.model.layers.19.self_attn.q_b_proj",
268
+ "language_model.model.layers.2.mlp.gate",
269
+ "language_model.model.layers.2.self_attn.kv_a_proj_with_mqa",
270
+ "language_model.model.layers.2.self_attn.kv_b_proj",
271
+ "language_model.model.layers.2.self_attn.o_proj",
272
+ "language_model.model.layers.2.self_attn.q_a_proj",
273
+ "language_model.model.layers.2.self_attn.q_b_proj",
274
+ "language_model.model.layers.20.mlp.gate",
275
+ "language_model.model.layers.20.self_attn.kv_a_proj_with_mqa",
276
+ "language_model.model.layers.20.self_attn.kv_b_proj",
277
+ "language_model.model.layers.20.self_attn.o_proj",
278
+ "language_model.model.layers.20.self_attn.q_a_proj",
279
+ "language_model.model.layers.20.self_attn.q_b_proj",
280
+ "language_model.model.layers.21.mlp.gate",
281
+ "language_model.model.layers.21.self_attn.kv_a_proj_with_mqa",
282
+ "language_model.model.layers.21.self_attn.kv_b_proj",
283
+ "language_model.model.layers.21.self_attn.o_proj",
284
+ "language_model.model.layers.21.self_attn.q_a_proj",
285
+ "language_model.model.layers.21.self_attn.q_b_proj",
286
+ "language_model.model.layers.22.mlp.gate",
287
+ "language_model.model.layers.22.self_attn.kv_a_proj_with_mqa",
288
+ "language_model.model.layers.22.self_attn.kv_b_proj",
289
+ "language_model.model.layers.22.self_attn.o_proj",
290
+ "language_model.model.layers.22.self_attn.q_a_proj",
291
+ "language_model.model.layers.22.self_attn.q_b_proj",
292
+ "language_model.model.layers.23.mlp.gate",
293
+ "language_model.model.layers.23.self_attn.kv_a_proj_with_mqa",
294
+ "language_model.model.layers.23.self_attn.kv_b_proj",
295
+ "language_model.model.layers.23.self_attn.o_proj",
296
+ "language_model.model.layers.23.self_attn.q_a_proj",
297
+ "language_model.model.layers.23.self_attn.q_b_proj",
298
+ "language_model.model.layers.24.mlp.gate",
299
+ "language_model.model.layers.24.self_attn.kv_a_proj_with_mqa",
300
+ "language_model.model.layers.24.self_attn.kv_b_proj",
301
+ "language_model.model.layers.24.self_attn.o_proj",
302
+ "language_model.model.layers.24.self_attn.q_a_proj",
303
+ "language_model.model.layers.24.self_attn.q_b_proj",
304
+ "language_model.model.layers.25.mlp.gate",
305
+ "language_model.model.layers.25.self_attn.kv_a_proj_with_mqa",
306
+ "language_model.model.layers.25.self_attn.kv_b_proj",
307
+ "language_model.model.layers.25.self_attn.o_proj",
308
+ "language_model.model.layers.25.self_attn.q_a_proj",
309
+ "language_model.model.layers.25.self_attn.q_b_proj",
310
+ "language_model.model.layers.26.mlp.gate",
311
+ "language_model.model.layers.26.self_attn.kv_a_proj_with_mqa",
312
+ "language_model.model.layers.26.self_attn.kv_b_proj",
313
+ "language_model.model.layers.26.self_attn.o_proj",
314
+ "language_model.model.layers.26.self_attn.q_a_proj",
315
+ "language_model.model.layers.26.self_attn.q_b_proj",
316
+ "language_model.model.layers.27.mlp.gate",
317
+ "language_model.model.layers.27.self_attn.kv_a_proj_with_mqa",
318
+ "language_model.model.layers.27.self_attn.kv_b_proj",
319
+ "language_model.model.layers.27.self_attn.o_proj",
320
+ "language_model.model.layers.27.self_attn.q_a_proj",
321
+ "language_model.model.layers.27.self_attn.q_b_proj",
322
+ "language_model.model.layers.28.mlp.gate",
323
+ "language_model.model.layers.28.self_attn.kv_a_proj_with_mqa",
324
+ "language_model.model.layers.28.self_attn.kv_b_proj",
325
+ "language_model.model.layers.28.self_attn.o_proj",
326
+ "language_model.model.layers.28.self_attn.q_a_proj",
327
+ "language_model.model.layers.28.self_attn.q_b_proj",
328
+ "language_model.model.layers.29.mlp.gate",
329
+ "language_model.model.layers.29.self_attn.kv_a_proj_with_mqa",
330
+ "language_model.model.layers.29.self_attn.kv_b_proj",
331
+ "language_model.model.layers.29.self_attn.o_proj",
332
+ "language_model.model.layers.29.self_attn.q_a_proj",
333
+ "language_model.model.layers.29.self_attn.q_b_proj",
334
+ "language_model.model.layers.3.mlp.gate",
335
+ "language_model.model.layers.3.self_attn.kv_a_proj_with_mqa",
336
+ "language_model.model.layers.3.self_attn.kv_b_proj",
337
+ "language_model.model.layers.3.self_attn.o_proj",
338
+ "language_model.model.layers.3.self_attn.q_a_proj",
339
+ "language_model.model.layers.3.self_attn.q_b_proj",
340
+ "language_model.model.layers.30.mlp.gate",
341
+ "language_model.model.layers.30.self_attn.kv_a_proj_with_mqa",
342
+ "language_model.model.layers.30.self_attn.kv_b_proj",
343
+ "language_model.model.layers.30.self_attn.o_proj",
344
+ "language_model.model.layers.30.self_attn.q_a_proj",
345
+ "language_model.model.layers.30.self_attn.q_b_proj",
346
+ "language_model.model.layers.31.mlp.gate",
347
+ "language_model.model.layers.31.self_attn.kv_a_proj_with_mqa",
348
+ "language_model.model.layers.31.self_attn.kv_b_proj",
349
+ "language_model.model.layers.31.self_attn.o_proj",
350
+ "language_model.model.layers.31.self_attn.q_a_proj",
351
+ "language_model.model.layers.31.self_attn.q_b_proj",
352
+ "language_model.model.layers.32.mlp.gate",
353
+ "language_model.model.layers.32.self_attn.kv_a_proj_with_mqa",
354
+ "language_model.model.layers.32.self_attn.kv_b_proj",
355
+ "language_model.model.layers.32.self_attn.o_proj",
356
+ "language_model.model.layers.32.self_attn.q_a_proj",
357
+ "language_model.model.layers.32.self_attn.q_b_proj",
358
+ "language_model.model.layers.33.mlp.gate",
359
+ "language_model.model.layers.33.self_attn.kv_a_proj_with_mqa",
360
+ "language_model.model.layers.33.self_attn.kv_b_proj",
361
+ "language_model.model.layers.33.self_attn.o_proj",
362
+ "language_model.model.layers.33.self_attn.q_a_proj",
363
+ "language_model.model.layers.33.self_attn.q_b_proj",
364
+ "language_model.model.layers.34.mlp.gate",
365
+ "language_model.model.layers.34.self_attn.kv_a_proj_with_mqa",
366
+ "language_model.model.layers.34.self_attn.kv_b_proj",
367
+ "language_model.model.layers.34.self_attn.o_proj",
368
+ "language_model.model.layers.34.self_attn.q_a_proj",
369
+ "language_model.model.layers.34.self_attn.q_b_proj",
370
+ "language_model.model.layers.35.mlp.gate",
371
+ "language_model.model.layers.35.self_attn.kv_a_proj_with_mqa",
372
+ "language_model.model.layers.35.self_attn.kv_b_proj",
373
+ "language_model.model.layers.35.self_attn.o_proj",
374
+ "language_model.model.layers.35.self_attn.q_a_proj",
375
+ "language_model.model.layers.35.self_attn.q_b_proj",
376
+ "language_model.model.layers.36.mlp.gate",
377
+ "language_model.model.layers.36.self_attn.kv_a_proj_with_mqa",
378
+ "language_model.model.layers.36.self_attn.kv_b_proj",
379
+ "language_model.model.layers.36.self_attn.o_proj",
380
+ "language_model.model.layers.36.self_attn.q_a_proj",
381
+ "language_model.model.layers.36.self_attn.q_b_proj",
382
+ "language_model.model.layers.37.mlp.gate",
383
+ "language_model.model.layers.37.self_attn.kv_a_proj_with_mqa",
384
+ "language_model.model.layers.37.self_attn.kv_b_proj",
385
+ "language_model.model.layers.37.self_attn.o_proj",
386
+ "language_model.model.layers.37.self_attn.q_a_proj",
387
+ "language_model.model.layers.37.self_attn.q_b_proj",
388
+ "language_model.model.layers.38.mlp.gate",
389
+ "language_model.model.layers.38.self_attn.kv_a_proj_with_mqa",
390
+ "language_model.model.layers.38.self_attn.kv_b_proj",
391
+ "language_model.model.layers.38.self_attn.o_proj",
392
+ "language_model.model.layers.38.self_attn.q_a_proj",
393
+ "language_model.model.layers.38.self_attn.q_b_proj",
394
+ "language_model.model.layers.39.mlp.gate",
395
+ "language_model.model.layers.39.self_attn.kv_a_proj_with_mqa",
396
+ "language_model.model.layers.39.self_attn.kv_b_proj",
397
+ "language_model.model.layers.39.self_attn.o_proj",
398
+ "language_model.model.layers.39.self_attn.q_a_proj",
399
+ "language_model.model.layers.39.self_attn.q_b_proj",
400
+ "language_model.model.layers.4.mlp.gate",
401
+ "language_model.model.layers.4.self_attn.kv_a_proj_with_mqa",
402
+ "language_model.model.layers.4.self_attn.kv_b_proj",
403
+ "language_model.model.layers.4.self_attn.o_proj",
404
+ "language_model.model.layers.4.self_attn.q_a_proj",
405
+ "language_model.model.layers.4.self_attn.q_b_proj",
406
+ "language_model.model.layers.40.mlp.gate",
407
+ "language_model.model.layers.40.self_attn.kv_a_proj_with_mqa",
408
+ "language_model.model.layers.40.self_attn.kv_b_proj",
409
+ "language_model.model.layers.40.self_attn.o_proj",
410
+ "language_model.model.layers.40.self_attn.q_a_proj",
411
+ "language_model.model.layers.40.self_attn.q_b_proj",
412
+ "language_model.model.layers.41.mlp.gate",
413
+ "language_model.model.layers.41.self_attn.kv_a_proj_with_mqa",
414
+ "language_model.model.layers.41.self_attn.kv_b_proj",
415
+ "language_model.model.layers.41.self_attn.o_proj",
416
+ "language_model.model.layers.41.self_attn.q_a_proj",
417
+ "language_model.model.layers.41.self_attn.q_b_proj",
418
+ "language_model.model.layers.42.mlp.gate",
419
+ "language_model.model.layers.42.self_attn.kv_a_proj_with_mqa",
420
+ "language_model.model.layers.42.self_attn.kv_b_proj",
421
+ "language_model.model.layers.42.self_attn.o_proj",
422
+ "language_model.model.layers.42.self_attn.q_a_proj",
423
+ "language_model.model.layers.42.self_attn.q_b_proj",
424
+ "language_model.model.layers.43.mlp.gate",
425
+ "language_model.model.layers.43.self_attn.kv_a_proj_with_mqa",
426
+ "language_model.model.layers.43.self_attn.kv_b_proj",
427
+ "language_model.model.layers.43.self_attn.o_proj",
428
+ "language_model.model.layers.43.self_attn.q_a_proj",
429
+ "language_model.model.layers.43.self_attn.q_b_proj",
430
+ "language_model.model.layers.44.mlp.gate",
431
+ "language_model.model.layers.44.self_attn.kv_a_proj_with_mqa",
432
+ "language_model.model.layers.44.self_attn.kv_b_proj",
433
+ "language_model.model.layers.44.self_attn.o_proj",
434
+ "language_model.model.layers.44.self_attn.q_a_proj",
435
+ "language_model.model.layers.44.self_attn.q_b_proj",
436
+ "language_model.model.layers.45.mlp.gate",
437
+ "language_model.model.layers.45.self_attn.kv_a_proj_with_mqa",
438
+ "language_model.model.layers.45.self_attn.kv_b_proj",
439
+ "language_model.model.layers.45.self_attn.o_proj",
440
+ "language_model.model.layers.45.self_attn.q_a_proj",
441
+ "language_model.model.layers.45.self_attn.q_b_proj",
442
+ "language_model.model.layers.46.mlp.gate",
443
+ "language_model.model.layers.46.self_attn.kv_a_proj_with_mqa",
444
+ "language_model.model.layers.46.self_attn.kv_b_proj",
445
+ "language_model.model.layers.46.self_attn.o_proj",
446
+ "language_model.model.layers.46.self_attn.q_a_proj",
447
+ "language_model.model.layers.46.self_attn.q_b_proj",
448
+ "language_model.model.layers.47.mlp.gate",
449
+ "language_model.model.layers.47.self_attn.kv_a_proj_with_mqa",
450
+ "language_model.model.layers.47.self_attn.kv_b_proj",
451
+ "language_model.model.layers.47.self_attn.o_proj",
452
+ "language_model.model.layers.47.self_attn.q_a_proj",
453
+ "language_model.model.layers.47.self_attn.q_b_proj",
454
+ "language_model.model.layers.48.mlp.gate",
455
+ "language_model.model.layers.48.self_attn.kv_a_proj_with_mqa",
456
+ "language_model.model.layers.48.self_attn.kv_b_proj",
457
+ "language_model.model.layers.48.self_attn.o_proj",
458
+ "language_model.model.layers.48.self_attn.q_a_proj",
459
+ "language_model.model.layers.48.self_attn.q_b_proj",
460
+ "language_model.model.layers.49.mlp.gate",
461
+ "language_model.model.layers.49.self_attn.kv_a_proj_with_mqa",
462
+ "language_model.model.layers.49.self_attn.kv_b_proj",
463
+ "language_model.model.layers.49.self_attn.o_proj",
464
+ "language_model.model.layers.49.self_attn.q_a_proj",
465
+ "language_model.model.layers.49.self_attn.q_b_proj",
466
+ "language_model.model.layers.5.mlp.gate",
467
+ "language_model.model.layers.5.self_attn.kv_a_proj_with_mqa",
468
+ "language_model.model.layers.5.self_attn.kv_b_proj",
469
+ "language_model.model.layers.5.self_attn.o_proj",
470
+ "language_model.model.layers.5.self_attn.q_a_proj",
471
+ "language_model.model.layers.5.self_attn.q_b_proj",
472
+ "language_model.model.layers.50.mlp.gate",
473
+ "language_model.model.layers.50.self_attn.kv_a_proj_with_mqa",
474
+ "language_model.model.layers.50.self_attn.kv_b_proj",
475
+ "language_model.model.layers.50.self_attn.o_proj",
476
+ "language_model.model.layers.50.self_attn.q_a_proj",
477
+ "language_model.model.layers.50.self_attn.q_b_proj",
478
+ "language_model.model.layers.51.mlp.gate",
479
+ "language_model.model.layers.51.self_attn.kv_a_proj_with_mqa",
480
+ "language_model.model.layers.51.self_attn.kv_b_proj",
481
+ "language_model.model.layers.51.self_attn.o_proj",
482
+ "language_model.model.layers.51.self_attn.q_a_proj",
483
+ "language_model.model.layers.51.self_attn.q_b_proj",
484
+ "language_model.model.layers.52.mlp.gate",
485
+ "language_model.model.layers.52.self_attn.kv_a_proj_with_mqa",
486
+ "language_model.model.layers.52.self_attn.kv_b_proj",
487
+ "language_model.model.layers.52.self_attn.o_proj",
488
+ "language_model.model.layers.52.self_attn.q_a_proj",
489
+ "language_model.model.layers.52.self_attn.q_b_proj",
490
+ "language_model.model.layers.53.mlp.gate",
491
+ "language_model.model.layers.53.self_attn.kv_a_proj_with_mqa",
492
+ "language_model.model.layers.53.self_attn.kv_b_proj",
493
+ "language_model.model.layers.53.self_attn.o_proj",
494
+ "language_model.model.layers.53.self_attn.q_a_proj",
495
+ "language_model.model.layers.53.self_attn.q_b_proj",
496
+ "language_model.model.layers.54.mlp.gate",
497
+ "language_model.model.layers.54.self_attn.kv_a_proj_with_mqa",
498
+ "language_model.model.layers.54.self_attn.kv_b_proj",
499
+ "language_model.model.layers.54.self_attn.o_proj",
500
+ "language_model.model.layers.54.self_attn.q_a_proj",
501
+ "language_model.model.layers.54.self_attn.q_b_proj",
502
+ "language_model.model.layers.55.mlp.gate",
503
+ "language_model.model.layers.55.self_attn.kv_a_proj_with_mqa",
504
+ "language_model.model.layers.55.self_attn.kv_b_proj",
505
+ "language_model.model.layers.55.self_attn.o_proj",
506
+ "language_model.model.layers.55.self_attn.q_a_proj",
507
+ "language_model.model.layers.55.self_attn.q_b_proj",
508
+ "language_model.model.layers.56.mlp.gate",
509
+ "language_model.model.layers.56.self_attn.kv_a_proj_with_mqa",
510
+ "language_model.model.layers.56.self_attn.kv_b_proj",
511
+ "language_model.model.layers.56.self_attn.o_proj",
512
+ "language_model.model.layers.56.self_attn.q_a_proj",
513
+ "language_model.model.layers.56.self_attn.q_b_proj",
514
+ "language_model.model.layers.57.mlp.gate",
515
+ "language_model.model.layers.57.self_attn.kv_a_proj_with_mqa",
516
+ "language_model.model.layers.57.self_attn.kv_b_proj",
517
+ "language_model.model.layers.57.self_attn.o_proj",
518
+ "language_model.model.layers.57.self_attn.q_a_proj",
519
+ "language_model.model.layers.57.self_attn.q_b_proj",
520
+ "language_model.model.layers.58.mlp.gate",
521
+ "language_model.model.layers.58.self_attn.kv_a_proj_with_mqa",
522
+ "language_model.model.layers.58.self_attn.kv_b_proj",
523
+ "language_model.model.layers.58.self_attn.o_proj",
524
+ "language_model.model.layers.58.self_attn.q_a_proj",
525
+ "language_model.model.layers.58.self_attn.q_b_proj",
526
+ "language_model.model.layers.59.mlp.gate",
527
+ "language_model.model.layers.59.self_attn.kv_a_proj_with_mqa",
528
+ "language_model.model.layers.59.self_attn.kv_b_proj",
529
+ "language_model.model.layers.59.self_attn.o_proj",
530
+ "language_model.model.layers.59.self_attn.q_a_proj",
531
+ "language_model.model.layers.59.self_attn.q_b_proj",
532
+ "language_model.model.layers.6.mlp.gate",
533
+ "language_model.model.layers.6.self_attn.kv_a_proj_with_mqa",
534
+ "language_model.model.layers.6.self_attn.kv_b_proj",
535
+ "language_model.model.layers.6.self_attn.o_proj",
536
+ "language_model.model.layers.6.self_attn.q_a_proj",
537
+ "language_model.model.layers.6.self_attn.q_b_proj",
538
+ "language_model.model.layers.60.mlp.gate",
539
+ "language_model.model.layers.60.self_attn.kv_a_proj_with_mqa",
540
+ "language_model.model.layers.60.self_attn.kv_b_proj",
541
+ "language_model.model.layers.60.self_attn.o_proj",
542
+ "language_model.model.layers.60.self_attn.q_a_proj",
543
+ "language_model.model.layers.60.self_attn.q_b_proj",
544
+ "language_model.model.layers.7.mlp.gate",
545
+ "language_model.model.layers.7.self_attn.kv_a_proj_with_mqa",
546
+ "language_model.model.layers.7.self_attn.kv_b_proj",
547
+ "language_model.model.layers.7.self_attn.o_proj",
548
+ "language_model.model.layers.7.self_attn.q_a_proj",
549
+ "language_model.model.layers.7.self_attn.q_b_proj",
550
+ "language_model.model.layers.8.mlp.gate",
551
+ "language_model.model.layers.8.self_attn.kv_a_proj_with_mqa",
552
+ "language_model.model.layers.8.self_attn.kv_b_proj",
553
+ "language_model.model.layers.8.self_attn.o_proj",
554
+ "language_model.model.layers.8.self_attn.q_a_proj",
555
+ "language_model.model.layers.8.self_attn.q_b_proj",
556
+ "language_model.model.layers.9.mlp.gate",
557
+ "language_model.model.layers.9.self_attn.kv_a_proj_with_mqa",
558
+ "language_model.model.layers.9.self_attn.kv_b_proj",
559
+ "language_model.model.layers.9.self_attn.o_proj",
560
+ "language_model.model.layers.9.self_attn.q_a_proj",
561
+ "language_model.model.layers.9.self_attn.q_b_proj",
562
+ "mm_projector.proj.0",
563
+ "mm_projector.proj.2",
564
+ "vision_tower.encoder.blocks.0.mlp.fc0",
565
+ "vision_tower.encoder.blocks.0.mlp.fc1",
566
+ "vision_tower.encoder.blocks.0.norm0",
567
+ "vision_tower.encoder.blocks.0.norm1",
568
+ "vision_tower.encoder.blocks.0.wo",
569
+ "vision_tower.encoder.blocks.0.wqkv",
570
+ "vision_tower.encoder.blocks.1.mlp.fc0",
571
+ "vision_tower.encoder.blocks.1.mlp.fc1",
572
+ "vision_tower.encoder.blocks.1.norm0",
573
+ "vision_tower.encoder.blocks.1.norm1",
574
+ "vision_tower.encoder.blocks.1.wo",
575
+ "vision_tower.encoder.blocks.1.wqkv",
576
+ "vision_tower.encoder.blocks.10.mlp.fc0",
577
+ "vision_tower.encoder.blocks.10.mlp.fc1",
578
+ "vision_tower.encoder.blocks.10.norm0",
579
+ "vision_tower.encoder.blocks.10.norm1",
580
+ "vision_tower.encoder.blocks.10.wo",
581
+ "vision_tower.encoder.blocks.10.wqkv",
582
+ "vision_tower.encoder.blocks.11.mlp.fc0",
583
+ "vision_tower.encoder.blocks.11.mlp.fc1",
584
+ "vision_tower.encoder.blocks.11.norm0",
585
+ "vision_tower.encoder.blocks.11.norm1",
586
+ "vision_tower.encoder.blocks.11.wo",
587
+ "vision_tower.encoder.blocks.11.wqkv",
588
+ "vision_tower.encoder.blocks.12.mlp.fc0",
589
+ "vision_tower.encoder.blocks.12.mlp.fc1",
590
+ "vision_tower.encoder.blocks.12.norm0",
591
+ "vision_tower.encoder.blocks.12.norm1",
592
+ "vision_tower.encoder.blocks.12.wo",
593
+ "vision_tower.encoder.blocks.12.wqkv",
594
+ "vision_tower.encoder.blocks.13.mlp.fc0",
595
+ "vision_tower.encoder.blocks.13.mlp.fc1",
596
+ "vision_tower.encoder.blocks.13.norm0",
597
+ "vision_tower.encoder.blocks.13.norm1",
598
+ "vision_tower.encoder.blocks.13.wo",
599
+ "vision_tower.encoder.blocks.13.wqkv",
600
+ "vision_tower.encoder.blocks.14.mlp.fc0",
601
+ "vision_tower.encoder.blocks.14.mlp.fc1",
602
+ "vision_tower.encoder.blocks.14.norm0",
603
+ "vision_tower.encoder.blocks.14.norm1",
604
+ "vision_tower.encoder.blocks.14.wo",
605
+ "vision_tower.encoder.blocks.14.wqkv",
606
+ "vision_tower.encoder.blocks.15.mlp.fc0",
607
+ "vision_tower.encoder.blocks.15.mlp.fc1",
608
+ "vision_tower.encoder.blocks.15.norm0",
609
+ "vision_tower.encoder.blocks.15.norm1",
610
+ "vision_tower.encoder.blocks.15.wo",
611
+ "vision_tower.encoder.blocks.15.wqkv",
612
+ "vision_tower.encoder.blocks.16.mlp.fc0",
613
+ "vision_tower.encoder.blocks.16.mlp.fc1",
614
+ "vision_tower.encoder.blocks.16.norm0",
615
+ "vision_tower.encoder.blocks.16.norm1",
616
+ "vision_tower.encoder.blocks.16.wo",
617
+ "vision_tower.encoder.blocks.16.wqkv",
618
+ "vision_tower.encoder.blocks.17.mlp.fc0",
619
+ "vision_tower.encoder.blocks.17.mlp.fc1",
620
+ "vision_tower.encoder.blocks.17.norm0",
621
+ "vision_tower.encoder.blocks.17.norm1",
622
+ "vision_tower.encoder.blocks.17.wo",
623
+ "vision_tower.encoder.blocks.17.wqkv",
624
+ "vision_tower.encoder.blocks.18.mlp.fc0",
625
+ "vision_tower.encoder.blocks.18.mlp.fc1",
626
+ "vision_tower.encoder.blocks.18.norm0",
627
+ "vision_tower.encoder.blocks.18.norm1",
628
+ "vision_tower.encoder.blocks.18.wo",
629
+ "vision_tower.encoder.blocks.18.wqkv",
630
+ "vision_tower.encoder.blocks.19.mlp.fc0",
631
+ "vision_tower.encoder.blocks.19.mlp.fc1",
632
+ "vision_tower.encoder.blocks.19.norm0",
633
+ "vision_tower.encoder.blocks.19.norm1",
634
+ "vision_tower.encoder.blocks.19.wo",
635
+ "vision_tower.encoder.blocks.19.wqkv",
636
+ "vision_tower.encoder.blocks.2.mlp.fc0",
637
+ "vision_tower.encoder.blocks.2.mlp.fc1",
638
+ "vision_tower.encoder.blocks.2.norm0",
639
+ "vision_tower.encoder.blocks.2.norm1",
640
+ "vision_tower.encoder.blocks.2.wo",
641
+ "vision_tower.encoder.blocks.2.wqkv",
642
+ "vision_tower.encoder.blocks.20.mlp.fc0",
643
+ "vision_tower.encoder.blocks.20.mlp.fc1",
644
+ "vision_tower.encoder.blocks.20.norm0",
645
+ "vision_tower.encoder.blocks.20.norm1",
646
+ "vision_tower.encoder.blocks.20.wo",
647
+ "vision_tower.encoder.blocks.20.wqkv",
648
+ "vision_tower.encoder.blocks.21.mlp.fc0",
649
+ "vision_tower.encoder.blocks.21.mlp.fc1",
650
+ "vision_tower.encoder.blocks.21.norm0",
651
+ "vision_tower.encoder.blocks.21.norm1",
652
+ "vision_tower.encoder.blocks.21.wo",
653
+ "vision_tower.encoder.blocks.21.wqkv",
654
+ "vision_tower.encoder.blocks.22.mlp.fc0",
655
+ "vision_tower.encoder.blocks.22.mlp.fc1",
656
+ "vision_tower.encoder.blocks.22.norm0",
657
+ "vision_tower.encoder.blocks.22.norm1",
658
+ "vision_tower.encoder.blocks.22.wo",
659
+ "vision_tower.encoder.blocks.22.wqkv",
660
+ "vision_tower.encoder.blocks.23.mlp.fc0",
661
+ "vision_tower.encoder.blocks.23.mlp.fc1",
662
+ "vision_tower.encoder.blocks.23.norm0",
663
+ "vision_tower.encoder.blocks.23.norm1",
664
+ "vision_tower.encoder.blocks.23.wo",
665
+ "vision_tower.encoder.blocks.23.wqkv",
666
+ "vision_tower.encoder.blocks.24.mlp.fc0",
667
+ "vision_tower.encoder.blocks.24.mlp.fc1",
668
+ "vision_tower.encoder.blocks.24.norm0",
669
+ "vision_tower.encoder.blocks.24.norm1",
670
+ "vision_tower.encoder.blocks.24.wo",
671
+ "vision_tower.encoder.blocks.24.wqkv",
672
+ "vision_tower.encoder.blocks.25.mlp.fc0",
673
+ "vision_tower.encoder.blocks.25.mlp.fc1",
674
+ "vision_tower.encoder.blocks.25.norm0",
675
+ "vision_tower.encoder.blocks.25.norm1",
676
+ "vision_tower.encoder.blocks.25.wo",
677
+ "vision_tower.encoder.blocks.25.wqkv",
678
+ "vision_tower.encoder.blocks.26.mlp.fc0",
679
+ "vision_tower.encoder.blocks.26.mlp.fc1",
680
+ "vision_tower.encoder.blocks.26.norm0",
681
+ "vision_tower.encoder.blocks.26.norm1",
682
+ "vision_tower.encoder.blocks.26.wo",
683
+ "vision_tower.encoder.blocks.26.wqkv",
684
+ "vision_tower.encoder.blocks.3.mlp.fc0",
685
+ "vision_tower.encoder.blocks.3.mlp.fc1",
686
+ "vision_tower.encoder.blocks.3.norm0",
687
+ "vision_tower.encoder.blocks.3.norm1",
688
+ "vision_tower.encoder.blocks.3.wo",
689
+ "vision_tower.encoder.blocks.3.wqkv",
690
+ "vision_tower.encoder.blocks.4.mlp.fc0",
691
+ "vision_tower.encoder.blocks.4.mlp.fc1",
692
+ "vision_tower.encoder.blocks.4.norm0",
693
+ "vision_tower.encoder.blocks.4.norm1",
694
+ "vision_tower.encoder.blocks.4.wo",
695
+ "vision_tower.encoder.blocks.4.wqkv",
696
+ "vision_tower.encoder.blocks.5.mlp.fc0",
697
+ "vision_tower.encoder.blocks.5.mlp.fc1",
698
+ "vision_tower.encoder.blocks.5.norm0",
699
+ "vision_tower.encoder.blocks.5.norm1",
700
+ "vision_tower.encoder.blocks.5.wo",
701
+ "vision_tower.encoder.blocks.5.wqkv",
702
+ "vision_tower.encoder.blocks.6.mlp.fc0",
703
+ "vision_tower.encoder.blocks.6.mlp.fc1",
704
+ "vision_tower.encoder.blocks.6.norm0",
705
+ "vision_tower.encoder.blocks.6.norm1",
706
+ "vision_tower.encoder.blocks.6.wo",
707
+ "vision_tower.encoder.blocks.6.wqkv",
708
+ "vision_tower.encoder.blocks.7.mlp.fc0",
709
+ "vision_tower.encoder.blocks.7.mlp.fc1",
710
+ "vision_tower.encoder.blocks.7.norm0",
711
+ "vision_tower.encoder.blocks.7.norm1",
712
+ "vision_tower.encoder.blocks.7.wo",
713
+ "vision_tower.encoder.blocks.7.wqkv",
714
+ "vision_tower.encoder.blocks.8.mlp.fc0",
715
+ "vision_tower.encoder.blocks.8.mlp.fc1",
716
+ "vision_tower.encoder.blocks.8.norm0",
717
+ "vision_tower.encoder.blocks.8.norm1",
718
+ "vision_tower.encoder.blocks.8.wo",
719
+ "vision_tower.encoder.blocks.8.wqkv",
720
+ "vision_tower.encoder.blocks.9.mlp.fc0",
721
+ "vision_tower.encoder.blocks.9.mlp.fc1",
722
+ "vision_tower.encoder.blocks.9.norm0",
723
+ "vision_tower.encoder.blocks.9.norm1",
724
+ "vision_tower.encoder.blocks.9.wo",
725
+ "vision_tower.encoder.blocks.9.wqkv"
726
  ],
727
  "algo_config": null,
728
  "softmax_quant_spec": null,
 
732
  "kv_cache_quant_config": {},
733
  "kv_cache_post_rope": false,
734
  "quant_mode": "eager_mode",
735
+ "version": "0.11.2+b560ff9e7f9",
736
  "export": {
737
  "kv_cache_group": [],
738
  "min_kv_scale": 0.0,
 
741
  "weight_merge_groups": null
742
  }
743
  }
744
+ }
docs/deploy_guidance.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Kimi-K2.5 Deployment Guide
2
+
3
+ > [!Note]
4
+ > This guide only provides some examples of deployment commands for Kimi-K2.5, which may not be the optimal configuration. Since inference engines are still being updated frequently, please continue to follow the guidance from their homepages if you want to achieve better inference performance.
5
+
6
+ > The kimi_k2 reasoning parser and other related features have been merged into vLLM/SGLang and will be available in the next release. For now, please use the nightly build Docker image.
7
+ ## vLLM Deployment
8
+
9
+ This model is available in the nightly vLLM wheel:
10
+ ```
11
+ uv pip install -U vllm \
12
+ --torch-backend=auto \
13
+ --extra-index-url https://wheels.vllm.ai/nightly
14
+ ```
15
+
16
+ Here is an example of serving this model on a single H200 node with TP8 via vLLM:
17
+ ```bash
18
+ vllm serve $MODEL_PATH -tp 8 --mm-encoder-tp-mode data --trust-remote-code --tool-call-parser kimi_k2 --reasoning-parser kimi_k2
19
+ ```
20
+ **Key notes**
21
+ - `--tool-call-parser kimi_k2`: Required for enabling tool calling
22
+ - `--reasoning-parser kimi_k2`: Kimi-K2.5 enables thinking mode by default. Make sure to pass this for correct reasoning processing.
23
+
24
+ ## SGLang Deployment
25
+
26
+ This model is available on the latest SGLang main branch:
27
+
28
+ ```
29
+ pip install "sglang @ git+https://github.com/sgl-project/sglang.git#subdirectory=python"
30
+ pip install nvidia-cudnn-cu12==9.16.0.29
31
+ ```
32
+
33
+ Similarly, here is an example of running it with TP8 on a single H200 node via SGLang:
34
+ ```bash
35
+ sglang serve --model-path $MODEL_PATH --tp 8 --trust-remote-code --tool-call-parser kimi_k2 --reasoning-parser kimi_k2
36
+ ```
37
+ **Key parameter notes:**
38
+ - `--tool-call-parser kimi_k2`: Required when enabling tool usage.
39
+ - `--reasoning-parser kimi_k2`: Required for correctly processing reasoning content.
40
+
41
+ ## KTransformers Deployment
42
+ ### KTransformers+SGLang Inference Deployment
43
+ Launch with KTransformers + SGLang for CPU+GPU heterogeneous inference:
44
+
45
+ ```
46
+ python -m sglang.launch_server \
47
+ --model path/to/Kimi-K2.5/ \
48
+ --kt-amx-weight-path path/to/Kimi-K2.5/ \
49
+ --kt-cpuinfer 64 \
50
+ --kt-threadpool-count 2 \
51
+ --kt-num-gpu-experts 180 \
52
+ --kt-amx-method AMXINT4 \
53
+ --trust-remote-code \
54
+ --mem-fraction-static 0.98 \
55
+ --chunked-prefill-size 16384 \
56
+ --max-running-requests 48 \
57
+ --max-total-tokens 50000 \
58
+ --tensor-parallel-size 8 \
59
+ --enable-p2p-check \
60
+ --disable-shared-experts-fusion
61
+ ```
62
+
63
+ This configuration achieves 640.12 tokens/s prefill and 24.51 tokens/s decode (48-way concurrency) on 8× NVIDIA L20 + 2× Intel 6454S.
64
+
65
+ More details: https://github.com/kvcache-ai/ktransformers/blob/main/doc/en/Kimi-K2.5.md .
66
+
67
+ ### KTransformers+LLaMA-Factory Fine-tuning Deployment
68
+
69
+ You can use the commands below to run LoRA SFT with KTransformers + LLaMA-Factory.
70
+
71
+ ```
72
+ # For LoRA SFT
73
+ USE_KT=1 llamafactory-cli train examples/train_lora/kimik2_lora_sft_kt.yaml
74
+ # For Chat with model after LoRA SFT
75
+ llamafactory-cli chat examples/inference/kimik2_lora_sft_kt.yaml
76
+ # For API with model after LoRA SFT
77
+ llamafactory-cli api examples/inference/kimik2_lora_sft_kt.yaml
78
+ ```
79
+
80
+ This achieves an end-to-end LoRA SFT throughput of 44.55 tokens/s on 2× NVIDIA 4090 + Intel 8488C with 1.97T RAM and 200G swap memory.
81
+
82
+ For more details, refer to https://github.com/kvcache-ai/ktransformers/blob/main/doc/en/SFT_Installation_Guide_KimiK2.5.md .
figures/demo_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b4d925aa0a7c712feef50765355f0625d8f6d46ea302fd98db9609e9070047
3
+ size 270100
figures/kimi-logo.png ADDED
model-00001-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18daf53a15070b9c70d8bb63420dbd39764af9118af67982eeb60749f5453233
3
- size 995001888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14c37abfedf2540344d9c088f8394358ac06c6409809e7ccea10e6a792d25124
3
+ size 412845960
model-00002-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b80fdcfe1617d02fd492347cd8d21f2722087720264f02a344d0d923d0914f1
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792a1c34d7039f97a22bbe331210186a1efe648b217394033cd788c09f547c25
3
+ size 9215704248
model-00003-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d9e99bdea44c88c3fc99b74167f9072c41c84a33e1b42d9e3771aa390c97eeb
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda96a9661fdda6853f2bc5514978d21b8176e9ac423f6da6dde4e5ab39d3e17
3
+ size 9215704248
model-00004-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:485d3d217728d45507a2fdfde8f9600f3bc06d418ed55e4f7443ac1c2304f256
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7fdbd62d7e6e80c01957217c233c397a6e7311646d9083a8fa715331c08587
3
+ size 9215704248
model-00005-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a91f6c484c960703a9a0f9366db10ae6fc28fb1c1de84f5c5fd910d1a984a6b
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0faf42d63e96cc159d1aeea33b704f5e9377e56c87db6f102bb88d2ec923f1d7
3
+ size 9215704248
model-00006-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03bf67c02fe04d31ce317041ba9c3ae2d2d6804c2da7180a0890335979ed92b3
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c9da3f02f978a504c12d12b9bd5d9b67264bd9b8e55eed3a6f691fff589f7e
3
+ size 9215704248
model-00007-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d376e7fe9d75b924986501db22b2e402a85f33031f271bf0d49edd32b28093de
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a65675206a855250093b68c9fcf5423dac4847b874df67d9cce8e98305402f1
3
+ size 9215704248
model-00008-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae5dd2a875a0ddc3a6de77f14adb608adb553213899a667f74594a08540307e3
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0045cc5f47eca2266d4e6610226a69bfd2175a3dfc4334250a0acd9899f4eb43
3
+ size 9215704248
model-00009-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2a1b0c0d06cbdc03715c65c05f64f9ecf1705d8c2ec7f4a42b064c513a00d72
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14115c4305225f6c65f74a8bec71efd26fb7326a8ea1058f675f8530596b4186
3
+ size 9215704248
model-00010-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff40390d24131d0d9c210bf03b5bb48d42d5105887ea8f20348573a313db9897
3
- size 9280387888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f424523790a658bbb7a4ec62bcbd5310add58b755ece211bb0c0dd82a15e745d
3
+ size 9215704248
model-00011-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d28d28bbb697f45dde07299aa881b35239704e5f91cdb77609f468934a3ef37
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f8b1b9457361aa7f2a9e34db370f8f287c233b15fa5e428602e35bb8f5db2c
3
+ size 9215706568
model-00012-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efe3b96284fac1b594944e9801726d84f71d7247b6a195c77ec8d98f1e582376
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:226a876a929af9c38ca586f7661045552d86aff02d8a43880f0ccc78c274c938
3
+ size 9215706568
model-00013-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb29f46107bc4221fc3a16d76f61e348b9f96f5054ac2a1736f7a5be65800601
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd7616d80bc16837103e64d7fd2d020393001e5e59a43f416822c14afdef65c
3
+ size 9215706568
model-00014-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2408502c83b4498998cff1fe6c5c5e940fbdead1652d9dd81459df0c89eeea1
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dba0172ab2774268198058cd689a66d39e21fbd149730d315f0aef891119672
3
+ size 9215706568
model-00015-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22d9760a5997e71fedb17919636d1637992b05f90bcf6ba9872352bc328f9e93
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93346f577c4a9d44016a2829372c98c3b7f2c3567423202ac0550f51dde116b7
3
+ size 9215706568
model-00016-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f6065fc376cd1750db9ea6ec0e86f2b43d22605a59c8c19dabb465da9d6598b
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ff3a6fcf528980439f8b2c24b74d8ed5abe3790db678132f46d9043319030bb
3
+ size 9215706568
model-00017-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5adf83a3c83d6ae280603a4d17a83e331e927f667208dfa72872ff0878f158f0
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77d4caa81cc735aed439a0c294413ba33fe78e1f7a5fc0bd81ecebd6261de255
3
+ size 9215706568
model-00018-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbd89e36450acaddf6baaeddf38b7cc71155077fbe5159e0c190de62eb8fcbd
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a84f144d2b12e81f3f0bfb4f950b6dbcd5c987eea8f2fce4d775cff04450408
3
+ size 9215706568
model-00019-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5058f1819d4e323a44d86e45d761d6bf7991895880b2d84cfe073d5ec2dda008
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12095b71eccd3826b28ed63a45865697311291c5b4bed53b8ae8bd0756f1e803
3
+ size 9215706568
model-00020-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1477c77ea661bf6ac315eb69f557e1c7a24fff9e889774897755f761fa5cc74e
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d07e2ccfa9feabd932b67ef4c8d6a8c8379c9787b499dccba1ad5a5e91191ca
3
+ size 9215706568
model-00021-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:812a9a13dd1ef9b772baeaad8427cbd5431840c07fbc07551fb635dc679d6e6c
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a12dbcc606047811678759674f081f7bf5560a4e6098e1167c81088c4044635
3
+ size 9215706568
model-00022-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74b3748f8a0e7350e243da84debc4da7e9e8bb00e6211f052be6c5c7cee6234f
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d6bf38338af0d117ce67e499b1b7eac35be599e1718484b71cb3a9feadfc5f
3
+ size 9215706568
model-00023-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51f865ab4390fb736c1c064f63bd35a6ad1ead04567ace27bccb178f6d8dde94
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39fb888657d4005b5abc8eb13d2edfea0d2c18dbcc3b56374b198e0894cbf94e
3
+ size 9215706568
model-00024-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07af75e2f4a76d0be4484dcdf466507c0f2b23ab599294ea5e8fe3e30de8436c
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d685dbe4e1676b1ce53baf95adc2e62870a74628526baa05281cea48306610fe
3
+ size 9215706568
model-00025-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dee177ef397da45da7f81fd42aee2058d9e14ce00d0df7a5f19caef6230e419
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f147528633e61d1e97a0604df23688838d071bf86d05dbb5dcdbb995ac349a88
3
+ size 9215706568
model-00026-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6410f9b5ae0fa36c28dc70a4d86f9da0ba0628bf5bcc695afa6181279a478890
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3404d1c7e76a17486454749e46eca43aa8d6a78a7500507e62cef1db5539ebc5
3
+ size 9215706568
model-00027-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10fbd4f42708d8e40217b3d805192237d258b13c14e8a10f7b303e20390a5656
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:775e12da35274b8183a976c1e486ba9c183a9fb1a758e5474fc41d4d5de4ecce
3
+ size 9215706568
model-00028-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:406fa91d7b312c028687e061f76e5aefc5ce642a48f039838b11f0544acee727
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42babe21f267986aae253cc57533ba8030b92c91d7bfa0cf5f8d57b28ec0403
3
+ size 9215706568
model-00029-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef406584f8afbd1b6d28fbe5a96a852375c6e3506296193face05dd3cdd1e6e8
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ca5f77b82588917ba10755953f438c6904da4796dfbda6ee2e75df99990b88
3
+ size 9215706568
model-00030-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f593e541d77102b6948cf7ad8c1aa7894587d06ec8607922a1821dbf9e1ddb1
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7107c97607726f7b2be725e76cf8bc4499684f6ae99e1c6077f4752bf6b9608d
3
+ size 9215706568
model-00031-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0a898fc052c039ee66416c3a03a86ad4a242b6cd10601386be13c6524bce1d4
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cb6862d16b9f9989528a0521ef008ed2abb4d2ff519d126bff74c2086d6596
3
+ size 9215706568
model-00032-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f5078fd742de320f492d9c9f7c380844a33660b8f8f0205eeaf374957e7ade3
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7e9071b8f60c1ef742cb61b3cfe6bfc4d753c6a1ea3dc69287a556e049fb968
3
+ size 9215706568
model-00033-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:099e832cfb44be1400e3cb6c6da7337e71ded14662993782215c23d42b80dcb7
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e94bafff4fd281a993de205b542ce10110e93850e6c65c61d03be6c364a222a2
3
+ size 9215706568
model-00034-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c418ab2f416a664dc4624e74e498bd8bf131ec95a106f3c514699daba6c74dc
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d17e014b26c65a9154a45957111a08fed38000084b8930a9a28c6b0a7ff0c83
3
+ size 9215706568
model-00035-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc2e3664fa3e816abf3d38743e436aac089bbd22d73b5c5d26837c4d0c112aa0
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229857de9029e67ee27dbe60e2f248ec46c105fa08bf76f80e60de6d6411c8f5
3
+ size 9215706568
model-00036-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b19c8b026f97ff1d10d6cf71a4de178537f5e71dde682ab3beb3cd12a7334d7e
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95b0a3e5ae595ea2550c99042795130493e3b7d5421cb2ecf6af97c7a55c4f4e
3
+ size 9215706568
model-00037-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:775702ce0a1e5f015f2089ec556f7bf962e1a147eb99b4cd71c564a98bc4b6d4
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2611ef97858d1d6b09cc6e429839e727d3d15d967abe3da8bfad655079fedb5
3
+ size 9215706568
model-00038-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ea29d9ccd21191a1a8c13d9139b498334099ad342ab0703c692db989e54dd2b
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf1afe774559fa6718b2a4f1c485f3460ac74ca63ead287c473c5f7053e0ccc
3
+ size 9215706568
model-00039-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd091f20926b11a58428c6cc9c67bb4b842a2182eb5d8173af087eac86d99aad
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90c1709b2436b892bf319128a3880f9ad660381c0254136928698e94d4cc7454
3
+ size 9215706568
model-00040-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25b11a04fbdedecd19ec047e597ac82be1172807f93f03f137c23bc0eac7a76f
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ac158f4752b092b37ce4cf51d7a11640caa24faa72e4e96028b6da1400d88e
3
+ size 9215706568
model-00041-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ae5719fe28bac0dd17677e67861bd632930877ce0d8a0b15b11a624b0aba69f
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dfcac8e04b9e71fdbc875dbd8b7d633df49b1ba8fc096adf040fce451a51970
3
+ size 9215706568
model-00042-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7bf4f3aea3bf79d9c1744b16134974db972a4dd7e0afe2b48ee48aedeb4e4a
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d688b5dd61818c24d75c8f28ba28516ec7247eb850fad0db107d4dc9153e3a7d
3
+ size 9215706568
model-00043-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4d7f327b6926f1b6640023ed631ad5484ec76bf58b3d368f6a73e2b9cbb1e9
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b768d9a9cef432800fbb488542c3e27dfce7795ebe6a0eb058dfc2d0919525
3
+ size 9215706568
model-00044-of-000064.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b8137f44508adaf77d60413549baab93218515b3bc7ba4036cd92fee45a89c7
3
- size 9280390208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2181b8e7841f13727fe71d23a38ecd790be3a838592a96293de5e784bb6225e9
3
+ size 9215706568