diff --git a/.gitattributes b/.gitattributes index 495a7175bb7b0b48bd0ac67d86f3e0c900127a71..20ea40516e24e1bacb8e3434e3a7ca441764ee9b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text +figures/demo_video.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja index bc2867858b843a9de74c2649bf4b1dfd74039df3..f88cb41422626beb4cf1e7826b079448a4663cd0 100644 --- a/chat_template.jinja +++ b/chat_template.jinja @@ -5,7 +5,7 @@ {%- elif c is not none -%} {% for content in c -%} {% if content['type'] == 'image' or content['type'] == 'image_url' -%} - <|media_start|>image<|media_content|><|media_pad|><|media_end|> + <|media_begin|>image<|media_content|><|media_pad|><|media_end|> {% elif content['type'] == 'video' or content['type']== 'video_url'-%} <|kimi_k25_video_placeholder|> {% else -%} @@ -57,10 +57,6 @@ <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|> {%- endif -%} {%- endif -%} - -{%- if messages|length == 0 or messages[0]['role'] != 'system' -%} - <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -{%- endif -%} {%- for message in hist_msgs -%} {{set_roles(message)}} diff --git a/config.json b/config.json index 2f8c172d16baa96b1e43c1e0a343ca78db28f80e..f2779be52ae498d7dfe18cfb5681105539b69eb4 100644 --- a/config.json +++ b/config.json @@ -193,12 +193,536 @@ "target_device": null }, "exclude": [ - "lm_head", - "re:.*self_attn.*", - "re:.*shared_experts.*", - "re:.*mlp\\.(gate|up|gate_up|down)_proj.*", - "re:mm_projector.*", - "re:vision_tower.*" + "language_model.lm_head", + "language_model.model.layers.0.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.0.self_attn.kv_b_proj", + 
"language_model.model.layers.0.self_attn.o_proj", + "language_model.model.layers.0.self_attn.q_a_proj", + "language_model.model.layers.0.self_attn.q_b_proj", + "language_model.model.layers.1.mlp.gate", + "language_model.model.layers.1.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.1.self_attn.kv_b_proj", + "language_model.model.layers.1.self_attn.o_proj", + "language_model.model.layers.1.self_attn.q_a_proj", + "language_model.model.layers.1.self_attn.q_b_proj", + "language_model.model.layers.10.mlp.gate", + "language_model.model.layers.10.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.10.self_attn.kv_b_proj", + "language_model.model.layers.10.self_attn.o_proj", + "language_model.model.layers.10.self_attn.q_a_proj", + "language_model.model.layers.10.self_attn.q_b_proj", + "language_model.model.layers.11.mlp.gate", + "language_model.model.layers.11.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.11.self_attn.kv_b_proj", + "language_model.model.layers.11.self_attn.o_proj", + "language_model.model.layers.11.self_attn.q_a_proj", + "language_model.model.layers.11.self_attn.q_b_proj", + "language_model.model.layers.12.mlp.gate", + "language_model.model.layers.12.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.12.self_attn.kv_b_proj", + "language_model.model.layers.12.self_attn.o_proj", + "language_model.model.layers.12.self_attn.q_a_proj", + "language_model.model.layers.12.self_attn.q_b_proj", + "language_model.model.layers.13.mlp.gate", + "language_model.model.layers.13.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.13.self_attn.kv_b_proj", + "language_model.model.layers.13.self_attn.o_proj", + "language_model.model.layers.13.self_attn.q_a_proj", + "language_model.model.layers.13.self_attn.q_b_proj", + "language_model.model.layers.14.mlp.gate", + "language_model.model.layers.14.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.14.self_attn.kv_b_proj", + 
"language_model.model.layers.14.self_attn.o_proj", + "language_model.model.layers.14.self_attn.q_a_proj", + "language_model.model.layers.14.self_attn.q_b_proj", + "language_model.model.layers.15.mlp.gate", + "language_model.model.layers.15.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.15.self_attn.kv_b_proj", + "language_model.model.layers.15.self_attn.o_proj", + "language_model.model.layers.15.self_attn.q_a_proj", + "language_model.model.layers.15.self_attn.q_b_proj", + "language_model.model.layers.16.mlp.gate", + "language_model.model.layers.16.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.16.self_attn.kv_b_proj", + "language_model.model.layers.16.self_attn.o_proj", + "language_model.model.layers.16.self_attn.q_a_proj", + "language_model.model.layers.16.self_attn.q_b_proj", + "language_model.model.layers.17.mlp.gate", + "language_model.model.layers.17.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.17.self_attn.kv_b_proj", + "language_model.model.layers.17.self_attn.o_proj", + "language_model.model.layers.17.self_attn.q_a_proj", + "language_model.model.layers.17.self_attn.q_b_proj", + "language_model.model.layers.18.mlp.gate", + "language_model.model.layers.18.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.18.self_attn.kv_b_proj", + "language_model.model.layers.18.self_attn.o_proj", + "language_model.model.layers.18.self_attn.q_a_proj", + "language_model.model.layers.18.self_attn.q_b_proj", + "language_model.model.layers.19.mlp.gate", + "language_model.model.layers.19.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.19.self_attn.kv_b_proj", + "language_model.model.layers.19.self_attn.o_proj", + "language_model.model.layers.19.self_attn.q_a_proj", + "language_model.model.layers.19.self_attn.q_b_proj", + "language_model.model.layers.2.mlp.gate", + "language_model.model.layers.2.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.2.self_attn.kv_b_proj", + 
"language_model.model.layers.2.self_attn.o_proj", + "language_model.model.layers.2.self_attn.q_a_proj", + "language_model.model.layers.2.self_attn.q_b_proj", + "language_model.model.layers.20.mlp.gate", + "language_model.model.layers.20.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.20.self_attn.kv_b_proj", + "language_model.model.layers.20.self_attn.o_proj", + "language_model.model.layers.20.self_attn.q_a_proj", + "language_model.model.layers.20.self_attn.q_b_proj", + "language_model.model.layers.21.mlp.gate", + "language_model.model.layers.21.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.21.self_attn.kv_b_proj", + "language_model.model.layers.21.self_attn.o_proj", + "language_model.model.layers.21.self_attn.q_a_proj", + "language_model.model.layers.21.self_attn.q_b_proj", + "language_model.model.layers.22.mlp.gate", + "language_model.model.layers.22.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.22.self_attn.kv_b_proj", + "language_model.model.layers.22.self_attn.o_proj", + "language_model.model.layers.22.self_attn.q_a_proj", + "language_model.model.layers.22.self_attn.q_b_proj", + "language_model.model.layers.23.mlp.gate", + "language_model.model.layers.23.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.23.self_attn.kv_b_proj", + "language_model.model.layers.23.self_attn.o_proj", + "language_model.model.layers.23.self_attn.q_a_proj", + "language_model.model.layers.23.self_attn.q_b_proj", + "language_model.model.layers.24.mlp.gate", + "language_model.model.layers.24.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.24.self_attn.kv_b_proj", + "language_model.model.layers.24.self_attn.o_proj", + "language_model.model.layers.24.self_attn.q_a_proj", + "language_model.model.layers.24.self_attn.q_b_proj", + "language_model.model.layers.25.mlp.gate", + "language_model.model.layers.25.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.25.self_attn.kv_b_proj", + 
"language_model.model.layers.25.self_attn.o_proj", + "language_model.model.layers.25.self_attn.q_a_proj", + "language_model.model.layers.25.self_attn.q_b_proj", + "language_model.model.layers.26.mlp.gate", + "language_model.model.layers.26.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.26.self_attn.kv_b_proj", + "language_model.model.layers.26.self_attn.o_proj", + "language_model.model.layers.26.self_attn.q_a_proj", + "language_model.model.layers.26.self_attn.q_b_proj", + "language_model.model.layers.27.mlp.gate", + "language_model.model.layers.27.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.27.self_attn.kv_b_proj", + "language_model.model.layers.27.self_attn.o_proj", + "language_model.model.layers.27.self_attn.q_a_proj", + "language_model.model.layers.27.self_attn.q_b_proj", + "language_model.model.layers.28.mlp.gate", + "language_model.model.layers.28.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.28.self_attn.kv_b_proj", + "language_model.model.layers.28.self_attn.o_proj", + "language_model.model.layers.28.self_attn.q_a_proj", + "language_model.model.layers.28.self_attn.q_b_proj", + "language_model.model.layers.29.mlp.gate", + "language_model.model.layers.29.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.29.self_attn.kv_b_proj", + "language_model.model.layers.29.self_attn.o_proj", + "language_model.model.layers.29.self_attn.q_a_proj", + "language_model.model.layers.29.self_attn.q_b_proj", + "language_model.model.layers.3.mlp.gate", + "language_model.model.layers.3.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.3.self_attn.kv_b_proj", + "language_model.model.layers.3.self_attn.o_proj", + "language_model.model.layers.3.self_attn.q_a_proj", + "language_model.model.layers.3.self_attn.q_b_proj", + "language_model.model.layers.30.mlp.gate", + "language_model.model.layers.30.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.30.self_attn.kv_b_proj", + 
"language_model.model.layers.30.self_attn.o_proj", + "language_model.model.layers.30.self_attn.q_a_proj", + "language_model.model.layers.30.self_attn.q_b_proj", + "language_model.model.layers.31.mlp.gate", + "language_model.model.layers.31.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.31.self_attn.kv_b_proj", + "language_model.model.layers.31.self_attn.o_proj", + "language_model.model.layers.31.self_attn.q_a_proj", + "language_model.model.layers.31.self_attn.q_b_proj", + "language_model.model.layers.32.mlp.gate", + "language_model.model.layers.32.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.32.self_attn.kv_b_proj", + "language_model.model.layers.32.self_attn.o_proj", + "language_model.model.layers.32.self_attn.q_a_proj", + "language_model.model.layers.32.self_attn.q_b_proj", + "language_model.model.layers.33.mlp.gate", + "language_model.model.layers.33.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.33.self_attn.kv_b_proj", + "language_model.model.layers.33.self_attn.o_proj", + "language_model.model.layers.33.self_attn.q_a_proj", + "language_model.model.layers.33.self_attn.q_b_proj", + "language_model.model.layers.34.mlp.gate", + "language_model.model.layers.34.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.34.self_attn.kv_b_proj", + "language_model.model.layers.34.self_attn.o_proj", + "language_model.model.layers.34.self_attn.q_a_proj", + "language_model.model.layers.34.self_attn.q_b_proj", + "language_model.model.layers.35.mlp.gate", + "language_model.model.layers.35.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.35.self_attn.kv_b_proj", + "language_model.model.layers.35.self_attn.o_proj", + "language_model.model.layers.35.self_attn.q_a_proj", + "language_model.model.layers.35.self_attn.q_b_proj", + "language_model.model.layers.36.mlp.gate", + "language_model.model.layers.36.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.36.self_attn.kv_b_proj", + 
"language_model.model.layers.36.self_attn.o_proj", + "language_model.model.layers.36.self_attn.q_a_proj", + "language_model.model.layers.36.self_attn.q_b_proj", + "language_model.model.layers.37.mlp.gate", + "language_model.model.layers.37.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.37.self_attn.kv_b_proj", + "language_model.model.layers.37.self_attn.o_proj", + "language_model.model.layers.37.self_attn.q_a_proj", + "language_model.model.layers.37.self_attn.q_b_proj", + "language_model.model.layers.38.mlp.gate", + "language_model.model.layers.38.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.38.self_attn.kv_b_proj", + "language_model.model.layers.38.self_attn.o_proj", + "language_model.model.layers.38.self_attn.q_a_proj", + "language_model.model.layers.38.self_attn.q_b_proj", + "language_model.model.layers.39.mlp.gate", + "language_model.model.layers.39.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.39.self_attn.kv_b_proj", + "language_model.model.layers.39.self_attn.o_proj", + "language_model.model.layers.39.self_attn.q_a_proj", + "language_model.model.layers.39.self_attn.q_b_proj", + "language_model.model.layers.4.mlp.gate", + "language_model.model.layers.4.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.4.self_attn.kv_b_proj", + "language_model.model.layers.4.self_attn.o_proj", + "language_model.model.layers.4.self_attn.q_a_proj", + "language_model.model.layers.4.self_attn.q_b_proj", + "language_model.model.layers.40.mlp.gate", + "language_model.model.layers.40.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.40.self_attn.kv_b_proj", + "language_model.model.layers.40.self_attn.o_proj", + "language_model.model.layers.40.self_attn.q_a_proj", + "language_model.model.layers.40.self_attn.q_b_proj", + "language_model.model.layers.41.mlp.gate", + "language_model.model.layers.41.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.41.self_attn.kv_b_proj", + 
"language_model.model.layers.41.self_attn.o_proj", + "language_model.model.layers.41.self_attn.q_a_proj", + "language_model.model.layers.41.self_attn.q_b_proj", + "language_model.model.layers.42.mlp.gate", + "language_model.model.layers.42.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.42.self_attn.kv_b_proj", + "language_model.model.layers.42.self_attn.o_proj", + "language_model.model.layers.42.self_attn.q_a_proj", + "language_model.model.layers.42.self_attn.q_b_proj", + "language_model.model.layers.43.mlp.gate", + "language_model.model.layers.43.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.43.self_attn.kv_b_proj", + "language_model.model.layers.43.self_attn.o_proj", + "language_model.model.layers.43.self_attn.q_a_proj", + "language_model.model.layers.43.self_attn.q_b_proj", + "language_model.model.layers.44.mlp.gate", + "language_model.model.layers.44.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.44.self_attn.kv_b_proj", + "language_model.model.layers.44.self_attn.o_proj", + "language_model.model.layers.44.self_attn.q_a_proj", + "language_model.model.layers.44.self_attn.q_b_proj", + "language_model.model.layers.45.mlp.gate", + "language_model.model.layers.45.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.45.self_attn.kv_b_proj", + "language_model.model.layers.45.self_attn.o_proj", + "language_model.model.layers.45.self_attn.q_a_proj", + "language_model.model.layers.45.self_attn.q_b_proj", + "language_model.model.layers.46.mlp.gate", + "language_model.model.layers.46.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.46.self_attn.kv_b_proj", + "language_model.model.layers.46.self_attn.o_proj", + "language_model.model.layers.46.self_attn.q_a_proj", + "language_model.model.layers.46.self_attn.q_b_proj", + "language_model.model.layers.47.mlp.gate", + "language_model.model.layers.47.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.47.self_attn.kv_b_proj", + 
"language_model.model.layers.47.self_attn.o_proj", + "language_model.model.layers.47.self_attn.q_a_proj", + "language_model.model.layers.47.self_attn.q_b_proj", + "language_model.model.layers.48.mlp.gate", + "language_model.model.layers.48.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.48.self_attn.kv_b_proj", + "language_model.model.layers.48.self_attn.o_proj", + "language_model.model.layers.48.self_attn.q_a_proj", + "language_model.model.layers.48.self_attn.q_b_proj", + "language_model.model.layers.49.mlp.gate", + "language_model.model.layers.49.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.49.self_attn.kv_b_proj", + "language_model.model.layers.49.self_attn.o_proj", + "language_model.model.layers.49.self_attn.q_a_proj", + "language_model.model.layers.49.self_attn.q_b_proj", + "language_model.model.layers.5.mlp.gate", + "language_model.model.layers.5.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.5.self_attn.kv_b_proj", + "language_model.model.layers.5.self_attn.o_proj", + "language_model.model.layers.5.self_attn.q_a_proj", + "language_model.model.layers.5.self_attn.q_b_proj", + "language_model.model.layers.50.mlp.gate", + "language_model.model.layers.50.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.50.self_attn.kv_b_proj", + "language_model.model.layers.50.self_attn.o_proj", + "language_model.model.layers.50.self_attn.q_a_proj", + "language_model.model.layers.50.self_attn.q_b_proj", + "language_model.model.layers.51.mlp.gate", + "language_model.model.layers.51.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.51.self_attn.kv_b_proj", + "language_model.model.layers.51.self_attn.o_proj", + "language_model.model.layers.51.self_attn.q_a_proj", + "language_model.model.layers.51.self_attn.q_b_proj", + "language_model.model.layers.52.mlp.gate", + "language_model.model.layers.52.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.52.self_attn.kv_b_proj", + 
"language_model.model.layers.52.self_attn.o_proj", + "language_model.model.layers.52.self_attn.q_a_proj", + "language_model.model.layers.52.self_attn.q_b_proj", + "language_model.model.layers.53.mlp.gate", + "language_model.model.layers.53.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.53.self_attn.kv_b_proj", + "language_model.model.layers.53.self_attn.o_proj", + "language_model.model.layers.53.self_attn.q_a_proj", + "language_model.model.layers.53.self_attn.q_b_proj", + "language_model.model.layers.54.mlp.gate", + "language_model.model.layers.54.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.54.self_attn.kv_b_proj", + "language_model.model.layers.54.self_attn.o_proj", + "language_model.model.layers.54.self_attn.q_a_proj", + "language_model.model.layers.54.self_attn.q_b_proj", + "language_model.model.layers.55.mlp.gate", + "language_model.model.layers.55.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.55.self_attn.kv_b_proj", + "language_model.model.layers.55.self_attn.o_proj", + "language_model.model.layers.55.self_attn.q_a_proj", + "language_model.model.layers.55.self_attn.q_b_proj", + "language_model.model.layers.56.mlp.gate", + "language_model.model.layers.56.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.56.self_attn.kv_b_proj", + "language_model.model.layers.56.self_attn.o_proj", + "language_model.model.layers.56.self_attn.q_a_proj", + "language_model.model.layers.56.self_attn.q_b_proj", + "language_model.model.layers.57.mlp.gate", + "language_model.model.layers.57.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.57.self_attn.kv_b_proj", + "language_model.model.layers.57.self_attn.o_proj", + "language_model.model.layers.57.self_attn.q_a_proj", + "language_model.model.layers.57.self_attn.q_b_proj", + "language_model.model.layers.58.mlp.gate", + "language_model.model.layers.58.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.58.self_attn.kv_b_proj", + 
"language_model.model.layers.58.self_attn.o_proj", + "language_model.model.layers.58.self_attn.q_a_proj", + "language_model.model.layers.58.self_attn.q_b_proj", + "language_model.model.layers.59.mlp.gate", + "language_model.model.layers.59.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.59.self_attn.kv_b_proj", + "language_model.model.layers.59.self_attn.o_proj", + "language_model.model.layers.59.self_attn.q_a_proj", + "language_model.model.layers.59.self_attn.q_b_proj", + "language_model.model.layers.6.mlp.gate", + "language_model.model.layers.6.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.6.self_attn.kv_b_proj", + "language_model.model.layers.6.self_attn.o_proj", + "language_model.model.layers.6.self_attn.q_a_proj", + "language_model.model.layers.6.self_attn.q_b_proj", + "language_model.model.layers.60.mlp.gate", + "language_model.model.layers.60.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.60.self_attn.kv_b_proj", + "language_model.model.layers.60.self_attn.o_proj", + "language_model.model.layers.60.self_attn.q_a_proj", + "language_model.model.layers.60.self_attn.q_b_proj", + "language_model.model.layers.7.mlp.gate", + "language_model.model.layers.7.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.7.self_attn.kv_b_proj", + "language_model.model.layers.7.self_attn.o_proj", + "language_model.model.layers.7.self_attn.q_a_proj", + "language_model.model.layers.7.self_attn.q_b_proj", + "language_model.model.layers.8.mlp.gate", + "language_model.model.layers.8.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.8.self_attn.kv_b_proj", + "language_model.model.layers.8.self_attn.o_proj", + "language_model.model.layers.8.self_attn.q_a_proj", + "language_model.model.layers.8.self_attn.q_b_proj", + "language_model.model.layers.9.mlp.gate", + "language_model.model.layers.9.self_attn.kv_a_proj_with_mqa", + "language_model.model.layers.9.self_attn.kv_b_proj", + 
"language_model.model.layers.9.self_attn.o_proj", + "language_model.model.layers.9.self_attn.q_a_proj", + "language_model.model.layers.9.self_attn.q_b_proj", + "mm_projector.proj.0", + "mm_projector.proj.2", + "vision_tower.encoder.blocks.0.mlp.fc0", + "vision_tower.encoder.blocks.0.mlp.fc1", + "vision_tower.encoder.blocks.0.norm0", + "vision_tower.encoder.blocks.0.norm1", + "vision_tower.encoder.blocks.0.wo", + "vision_tower.encoder.blocks.0.wqkv", + "vision_tower.encoder.blocks.1.mlp.fc0", + "vision_tower.encoder.blocks.1.mlp.fc1", + "vision_tower.encoder.blocks.1.norm0", + "vision_tower.encoder.blocks.1.norm1", + "vision_tower.encoder.blocks.1.wo", + "vision_tower.encoder.blocks.1.wqkv", + "vision_tower.encoder.blocks.10.mlp.fc0", + "vision_tower.encoder.blocks.10.mlp.fc1", + "vision_tower.encoder.blocks.10.norm0", + "vision_tower.encoder.blocks.10.norm1", + "vision_tower.encoder.blocks.10.wo", + "vision_tower.encoder.blocks.10.wqkv", + "vision_tower.encoder.blocks.11.mlp.fc0", + "vision_tower.encoder.blocks.11.mlp.fc1", + "vision_tower.encoder.blocks.11.norm0", + "vision_tower.encoder.blocks.11.norm1", + "vision_tower.encoder.blocks.11.wo", + "vision_tower.encoder.blocks.11.wqkv", + "vision_tower.encoder.blocks.12.mlp.fc0", + "vision_tower.encoder.blocks.12.mlp.fc1", + "vision_tower.encoder.blocks.12.norm0", + "vision_tower.encoder.blocks.12.norm1", + "vision_tower.encoder.blocks.12.wo", + "vision_tower.encoder.blocks.12.wqkv", + "vision_tower.encoder.blocks.13.mlp.fc0", + "vision_tower.encoder.blocks.13.mlp.fc1", + "vision_tower.encoder.blocks.13.norm0", + "vision_tower.encoder.blocks.13.norm1", + "vision_tower.encoder.blocks.13.wo", + "vision_tower.encoder.blocks.13.wqkv", + "vision_tower.encoder.blocks.14.mlp.fc0", + "vision_tower.encoder.blocks.14.mlp.fc1", + "vision_tower.encoder.blocks.14.norm0", + "vision_tower.encoder.blocks.14.norm1", + "vision_tower.encoder.blocks.14.wo", + "vision_tower.encoder.blocks.14.wqkv", + 
"vision_tower.encoder.blocks.15.mlp.fc0", + "vision_tower.encoder.blocks.15.mlp.fc1", + "vision_tower.encoder.blocks.15.norm0", + "vision_tower.encoder.blocks.15.norm1", + "vision_tower.encoder.blocks.15.wo", + "vision_tower.encoder.blocks.15.wqkv", + "vision_tower.encoder.blocks.16.mlp.fc0", + "vision_tower.encoder.blocks.16.mlp.fc1", + "vision_tower.encoder.blocks.16.norm0", + "vision_tower.encoder.blocks.16.norm1", + "vision_tower.encoder.blocks.16.wo", + "vision_tower.encoder.blocks.16.wqkv", + "vision_tower.encoder.blocks.17.mlp.fc0", + "vision_tower.encoder.blocks.17.mlp.fc1", + "vision_tower.encoder.blocks.17.norm0", + "vision_tower.encoder.blocks.17.norm1", + "vision_tower.encoder.blocks.17.wo", + "vision_tower.encoder.blocks.17.wqkv", + "vision_tower.encoder.blocks.18.mlp.fc0", + "vision_tower.encoder.blocks.18.mlp.fc1", + "vision_tower.encoder.blocks.18.norm0", + "vision_tower.encoder.blocks.18.norm1", + "vision_tower.encoder.blocks.18.wo", + "vision_tower.encoder.blocks.18.wqkv", + "vision_tower.encoder.blocks.19.mlp.fc0", + "vision_tower.encoder.blocks.19.mlp.fc1", + "vision_tower.encoder.blocks.19.norm0", + "vision_tower.encoder.blocks.19.norm1", + "vision_tower.encoder.blocks.19.wo", + "vision_tower.encoder.blocks.19.wqkv", + "vision_tower.encoder.blocks.2.mlp.fc0", + "vision_tower.encoder.blocks.2.mlp.fc1", + "vision_tower.encoder.blocks.2.norm0", + "vision_tower.encoder.blocks.2.norm1", + "vision_tower.encoder.blocks.2.wo", + "vision_tower.encoder.blocks.2.wqkv", + "vision_tower.encoder.blocks.20.mlp.fc0", + "vision_tower.encoder.blocks.20.mlp.fc1", + "vision_tower.encoder.blocks.20.norm0", + "vision_tower.encoder.blocks.20.norm1", + "vision_tower.encoder.blocks.20.wo", + "vision_tower.encoder.blocks.20.wqkv", + "vision_tower.encoder.blocks.21.mlp.fc0", + "vision_tower.encoder.blocks.21.mlp.fc1", + "vision_tower.encoder.blocks.21.norm0", + "vision_tower.encoder.blocks.21.norm1", + "vision_tower.encoder.blocks.21.wo", + 
"vision_tower.encoder.blocks.21.wqkv", + "vision_tower.encoder.blocks.22.mlp.fc0", + "vision_tower.encoder.blocks.22.mlp.fc1", + "vision_tower.encoder.blocks.22.norm0", + "vision_tower.encoder.blocks.22.norm1", + "vision_tower.encoder.blocks.22.wo", + "vision_tower.encoder.blocks.22.wqkv", + "vision_tower.encoder.blocks.23.mlp.fc0", + "vision_tower.encoder.blocks.23.mlp.fc1", + "vision_tower.encoder.blocks.23.norm0", + "vision_tower.encoder.blocks.23.norm1", + "vision_tower.encoder.blocks.23.wo", + "vision_tower.encoder.blocks.23.wqkv", + "vision_tower.encoder.blocks.24.mlp.fc0", + "vision_tower.encoder.blocks.24.mlp.fc1", + "vision_tower.encoder.blocks.24.norm0", + "vision_tower.encoder.blocks.24.norm1", + "vision_tower.encoder.blocks.24.wo", + "vision_tower.encoder.blocks.24.wqkv", + "vision_tower.encoder.blocks.25.mlp.fc0", + "vision_tower.encoder.blocks.25.mlp.fc1", + "vision_tower.encoder.blocks.25.norm0", + "vision_tower.encoder.blocks.25.norm1", + "vision_tower.encoder.blocks.25.wo", + "vision_tower.encoder.blocks.25.wqkv", + "vision_tower.encoder.blocks.26.mlp.fc0", + "vision_tower.encoder.blocks.26.mlp.fc1", + "vision_tower.encoder.blocks.26.norm0", + "vision_tower.encoder.blocks.26.norm1", + "vision_tower.encoder.blocks.26.wo", + "vision_tower.encoder.blocks.26.wqkv", + "vision_tower.encoder.blocks.3.mlp.fc0", + "vision_tower.encoder.blocks.3.mlp.fc1", + "vision_tower.encoder.blocks.3.norm0", + "vision_tower.encoder.blocks.3.norm1", + "vision_tower.encoder.blocks.3.wo", + "vision_tower.encoder.blocks.3.wqkv", + "vision_tower.encoder.blocks.4.mlp.fc0", + "vision_tower.encoder.blocks.4.mlp.fc1", + "vision_tower.encoder.blocks.4.norm0", + "vision_tower.encoder.blocks.4.norm1", + "vision_tower.encoder.blocks.4.wo", + "vision_tower.encoder.blocks.4.wqkv", + "vision_tower.encoder.blocks.5.mlp.fc0", + "vision_tower.encoder.blocks.5.mlp.fc1", + "vision_tower.encoder.blocks.5.norm0", + "vision_tower.encoder.blocks.5.norm1", + "vision_tower.encoder.blocks.5.wo", + 
"vision_tower.encoder.blocks.5.wqkv", + "vision_tower.encoder.blocks.6.mlp.fc0", + "vision_tower.encoder.blocks.6.mlp.fc1", + "vision_tower.encoder.blocks.6.norm0", + "vision_tower.encoder.blocks.6.norm1", + "vision_tower.encoder.blocks.6.wo", + "vision_tower.encoder.blocks.6.wqkv", + "vision_tower.encoder.blocks.7.mlp.fc0", + "vision_tower.encoder.blocks.7.mlp.fc1", + "vision_tower.encoder.blocks.7.norm0", + "vision_tower.encoder.blocks.7.norm1", + "vision_tower.encoder.blocks.7.wo", + "vision_tower.encoder.blocks.7.wqkv", + "vision_tower.encoder.blocks.8.mlp.fc0", + "vision_tower.encoder.blocks.8.mlp.fc1", + "vision_tower.encoder.blocks.8.norm0", + "vision_tower.encoder.blocks.8.norm1", + "vision_tower.encoder.blocks.8.wo", + "vision_tower.encoder.blocks.8.wqkv", + "vision_tower.encoder.blocks.9.mlp.fc0", + "vision_tower.encoder.blocks.9.mlp.fc1", + "vision_tower.encoder.blocks.9.norm0", + "vision_tower.encoder.blocks.9.norm1", + "vision_tower.encoder.blocks.9.wo", + "vision_tower.encoder.blocks.9.wqkv" ], "algo_config": null, "softmax_quant_spec": null, @@ -208,7 +732,7 @@ "kv_cache_quant_config": {}, "kv_cache_post_rope": false, "quant_mode": "eager_mode", - "version": "0.11+4a34634b4a", + "version": "0.11.2+b560ff9e7f9", "export": { "kv_cache_group": [], "min_kv_scale": 0.0, @@ -217,4 +741,4 @@ "weight_merge_groups": null } } -} +} \ No newline at end of file diff --git a/docs/deploy_guidance.md b/docs/deploy_guidance.md new file mode 100644 index 0000000000000000000000000000000000000000..1aac57bead663d507b156284690add81f1146953 --- /dev/null +++ b/docs/deploy_guidance.md @@ -0,0 +1,82 @@ +# Kimi-K2.5 Deployment Guide + +> [!Note] +> This guide only provides some examples of deployment commands for Kimi-K2.5, which may not be the optimal configuration. Since inference engines are still being updated frequently, please continue to follow the guidance from their homepage if you want to achieve better inference performance. 
+ +> The kimi_k2 reasoning parser and other related features have been merged into vLLM/sglang and will be available in the next release. For now, please use the nightly build Docker image. +## vLLM Deployment + +This model is available in the nightly vLLM wheel: +``` +uv pip install -U vllm \ + --torch-backend=auto \ + --extra-index-url https://wheels.vllm.ai/nightly +``` + +Here is an example of serving this model on a single H200 node with TP8 via vLLM: +```bash +vllm serve $MODEL_PATH -tp 8 --mm-encoder-tp-mode data --trust-remote-code --tool-call-parser kimi_k2 --reasoning-parser kimi_k2 +``` +**Key notes** +- `--tool-call-parser kimi_k2`: Required for enabling tool calling +- `--reasoning-parser kimi_k2`: Kimi-K2.5 enables thinking mode by default. Make sure to pass this for correct reasoning processing. + +## SGLang Deployment + +This model is available on the latest SGLang main branch: + +``` +pip install "sglang @ git+https://github.com/sgl-project/sglang.git#subdirectory=python" +pip install nvidia-cudnn-cu12==9.16.0.29 +``` + +Similarly, here is an example of running it with TP8 on a single H200 node via SGLang: +``` bash +sglang serve --model-path $MODEL_PATH --tp 8 --trust-remote-code --tool-call-parser kimi_k2 --reasoning-parser kimi_k2 +``` +**Key parameter notes:** +- `--tool-call-parser kimi_k2`: Required when enabling tool usage. +- `--reasoning-parser kimi_k2`: Required for correctly processing reasoning content. 
+ +## KTransformers Deployment +### KTransformers+SGLang Inference Deployment +Launch with KTransformers + SGLang for CPU+GPU heterogeneous inference: + +``` +python -m sglang.launch_server \ + --model path/to/Kimi-K2.5/ \ + --kt-amx-weight-path path/to/Kimi-K2.5/ \ + --kt-cpuinfer 64 \ + --kt-threadpool-count 2 \ + --kt-num-gpu-experts 180 \ + --kt-amx-method AMXINT4 \ + --trust-remote-code \ + --mem-fraction-static 0.98 \ + --chunked-prefill-size 16384 \ + --max-running-requests 48 \ + --max-total-tokens 50000 \ + --tensor-parallel-size 8 \ + --enable-p2p-check \ + --disable-shared-experts-fusion +``` + +This achieves 640.12 tokens/s Prefill and 24.51 tokens/s Decode (48-way concurrency) on 8× NVIDIA L20 + 2× Intel 6454S. + +More details: https://github.com/kvcache-ai/ktransformers/blob/main/doc/en/Kimi-K2.5.md . + +### KTransformers+LLaMA-Factory Fine-tuning Deployment + +You can use the commands below to run LoRA SFT with KT+llamafactory. + +``` +# For LoRA SFT +USE_KT=1 llamafactory-cli train examples/train_lora/kimik2_lora_sft_kt.yaml +# For Chat with model after LoRA SFT +llamafactory-cli chat examples/inference/kimik2_lora_sft_kt.yaml +# For API with model after LoRA SFT +llamafactory-cli api examples/inference/kimik2_lora_sft_kt.yaml +``` + +This achieves end-to-end LoRA SFT Throughput: 44.55 token/s on 2× NVIDIA 4090 + Intel 8488C with 1.97T RAM and 200G swap memory. + +For more details, refer to https://github.com/kvcache-ai/ktransformers/blob/main/doc/en/SFT_Installation_Guide_KimiK2.5.md . 
diff --git a/figures/demo_video.mp4 b/figures/demo_video.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d4d34d31bd1a855188d341793a3144d7f97dd7d4 --- /dev/null +++ b/figures/demo_video.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b4d925aa0a7c712feef50765355f0625d8f6d46ea302fd98db9609e9070047 +size 270100 diff --git a/figures/kimi-logo.png b/figures/kimi-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..870b8be6e07cc2c46f7173e800fbaff8af0af5d1 Binary files /dev/null and b/figures/kimi-logo.png differ diff --git a/model-00001-of-000064.safetensors b/model-00001-of-000064.safetensors index 809b7aaebbd49d29c3f749c978a4d5d41af7e822..bac54212a87351ceebecba6a07fae05ee3fe94ef 100644 --- a/model-00001-of-000064.safetensors +++ b/model-00001-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18daf53a15070b9c70d8bb63420dbd39764af9118af67982eeb60749f5453233 -size 995001888 +oid sha256:14c37abfedf2540344d9c088f8394358ac06c6409809e7ccea10e6a792d25124 +size 412845960 diff --git a/model-00002-of-000064.safetensors b/model-00002-of-000064.safetensors index 41e754fe95f220d24b3a155a77e0c0f2a60fd747..541fdf5dc595ec58ba45d4dc916312f436841a97 100644 --- a/model-00002-of-000064.safetensors +++ b/model-00002-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b80fdcfe1617d02fd492347cd8d21f2722087720264f02a344d0d923d0914f1 -size 9280387888 +oid sha256:792a1c34d7039f97a22bbe331210186a1efe648b217394033cd788c09f547c25 +size 9215704248 diff --git a/model-00003-of-000064.safetensors b/model-00003-of-000064.safetensors index ca5d96630d018726ebf60c7df16fc22825d72fc6..cf036cd93ad5475ce3fcb7958aec6d729a685f1f 100644 --- a/model-00003-of-000064.safetensors +++ b/model-00003-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d9e99bdea44c88c3fc99b74167f9072c41c84a33e1b42d9e3771aa390c97eeb 
-size 9280387888 +oid sha256:fda96a9661fdda6853f2bc5514978d21b8176e9ac423f6da6dde4e5ab39d3e17 +size 9215704248 diff --git a/model-00004-of-000064.safetensors b/model-00004-of-000064.safetensors index bb34458afc75517fcba8213bb0ede1f1173f1e7c..67440616d55021a3d8582e6fa0b26158c4041ede 100644 --- a/model-00004-of-000064.safetensors +++ b/model-00004-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:485d3d217728d45507a2fdfde8f9600f3bc06d418ed55e4f7443ac1c2304f256 -size 9280387888 +oid sha256:1d7fdbd62d7e6e80c01957217c233c397a6e7311646d9083a8fa715331c08587 +size 9215704248 diff --git a/model-00005-of-000064.safetensors b/model-00005-of-000064.safetensors index b45a7792c334955d60bcda5dca30f01987d5b05f..e87016a935e840a8ca762a3afa1654da43a538a2 100644 --- a/model-00005-of-000064.safetensors +++ b/model-00005-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a91f6c484c960703a9a0f9366db10ae6fc28fb1c1de84f5c5fd910d1a984a6b -size 9280387888 +oid sha256:0faf42d63e96cc159d1aeea33b704f5e9377e56c87db6f102bb88d2ec923f1d7 +size 9215704248 diff --git a/model-00006-of-000064.safetensors b/model-00006-of-000064.safetensors index 363057d9c9d281163c4bb29f2de26356fe33e162..5816bf1f9fd63b2626262f53fc72b63eb9c80c2b 100644 --- a/model-00006-of-000064.safetensors +++ b/model-00006-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03bf67c02fe04d31ce317041ba9c3ae2d2d6804c2da7180a0890335979ed92b3 -size 9280387888 +oid sha256:10c9da3f02f978a504c12d12b9bd5d9b67264bd9b8e55eed3a6f691fff589f7e +size 9215704248 diff --git a/model-00007-of-000064.safetensors b/model-00007-of-000064.safetensors index 2adc6cf8fde7fbd279a3a8106ace9f119edc3f8f..d2967c69fc041068a82ba608fe994badd528e8f4 100644 --- a/model-00007-of-000064.safetensors +++ b/model-00007-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d376e7fe9d75b924986501db22b2e402a85f33031f271bf0d49edd32b28093de -size 9280387888 +oid sha256:9a65675206a855250093b68c9fcf5423dac4847b874df67d9cce8e98305402f1 +size 9215704248 diff --git a/model-00008-of-000064.safetensors b/model-00008-of-000064.safetensors index 9ee973d8b3945d8c172ca277ff75eac2a70c3cf7..5510478ed460c6e6cc1813b2f75504faf2b9b682 100644 --- a/model-00008-of-000064.safetensors +++ b/model-00008-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae5dd2a875a0ddc3a6de77f14adb608adb553213899a667f74594a08540307e3 -size 9280387888 +oid sha256:0045cc5f47eca2266d4e6610226a69bfd2175a3dfc4334250a0acd9899f4eb43 +size 9215704248 diff --git a/model-00009-of-000064.safetensors b/model-00009-of-000064.safetensors index 60dd81283e999bd6ef9425b5c226c8ba6d842cae..9965525f86af15f2697605258a8f9c720a650884 100644 --- a/model-00009-of-000064.safetensors +++ b/model-00009-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2a1b0c0d06cbdc03715c65c05f64f9ecf1705d8c2ec7f4a42b064c513a00d72 -size 9280387888 +oid sha256:14115c4305225f6c65f74a8bec71efd26fb7326a8ea1058f675f8530596b4186 +size 9215704248 diff --git a/model-00010-of-000064.safetensors b/model-00010-of-000064.safetensors index 332050e96bb09d8845492e2a0fdadf1eaeabd4ad..298d2a6eaf83a8f4e0121a1ba5b0875625726f95 100644 --- a/model-00010-of-000064.safetensors +++ b/model-00010-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff40390d24131d0d9c210bf03b5bb48d42d5105887ea8f20348573a313db9897 -size 9280387888 +oid sha256:f424523790a658bbb7a4ec62bcbd5310add58b755ece211bb0c0dd82a15e745d +size 9215704248 diff --git a/model-00011-of-000064.safetensors b/model-00011-of-000064.safetensors index fa48aa72ccb056a99d71d740da0a6b37bb7568b8..01772cd2ceb20e6bd43471c7d60d534e2eb45651 100644 --- a/model-00011-of-000064.safetensors +++ b/model-00011-of-000064.safetensors @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:5d28d28bbb697f45dde07299aa881b35239704e5f91cdb77609f468934a3ef37 -size 9280390208 +oid sha256:15f8b1b9457361aa7f2a9e34db370f8f287c233b15fa5e428602e35bb8f5db2c +size 9215706568 diff --git a/model-00012-of-000064.safetensors b/model-00012-of-000064.safetensors index 43701f8f46f041f3837c5a817b5ee035da672fb3..f79a5ff94e16445b3cfc56a2505b1ef40893ca12 100644 --- a/model-00012-of-000064.safetensors +++ b/model-00012-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:efe3b96284fac1b594944e9801726d84f71d7247b6a195c77ec8d98f1e582376 -size 9280390208 +oid sha256:226a876a929af9c38ca586f7661045552d86aff02d8a43880f0ccc78c274c938 +size 9215706568 diff --git a/model-00013-of-000064.safetensors b/model-00013-of-000064.safetensors index 889a02ed2f767903ccada00042c41385c7703a52..edc2c2a1e771513c5d270e1fc7a3441f6f9ae65a 100644 --- a/model-00013-of-000064.safetensors +++ b/model-00013-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb29f46107bc4221fc3a16d76f61e348b9f96f5054ac2a1736f7a5be65800601 -size 9280390208 +oid sha256:ccd7616d80bc16837103e64d7fd2d020393001e5e59a43f416822c14afdef65c +size 9215706568 diff --git a/model-00014-of-000064.safetensors b/model-00014-of-000064.safetensors index ff8233f4c01697ef35dc072d47aa120c6f417757..4005a5c9c2dd3c0bf5ff4d3221b9b36de4826cd5 100644 --- a/model-00014-of-000064.safetensors +++ b/model-00014-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2408502c83b4498998cff1fe6c5c5e940fbdead1652d9dd81459df0c89eeea1 -size 9280390208 +oid sha256:2dba0172ab2774268198058cd689a66d39e21fbd149730d315f0aef891119672 +size 9215706568 diff --git a/model-00015-of-000064.safetensors b/model-00015-of-000064.safetensors index 6545328fe4fd3cd989dcaba4baa3ab44cdcf2836..deafe588b7fffaa8007a519119733b6e17ae91d8 100644 --- a/model-00015-of-000064.safetensors +++ b/model-00015-of-000064.safetensors 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22d9760a5997e71fedb17919636d1637992b05f90bcf6ba9872352bc328f9e93 -size 9280390208 +oid sha256:93346f577c4a9d44016a2829372c98c3b7f2c3567423202ac0550f51dde116b7 +size 9215706568 diff --git a/model-00016-of-000064.safetensors b/model-00016-of-000064.safetensors index 4fd5a748ab73738e039c33e316dd447cddfd8ff5..e69565a8176d3891f235148a0c1f822efd8f1c3c 100644 --- a/model-00016-of-000064.safetensors +++ b/model-00016-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f6065fc376cd1750db9ea6ec0e86f2b43d22605a59c8c19dabb465da9d6598b -size 9280390208 +oid sha256:1ff3a6fcf528980439f8b2c24b74d8ed5abe3790db678132f46d9043319030bb +size 9215706568 diff --git a/model-00017-of-000064.safetensors b/model-00017-of-000064.safetensors index 2775d684cd4226b8b627512b4a5dff7fedd41195..71ea09a301d76cf1a9a667946db91f6f2e9b6aa5 100644 --- a/model-00017-of-000064.safetensors +++ b/model-00017-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5adf83a3c83d6ae280603a4d17a83e331e927f667208dfa72872ff0878f158f0 -size 9280390208 +oid sha256:77d4caa81cc735aed439a0c294413ba33fe78e1f7a5fc0bd81ecebd6261de255 +size 9215706568 diff --git a/model-00018-of-000064.safetensors b/model-00018-of-000064.safetensors index f3094d9cda86df52b95fc4057385fee95e882b61..de6c11d17bc322beaf04e15cbf0f452eeb908bdb 100644 --- a/model-00018-of-000064.safetensors +++ b/model-00018-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9bbd89e36450acaddf6baaeddf38b7cc71155077fbe5159e0c190de62eb8fcbd -size 9280390208 +oid sha256:6a84f144d2b12e81f3f0bfb4f950b6dbcd5c987eea8f2fce4d775cff04450408 +size 9215706568 diff --git a/model-00019-of-000064.safetensors b/model-00019-of-000064.safetensors index 04a47a42a0f703429b049ff75e38016e8e10f08a..e18b035dcf7a5cdd035c55300c6f7ad773a8f8ef 100644 --- a/model-00019-of-000064.safetensors +++ 
b/model-00019-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5058f1819d4e323a44d86e45d761d6bf7991895880b2d84cfe073d5ec2dda008 -size 9280390208 +oid sha256:12095b71eccd3826b28ed63a45865697311291c5b4bed53b8ae8bd0756f1e803 +size 9215706568 diff --git a/model-00020-of-000064.safetensors b/model-00020-of-000064.safetensors index 273efa18b8ef263bcbd627daeb65a8b9d5a93147..18d62ae4fe6d00ad09c9c5b10de8b80f7d2d0757 100644 --- a/model-00020-of-000064.safetensors +++ b/model-00020-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1477c77ea661bf6ac315eb69f557e1c7a24fff9e889774897755f761fa5cc74e -size 9280390208 +oid sha256:2d07e2ccfa9feabd932b67ef4c8d6a8c8379c9787b499dccba1ad5a5e91191ca +size 9215706568 diff --git a/model-00021-of-000064.safetensors b/model-00021-of-000064.safetensors index 24c45a20db68e6ed8cda178a09b5fdcdf293228f..cf73895526a7d9af296747ad29faa74bca75057d 100644 --- a/model-00021-of-000064.safetensors +++ b/model-00021-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:812a9a13dd1ef9b772baeaad8427cbd5431840c07fbc07551fb635dc679d6e6c -size 9280390208 +oid sha256:9a12dbcc606047811678759674f081f7bf5560a4e6098e1167c81088c4044635 +size 9215706568 diff --git a/model-00022-of-000064.safetensors b/model-00022-of-000064.safetensors index 51855622ba8ad006b776454934dc0d8a71f950a1..48071d5371c4a66379366352f51c2ebc050a59bd 100644 --- a/model-00022-of-000064.safetensors +++ b/model-00022-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74b3748f8a0e7350e243da84debc4da7e9e8bb00e6211f052be6c5c7cee6234f -size 9280390208 +oid sha256:51d6bf38338af0d117ce67e499b1b7eac35be599e1718484b71cb3a9feadfc5f +size 9215706568 diff --git a/model-00023-of-000064.safetensors b/model-00023-of-000064.safetensors index ad3e039dabe93a834f4d67b60e02769e200f482e..c68c14dc260b0da0617d54a767e85cf1c9701cdf 100644 --- 
a/model-00023-of-000064.safetensors +++ b/model-00023-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51f865ab4390fb736c1c064f63bd35a6ad1ead04567ace27bccb178f6d8dde94 -size 9280390208 +oid sha256:39fb888657d4005b5abc8eb13d2edfea0d2c18dbcc3b56374b198e0894cbf94e +size 9215706568 diff --git a/model-00024-of-000064.safetensors b/model-00024-of-000064.safetensors index 2c1e391b328d3bf2c1f8b616c20ee5840495fe6b..c224bdff75f19b2d7d66e678c1021f94e8a8d9f6 100644 --- a/model-00024-of-000064.safetensors +++ b/model-00024-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07af75e2f4a76d0be4484dcdf466507c0f2b23ab599294ea5e8fe3e30de8436c -size 9280390208 +oid sha256:d685dbe4e1676b1ce53baf95adc2e62870a74628526baa05281cea48306610fe +size 9215706568 diff --git a/model-00025-of-000064.safetensors b/model-00025-of-000064.safetensors index 94272ffb087961f9ab08a91ba9e36eaf2d899ce5..563dd6c10647dc03a1784e5d1a373bc577e9fdaa 100644 --- a/model-00025-of-000064.safetensors +++ b/model-00025-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dee177ef397da45da7f81fd42aee2058d9e14ce00d0df7a5f19caef6230e419 -size 9280390208 +oid sha256:f147528633e61d1e97a0604df23688838d071bf86d05dbb5dcdbb995ac349a88 +size 9215706568 diff --git a/model-00026-of-000064.safetensors b/model-00026-of-000064.safetensors index 7e9f1cc3c4ce7945ab3ba8108c412e1217d9e9b8..11a5c6117ea61977471e6ce54ae81b5ef2fa4d02 100644 --- a/model-00026-of-000064.safetensors +++ b/model-00026-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6410f9b5ae0fa36c28dc70a4d86f9da0ba0628bf5bcc695afa6181279a478890 -size 9280390208 +oid sha256:3404d1c7e76a17486454749e46eca43aa8d6a78a7500507e62cef1db5539ebc5 +size 9215706568 diff --git a/model-00027-of-000064.safetensors b/model-00027-of-000064.safetensors index 
0bc14c57d32ffb9686f4e1569cda1fede1a6df3c..2812ad587be0ebe6d839043926678bf3db65ab13 100644 --- a/model-00027-of-000064.safetensors +++ b/model-00027-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10fbd4f42708d8e40217b3d805192237d258b13c14e8a10f7b303e20390a5656 -size 9280390208 +oid sha256:775e12da35274b8183a976c1e486ba9c183a9fb1a758e5474fc41d4d5de4ecce +size 9215706568 diff --git a/model-00028-of-000064.safetensors b/model-00028-of-000064.safetensors index b94f639a532a213f569fef24e77bfa8350e6952f..3d3a03ca61f5c7a769ce34c72e16a907c0d6d938 100644 --- a/model-00028-of-000064.safetensors +++ b/model-00028-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:406fa91d7b312c028687e061f76e5aefc5ce642a48f039838b11f0544acee727 -size 9280390208 +oid sha256:c42babe21f267986aae253cc57533ba8030b92c91d7bfa0cf5f8d57b28ec0403 +size 9215706568 diff --git a/model-00029-of-000064.safetensors b/model-00029-of-000064.safetensors index 9f7f2a28d374be6f681a57c0d26d4c4018023c95..05e75219338e5393b03a91c935040cc12a2c2b43 100644 --- a/model-00029-of-000064.safetensors +++ b/model-00029-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef406584f8afbd1b6d28fbe5a96a852375c6e3506296193face05dd3cdd1e6e8 -size 9280390208 +oid sha256:55ca5f77b82588917ba10755953f438c6904da4796dfbda6ee2e75df99990b88 +size 9215706568 diff --git a/model-00030-of-000064.safetensors b/model-00030-of-000064.safetensors index 304f66d00ea20928cb301036cc4c3cd1143d78a2..c372a3b0c1fbfcb8bc0b2a4dd16ca4403f2871f5 100644 --- a/model-00030-of-000064.safetensors +++ b/model-00030-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f593e541d77102b6948cf7ad8c1aa7894587d06ec8607922a1821dbf9e1ddb1 -size 9280390208 +oid sha256:7107c97607726f7b2be725e76cf8bc4499684f6ae99e1c6077f4752bf6b9608d +size 9215706568 diff --git a/model-00031-of-000064.safetensors 
b/model-00031-of-000064.safetensors index 2b5608eb3f16cb451a5859286bae6370ba33f0b2..5f8172936d2d440f1b17bc19037af089ee542beb 100644 --- a/model-00031-of-000064.safetensors +++ b/model-00031-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0a898fc052c039ee66416c3a03a86ad4a242b6cd10601386be13c6524bce1d4 -size 9280390208 +oid sha256:f2cb6862d16b9f9989528a0521ef008ed2abb4d2ff519d126bff74c2086d6596 +size 9215706568 diff --git a/model-00032-of-000064.safetensors b/model-00032-of-000064.safetensors index 76b1c686c37f6deda15a5f809192cd2d9a1508bb..e4bdb3e7cba94d7d57ccb720ca77f72372fc3360 100644 --- a/model-00032-of-000064.safetensors +++ b/model-00032-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f5078fd742de320f492d9c9f7c380844a33660b8f8f0205eeaf374957e7ade3 -size 9280390208 +oid sha256:c7e9071b8f60c1ef742cb61b3cfe6bfc4d753c6a1ea3dc69287a556e049fb968 +size 9215706568 diff --git a/model-00033-of-000064.safetensors b/model-00033-of-000064.safetensors index 2f455dc40ee51f420d94319ceb4f5b4c0bdef4d6..3aa96a22492929b6a38256a37109392326ca9567 100644 --- a/model-00033-of-000064.safetensors +++ b/model-00033-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:099e832cfb44be1400e3cb6c6da7337e71ded14662993782215c23d42b80dcb7 -size 9280390208 +oid sha256:e94bafff4fd281a993de205b542ce10110e93850e6c65c61d03be6c364a222a2 +size 9215706568 diff --git a/model-00034-of-000064.safetensors b/model-00034-of-000064.safetensors index 81ba67ac61110c8ddf4cf10310202d3ea5a69910..ab8a6878b576bf8575d4fe1c54e59e92aa269fba 100644 --- a/model-00034-of-000064.safetensors +++ b/model-00034-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c418ab2f416a664dc4624e74e498bd8bf131ec95a106f3c514699daba6c74dc -size 9280390208 +oid sha256:9d17e014b26c65a9154a45957111a08fed38000084b8930a9a28c6b0a7ff0c83 +size 9215706568 diff --git 
a/model-00035-of-000064.safetensors b/model-00035-of-000064.safetensors index 87bf01097401a17d6c1622eb12783d3b5e1b1a09..b6285920e3000eb45d022fef4e361ad65171b212 100644 --- a/model-00035-of-000064.safetensors +++ b/model-00035-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc2e3664fa3e816abf3d38743e436aac089bbd22d73b5c5d26837c4d0c112aa0 -size 9280390208 +oid sha256:229857de9029e67ee27dbe60e2f248ec46c105fa08bf76f80e60de6d6411c8f5 +size 9215706568 diff --git a/model-00036-of-000064.safetensors b/model-00036-of-000064.safetensors index 2637af3065c9f86f665e9abe07c3f75be22f647d..c6ac4e07333dcbefcfa68f04e230b6aef90a13bd 100644 --- a/model-00036-of-000064.safetensors +++ b/model-00036-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b19c8b026f97ff1d10d6cf71a4de178537f5e71dde682ab3beb3cd12a7334d7e -size 9280390208 +oid sha256:95b0a3e5ae595ea2550c99042795130493e3b7d5421cb2ecf6af97c7a55c4f4e +size 9215706568 diff --git a/model-00037-of-000064.safetensors b/model-00037-of-000064.safetensors index 7ebe793c6249e9653131eac31f36149330ef6243..5bb2689b706cd4ee99deadb84f1d366cee510b85 100644 --- a/model-00037-of-000064.safetensors +++ b/model-00037-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:775702ce0a1e5f015f2089ec556f7bf962e1a147eb99b4cd71c564a98bc4b6d4 -size 9280390208 +oid sha256:f2611ef97858d1d6b09cc6e429839e727d3d15d967abe3da8bfad655079fedb5 +size 9215706568 diff --git a/model-00038-of-000064.safetensors b/model-00038-of-000064.safetensors index 025b464499372d9723ac5cca03a102c3ca43ec6b..617e20f8f9c3edf2906630320f1cf056a3597471 100644 --- a/model-00038-of-000064.safetensors +++ b/model-00038-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ea29d9ccd21191a1a8c13d9139b498334099ad342ab0703c692db989e54dd2b -size 9280390208 +oid sha256:3bf1afe774559fa6718b2a4f1c485f3460ac74ca63ead287c473c5f7053e0ccc 
+size 9215706568 diff --git a/model-00039-of-000064.safetensors b/model-00039-of-000064.safetensors index 78173f93243158de82cf1396931f9360d23bfce2..4c304917ed7d02843a5da5bea1469d61d9e0418f 100644 --- a/model-00039-of-000064.safetensors +++ b/model-00039-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd091f20926b11a58428c6cc9c67bb4b842a2182eb5d8173af087eac86d99aad -size 9280390208 +oid sha256:90c1709b2436b892bf319128a3880f9ad660381c0254136928698e94d4cc7454 +size 9215706568 diff --git a/model-00040-of-000064.safetensors b/model-00040-of-000064.safetensors index e94b4cc75093b31c2cf3fb83b1663f7ad0d76d7c..d97a26eaa2f7e2668c2768dc24d171be26b9a270 100644 --- a/model-00040-of-000064.safetensors +++ b/model-00040-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25b11a04fbdedecd19ec047e597ac82be1172807f93f03f137c23bc0eac7a76f -size 9280390208 +oid sha256:a9ac158f4752b092b37ce4cf51d7a11640caa24faa72e4e96028b6da1400d88e +size 9215706568 diff --git a/model-00041-of-000064.safetensors b/model-00041-of-000064.safetensors index c1784cec491b51ab9a6ba0f8c01b005c81c4e594..99fd4f90b33d648488d426528c0bc46949851ed1 100644 --- a/model-00041-of-000064.safetensors +++ b/model-00041-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ae5719fe28bac0dd17677e67861bd632930877ce0d8a0b15b11a624b0aba69f -size 9280390208 +oid sha256:7dfcac8e04b9e71fdbc875dbd8b7d633df49b1ba8fc096adf040fce451a51970 +size 9215706568 diff --git a/model-00042-of-000064.safetensors b/model-00042-of-000064.safetensors index f99b93e179bef67e23b43486a697ba1a3c9df1e9..38a6fe1cd06a1db4173d087f93124688d4c956b6 100644 --- a/model-00042-of-000064.safetensors +++ b/model-00042-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f7bf4f3aea3bf79d9c1744b16134974db972a4dd7e0afe2b48ee48aedeb4e4a -size 9280390208 +oid 
sha256:d688b5dd61818c24d75c8f28ba28516ec7247eb850fad0db107d4dc9153e3a7d +size 9215706568 diff --git a/model-00043-of-000064.safetensors b/model-00043-of-000064.safetensors index 975669d32690e922b336ff64ca882a71c9d4dca7..f59b00185d9e8c26012b1368e250d800dd5a6ab6 100644 --- a/model-00043-of-000064.safetensors +++ b/model-00043-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de4d7f327b6926f1b6640023ed631ad5484ec76bf58b3d368f6a73e2b9cbb1e9 -size 9280390208 +oid sha256:c0b768d9a9cef432800fbb488542c3e27dfce7795ebe6a0eb058dfc2d0919525 +size 9215706568 diff --git a/model-00044-of-000064.safetensors b/model-00044-of-000064.safetensors index 29ad187c705278f432301f9855cc73599af37d99..eda28a439567ee9043b35b45f24de29f293da832 100644 --- a/model-00044-of-000064.safetensors +++ b/model-00044-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b8137f44508adaf77d60413549baab93218515b3bc7ba4036cd92fee45a89c7 -size 9280390208 +oid sha256:2181b8e7841f13727fe71d23a38ecd790be3a838592a96293de5e784bb6225e9 +size 9215706568 diff --git a/model-00045-of-000064.safetensors b/model-00045-of-000064.safetensors index aadafa411162fa07900cd141e65455fc98263c5a..ffda8802991a9a5052f0f736ea05c0d61f2ee5f9 100644 --- a/model-00045-of-000064.safetensors +++ b/model-00045-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74ce59ce421a965633243644492e5b0d3ba34abd22a7d675cf8994c9e75bca3c -size 9280390208 +oid sha256:f20284ff0ca9f7d951629732e32b0bfa14352bd3ff3011348d41b81a8570c56b +size 9215706568 diff --git a/model-00046-of-000064.safetensors b/model-00046-of-000064.safetensors index 7ffc92414e2603faa7196a086649e7a8ecf6d483..a710cd369e2e74469ffd5cc63d82f15476877fa1 100644 --- a/model-00046-of-000064.safetensors +++ b/model-00046-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:6a9005a806c66dd8486432b5165d8f866b4de8f1fd95eb6f9743a36120739ec0 -size 9280390208 +oid sha256:9b63f22e2b7e4760b536ede2d0612e62d931dacf51ab0d7fcce2d0cbd97532fe +size 9215706568 diff --git a/model-00047-of-000064.safetensors b/model-00047-of-000064.safetensors index 5ca9ad509a447ede4b5cf0b6f22852545877448a..51621e02d6af3144dffce55a79abbcb02371b031 100644 --- a/model-00047-of-000064.safetensors +++ b/model-00047-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21e369a8250297f133232f69ad92e8fa87563a5e455eeffe2f8dc6fe4a1698da -size 9280390208 +oid sha256:6d936118ee9e2dcb4fe529e1f65985b00084cee397202964bd7002b3fd5720ce +size 9215706568 diff --git a/model-00048-of-000064.safetensors b/model-00048-of-000064.safetensors index 6f7c886bec3ac81fb729ec5f28a6d29d0ecfa01b..5b939f3587420c0c48f66bf24b2658ad5ead45d0 100644 --- a/model-00048-of-000064.safetensors +++ b/model-00048-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce10420490e9b227e717c569019ba6ff3cab5db0968d3232ae75769132de63ab -size 9280390208 +oid sha256:3da2fcfb13008fd7ef30deb8be7529f5123462350c0598972ea44edf8d0844f1 +size 9215706568 diff --git a/model-00049-of-000064.safetensors b/model-00049-of-000064.safetensors index 79809bfc2b502906671f254584db4f7dc276cd6f..47588bd568d5b74f80a56af466ff4e3e3b7f1d8b 100644 --- a/model-00049-of-000064.safetensors +++ b/model-00049-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:263468bb2a1bad24db749743b0c780af04e28e60839466fd42f918c3e591b31a -size 9280390208 +oid sha256:eef5ad639c43afc0042fe14a29390866afc2b94867c17f2e71f97dd21f9ddec2 +size 9215706568 diff --git a/model-00050-of-000064.safetensors b/model-00050-of-000064.safetensors index b76046fdb699526797dfb527b75a87f8f65dcbe3..3ad90bbbdafd6fa04d40a4b16e211521bbea7e6b 100644 --- a/model-00050-of-000064.safetensors +++ b/model-00050-of-000064.safetensors @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:f00c87dd92064d77e8d01ee7f3f07d9925139cab9f2a65c67c25cbfba290d249 -size 9280390208 +oid sha256:fdf075282c27626a74d2c4bb64cc9c9275083ebc833b4a642945a0ef1a3dcba6 +size 9215706568 diff --git a/model-00051-of-000064.safetensors b/model-00051-of-000064.safetensors index 4d4ca4cc706b0a60e5deaafce41cf0f2d96b3689..3927bc54d415d8cc7ec478a2fd3eb61dfe3de092 100644 --- a/model-00051-of-000064.safetensors +++ b/model-00051-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7df3d5013b4dcbc27227ef13b8daa3c8f6a37497424abe01b8d776c7420752c -size 9280390208 +oid sha256:52a757597d25bee9fe2ff2d5a814e9df37ce5fa5ec38bf7a2f731177051bafcc +size 9215706568 diff --git a/model-00052-of-000064.safetensors b/model-00052-of-000064.safetensors index 790ba3ac6139dbe99504e044122d87db3fa653bb..a140e38bd67319abec52c20d1270e61abce7bac4 100644 --- a/model-00052-of-000064.safetensors +++ b/model-00052-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dae364766ec7c2711beb5d2a9adc210fe70ff4b30c1309929a5d03bb1e6269e5 -size 9280390208 +oid sha256:384f01aebca5480294ef47805396b1e34d4c94760188f4326a5e1d42ab09fd3e +size 9215706568 diff --git a/model-00053-of-000064.safetensors b/model-00053-of-000064.safetensors index 658ab7ce6c68702effe34d38480746a27f53300a..b9fe3efedf1b1d29c78aab8766813e93a07d1214 100644 --- a/model-00053-of-000064.safetensors +++ b/model-00053-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2189b07d91e9c169244f642a7caa037ec7f318c427d5a1d1b12f131abd07c7d3 -size 9280390208 +oid sha256:532e00fd20f6646e0c59f6c7628f00c360879d660233ecf50f5104622bd9327a +size 9215706568 diff --git a/model-00054-of-000064.safetensors b/model-00054-of-000064.safetensors index aaf5ace444109cdabd6e6a88f6d761c06cd3fc4a..1bbc7e82b346c842368c81b26d7e224cf71547ba 100644 --- a/model-00054-of-000064.safetensors +++ b/model-00054-of-000064.safetensors 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd9d131de0ab8f7aca1581326eb46c7ee8e09575ad727de92d4801e9df92780b -size 9280390208 +oid sha256:2352c2b188bfd7adb36991382dc92a31f2491e61002da7f38cc2b5a2da29491a +size 9215706568 diff --git a/model-00055-of-000064.safetensors b/model-00055-of-000064.safetensors index 770675380a067f8ec8f650caf5411837bc39d7d6..e74786e319c766e270405ed27bccd41cdeed4480 100644 --- a/model-00055-of-000064.safetensors +++ b/model-00055-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:372914b9b228e70cca19792b79f1b5e09507e3632109d4c8a91bf97309c172bc -size 9280390208 +oid sha256:3d509e03f08f0c2e7ec520e2d074111061950442fffdf54dfc0658efd3a768fc +size 9215706568 diff --git a/model-00056-of-000064.safetensors b/model-00056-of-000064.safetensors index d26cb1676ba79afab8ecb7be85459790fa537411..8f238871c197cf28b29a979d7be050f373518cf0 100644 --- a/model-00056-of-000064.safetensors +++ b/model-00056-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2426daa83907054c6e8fa57a4f1ac3f7f070af656ff486827188125d94962914 -size 9280390208 +oid sha256:75c2483b319be110ea766c9d9b9085798b6e93b02239e247459557adfa7ab824 +size 9215706568 diff --git a/model-00057-of-000064.safetensors b/model-00057-of-000064.safetensors index 28783e8c978a93cab37eacecce092d27f68e9681..659c3e50246b13575ec68861cdd56d69a9277a9e 100644 --- a/model-00057-of-000064.safetensors +++ b/model-00057-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c32541beb824822145b08d1e6394747295a47dcdcd70e10ad8b0f48fb45d22f5 -size 9280390208 +oid sha256:23cdd5235bbb93a8056f7305ce9888b66eb3a82e257d8b56400266967cb4516d +size 9215706568 diff --git a/model-00058-of-000064.safetensors b/model-00058-of-000064.safetensors index 6f26cab434e090cb4f48fd69718104086704878b..76f8ed3277e1ebf9760b7032eea177ee7ed4f98d 100644 --- a/model-00058-of-000064.safetensors +++ 
b/model-00058-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8834ac4f4f3d35c697a97f24eac362e9203e74398429c2ea8e988f613c7ce105 -size 9280390208 +oid sha256:b2ee0b6efa900eaf31b9a5f0b133d670ce94b029c5c038f681afeb25c110ad9e +size 9215706568 diff --git a/model-00059-of-000064.safetensors b/model-00059-of-000064.safetensors index 145762314ce8194fc4fc3b70c778f4c4dd7e2ad2..84fadcb351c53397a65d36d32284fbe018d48ba7 100644 --- a/model-00059-of-000064.safetensors +++ b/model-00059-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bc9fb43371b79ca51cd0aae8894856349fb80c9b422e1a02cd51b3e87ce0a87 -size 9280390208 +oid sha256:32adf79e13e52d482f36a07711da09564f921d3169f373fa97dcdab099305f64 +size 9215706568 diff --git a/model-00060-of-000064.safetensors b/model-00060-of-000064.safetensors index aa83cd8ee709d2e92f2d9dfcdd894814f2101c7f..05cb1478dea3e216d94bc533b778a34c369819f7 100644 --- a/model-00060-of-000064.safetensors +++ b/model-00060-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72b3f8f5b302c5003c00139805511cec3be46418d3c7cbc829010398b89b1ed1 -size 9280390208 +oid sha256:292f35fa04745a0cc8979338a2a05a9d2e03b5b646178ce422a1657724901623 +size 9215706568 diff --git a/model-00061-of-000064.safetensors b/model-00061-of-000064.safetensors index c26b57ca431097d992a26201992ae2a0c358be93..a726fe399a1b1f81cd0db219a3a319be8290d31f 100644 --- a/model-00061-of-000064.safetensors +++ b/model-00061-of-000064.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61c02fb371a0f94afde03ddd7f7ed75b1758af8e70abf787109b05575c01426e -size 9280390208 +oid sha256:2e8d972d3b854ed7140eb16755aefb0b587c3d3f684fb351b781ec2f6a87ee40 +size 9215706568 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index c816ad6a185e4601dae580ea9b5c5aba119f30db..983f20428a4bd37ef9236d7a583bb3927837160a 100644 --- 
a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd4c5d95393b92b656b466ccc503e4f4ba8d921575e54b04807eae71a2b03634 -size 15867898 +oid sha256:473e178a75dc4d1df5d150d48428bcc3aece58e3e5bf135046bb7b57e7a85f13 +size 15889844 diff --git a/quark_profile.yaml b/quark_profile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce755c0d8342addc1d2a8a26210ef116c5bcc1c3 --- /dev/null +++ b/quark_profile.yaml @@ -0,0 +1,78 @@ +# Quark Profiling Results + +memory_usage: +- step: "Start" + timestamp: 1775015372.9551535 + relative_time_secs: 0.0 + cpu_memory_mb: 3967.14 + gpu_memory_mb: 774.32 + disk_read_mb: 0.0 + disk_write_mb: 0.0 +- step: "File-to-File Quantization Start" + timestamp: 1775015373.1001534 + relative_time_secs: 0.14499998092651367 + cpu_memory_mb: 3967.14 + gpu_memory_mb: 774.32 + disk_read_mb: 0.0 + disk_write_mb: 0.0 +- step: "File-to-File Quantization Start" + timestamp: 1775015373.207735 + relative_time_secs: 0.2525815963745117 + cpu_memory_mb: 3967.14 + gpu_memory_mb: 774.32 + disk_read_mb: 0.0 + disk_write_mb: 0.0 +- step: "File-to-File Quantization End" + timestamp: 1775015373.3390446 + relative_time_secs: 0.38389110565185547 + cpu_memory_mb: 3967.14 + gpu_memory_mb: 774.32 + disk_read_mb: 0.0 + disk_write_mb: 0.0 +- step: "File-to-File Quantization End" + timestamp: 1775015373.47948 + relative_time_secs: 0.5243265628814697 + cpu_memory_mb: 3967.14 + gpu_memory_mb: 774.32 + disk_read_mb: 0.0 + disk_write_mb: 0.0 +- step: "End" + timestamp: 1775015373.6052163 + relative_time_secs: 0.6500627994537354 + cpu_memory_mb: 3967.14 + gpu_memory_mb: 774.32 + disk_read_mb: 0.0 + disk_write_mb: 0.0 + +# Summary Metrics +total_quantization_time_seconds: 0.6501 +peak_memory_mb: 3967.14 +peak_gpu_memory_mb: 774.32 +total_disk_read_mb: 0.0 +total_disk_write_mb: 0.0 + +# Metric Definitions: +# +# Checkpoint Metrics (per record): +# - step: Name of the profiling 
checkpoint. Common steps include: +# - "Start": Initial state when profiling begins +# - "Model Loaded": After loading the ONNX model into memory +# - "Pre-process Start/End": Before and after model preprocessing +# - "Calibration Start/End": Before and after calibration data collection +# - "Quantization (MatMulNBits) Start/End": MatMulNBits quantization phase +# - "Quantization (Static) Start/End": Static quantization phase +# - "Post-process Start/End": Before and after post-processing +# - "Fast Finetune Start/End": Before and after fast finetuning (if enabled) +# - timestamp: Unix timestamp (seconds since epoch) when this measurement was taken. Useful for correlating with external logs or events. +# - relative_time_secs: Time elapsed (in seconds) since the "Start" step. Useful for understanding the duration of each phase relative to the beginning of profiling. +# - cpu_memory_mb: Current Resident Set Size (RSS) in megabytes at this step. This includes memory from the main process and all child processes. RSS represents the portion of memory held in RAM (not swapped out). +# - gpu_memory_mb: Current GPU memory usage in megabytes. This represents actual GPU memory used by the process, including allocations from PyTorch, ONNX Runtime, TensorRT, and other frameworks. Only available when PyTorch with CUDA/ROCm is installed and GPU is available. +# - disk_read_mb: Cumulative disk bytes read (in megabytes) since the start of profiling. Measured relative to the baseline captured at the 'Start' checkpoint, including I/O from the main process and all child processes. Only available when psutil is installed and the OS exposes per-process I/O counters (Linux /proc//io, Windows; not available on macOS without root). +# - disk_write_mb: Cumulative disk bytes written (in megabytes) since the start of profiling. Measured relative to the baseline captured at the 'Start' checkpoint, including I/O from the main process and all child processes. 
Only available when psutil is installed and the OS exposes per-process I/O counters (Linux /proc//io, Windows; not available on macOS without root). +# +# Summary Metrics (overall): +# - total_quantization_time_seconds: Total elapsed time (in seconds) from the start of profiling to the end of the quantization process. +# - peak_memory_mb: Peak resident set size (RSS) in megabytes for the main process during the entire profiling session. On Linux, this is read from VmHWM (high water mark) in /proc//status. On Windows, this is the peak working set size. This metric may not be available on all platforms. +# - peak_gpu_memory_mb: Peak GPU memory usage in megabytes during the entire profiling session. This is the maximum GPU memory used, including allocations from PyTorch, ONNX Runtime, TensorRT, and other frameworks. Only available when PyTorch with CUDA/ROCm is installed and GPU is available. +# - total_disk_read_mb: Total disk bytes read (in megabytes) during the entire profiling session. Computed as the difference between the final and baseline cumulative read counters, including I/O from the main process and all child processes. Only available when psutil is installed and the OS exposes per-process I/O counters (Linux /proc//io, Windows; not available on macOS without root). +# - total_disk_write_mb: Total disk bytes written (in megabytes) during the entire profiling session. Computed as the difference between the final and baseline cumulative write counters, including I/O from the main process and all child processes. Only available when psutil is installed and the OS exposes per-process I/O counters (Linux /proc//io, Windows; not available on macOS without root). 
diff --git a/tokenization_kimi.py b/tokenization_kimi.py index 0b5a39706cc48c5e7209431de17aa2670f1862e9..7868ea7598734c24cbbbbf904799c76b29af9803 100644 --- a/tokenization_kimi.py +++ b/tokenization_kimi.py @@ -9,11 +9,7 @@ import tiktoken from tiktoken.load import load_tiktoken_bpe from tokenizers import AddedToken -try: - from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode -except: - from transformers.convert_slow_tokenizer import bytes_to_unicode - +from transformers.convert_slow_tokenizer import bytes_to_unicode from transformers.tokenization_utils import PreTrainedTokenizer from .tool_declaration_ts import encode_tools_to_typescript_style