danielhanchen committed on
Commit
3ce346d
·
verified ·
1 Parent(s): 2d4d367

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +21 -3
  2. config.json +44 -31
  3. processor_config.json +38 -0
  4. tokenizer_config.json +19 -12
README.md CHANGED
@@ -1,13 +1,31 @@
1
  ---
 
 
2
  tags:
3
  - compressed-tensors
 
4
  license: other
5
  license_name: modified-mit
6
  library_name: transformers
7
  pipeline_tag: image-text-to-text
8
- base_model:
9
- - moonshotai/Kimi-K2.7-Code
10
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  <div align="center">
12
  <picture>
13
  <img src="figures/kimi-logo.png" width="30%" alt="Kimi K2.7 Code">
@@ -334,4 +352,4 @@ See [THIRD PARTY NOTICES](THIRD_PARTY_NOTICES.md)
334
 
335
  ## 9. Contact Us
336
 
337
- If you have any questions, please reach out at [support@moonshot.ai](mailto:support@moonshot.ai).
 
1
  ---
2
+ base_model:
3
+ - moonshotai/Kimi-K2.7-Code
4
  tags:
5
  - compressed-tensors
6
+ - unsloth
7
  license: other
8
  license_name: modified-mit
9
  library_name: transformers
10
  pipeline_tag: image-text-to-text
 
 
11
  ---
12
+ <div>
13
+ <p style="margin-top: 0;margin-bottom: 0;">
14
+ <em><a href="https://docs.unsloth.ai/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0</a> achieves superior accuracy & outperforms other leading quants.</em>
15
+ </p>
16
+ <div style="display: flex; gap: 5px; align-items: center; ">
17
+ <a href="https://github.com/unslothai/unsloth/">
18
+ <img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
19
+ </a>
20
+ <a href="https://discord.gg/unsloth">
21
+ <img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
22
+ </a>
23
+ <a href="https://docs.unsloth.ai/">
24
+ <img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
25
+ </a>
26
+ </div>
27
+ </div>
28
+
29
  <div align="center">
30
  <picture>
31
  <img src="figures/kimi-logo.png" width="30%" alt="Kimi K2.7 Code">
 
352
 
353
  ## 9. Contact Us
354
 
355
+ If you have any questions, please reach out at [support@moonshot.ai](mailto:support@moonshot.ai).
config.json CHANGED
@@ -8,12 +8,47 @@
8
  "AutoModelForCausalLM": "modeling_kimi_k25.KimiK25ForConditionalGeneration"
9
  },
10
  "bos_token_id": 163584,
11
- "dtype": "bfloat16",
12
  "eos_token_id": 163586,
13
  "ignore_index": -100,
14
  "media_placeholder_token_id": 163605,
15
  "model_type": "kimi_k25",
16
  "pad_token_id": 163839,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "text_config": {
18
  "_name_or_path": "",
19
  "add_cross_attention": false,
@@ -28,24 +63,15 @@
28
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
29
  },
30
  "aux_loss_alpha": 0.001,
31
- "bad_words_ids": null,
32
- "begin_suppress_tokens": null,
33
  "bos_token_id": 163584,
34
  "chunk_size_feed_forward": 0,
35
  "cross_attention_hidden_size": null,
36
  "decoder_start_token_id": null,
37
- "diversity_penalty": 0.0,
38
- "do_sample": false,
39
- "dtype": "bfloat16",
40
- "early_stopping": false,
41
- "encoder_no_repeat_ngram_size": 0,
42
  "eos_token_id": 163586,
43
  "ep_size": 1,
44
- "exponential_decay_length_penalty": null,
45
  "finetuning_task": null,
46
  "first_k_dense_replace": 1,
47
- "forced_bos_token_id": null,
48
- "forced_eos_token_id": null,
49
  "hidden_act": "silu",
50
  "hidden_size": 7168,
51
  "id2label": {
@@ -61,29 +87,21 @@
61
  "LABEL_0": 0,
62
  "LABEL_1": 1
63
  },
64
- "length_penalty": 1.0,
65
- "max_length": 20,
66
  "max_position_embeddings": 262144,
67
- "min_length": 0,
68
- "model_type": "kimi_k2",
69
  "moe_intermediate_size": 2048,
70
  "moe_layer_freq": 1,
71
  "n_group": 1,
72
  "n_routed_experts": 384,
73
  "n_shared_experts": 1,
74
- "no_repeat_ngram_size": 0,
75
  "norm_topk_prob": true,
76
  "num_attention_heads": 64,
77
- "num_beam_groups": 1,
78
- "num_beams": 1,
79
  "num_experts_per_tok": 8,
80
  "num_hidden_layers": 61,
81
  "num_key_value_heads": 64,
82
  "num_nextn_predict_layers": 0,
83
- "num_return_sequences": 1,
84
  "output_attentions": false,
85
  "output_hidden_states": false,
86
- "output_scores": false,
87
  "pad_token_id": 163839,
88
  "prefix": null,
89
  "pretraining_tp": 1,
@@ -127,18 +145,17 @@
127
  "quant_method": "compressed-tensors",
128
  "quantization_status": "compressed"
129
  },
130
- "remove_invalid_values": false,
131
- "repetition_penalty": 1.0,
132
  "return_dict": true,
133
- "return_dict_in_generate": false,
134
  "rms_norm_eps": 1e-05,
135
- "rope_scaling": {
136
  "beta_fast": 32.0,
137
  "beta_slow": 1.0,
138
  "factor": 64.0,
139
  "mscale": 1.0,
140
  "mscale_all_dim": 1.0,
141
  "original_max_position_embeddings": 4096,
 
 
142
  "type": "yarn"
143
  },
144
  "rope_theta": 50000.0,
@@ -146,30 +163,25 @@
146
  "scoring_func": "sigmoid",
147
  "sep_token_id": null,
148
  "seq_aux": true,
149
- "suppress_tokens": null,
150
  "task_specific_params": null,
151
- "temperature": 1.0,
152
  "tf_legacy_loss": false,
153
  "tie_encoder_decoder": false,
154
  "tie_word_embeddings": false,
155
  "tokenizer_class": null,
156
- "top_k": 50,
157
- "top_p": 1.0,
158
  "topk_group": 1,
159
  "topk_method": "noaux_tc",
160
  "torchscript": false,
161
- "transformers_version": "4.56.2",
162
- "typical_p": 1.0,
163
  "use_bfloat16": false,
164
  "use_cache": true,
165
  "v_head_dim": 128,
166
  "vocab_size": 163840
167
  },
168
  "tie_word_embeddings": false,
 
 
169
  "use_unified_vision_chunk": true,
170
  "video_placeholder": "<|kimi_k25_video_placeholder|>",
171
  "vision_config": {
172
- "_attn_implementation": "flash_attention_2",
173
  "init_pos_emb_height": 64,
174
  "init_pos_emb_time": 4,
175
  "init_pos_emb_width": 64,
@@ -180,6 +192,7 @@
180
  "merge_type": "sd2_tpool",
181
  "mm_hidden_size": 1152,
182
  "mm_projector_type": "patchmerger",
 
183
  "patch_size": 14,
184
  "pos_emb_type": "divided_fixed",
185
  "projector_hidden_act": "gelu",
 
8
  "AutoModelForCausalLM": "modeling_kimi_k25.KimiK25ForConditionalGeneration"
9
  },
10
  "bos_token_id": 163584,
11
+ "torch_dtype": "bfloat16",
12
  "eos_token_id": 163586,
13
  "ignore_index": -100,
14
  "media_placeholder_token_id": 163605,
15
  "model_type": "kimi_k25",
16
  "pad_token_id": 163839,
17
+ "quantization_config": {
18
+ "config_groups": {
19
+ "group_0": {
20
+ "input_activations": null,
21
+ "output_activations": null,
22
+ "targets": [
23
+ "Linear"
24
+ ],
25
+ "weights": {
26
+ "actorder": null,
27
+ "block_structure": null,
28
+ "dynamic": false,
29
+ "group_size": 32,
30
+ "num_bits": 4,
31
+ "observer": "minmax",
32
+ "observer_kwargs": {},
33
+ "strategy": "group",
34
+ "symmetric": true,
35
+ "type": "int"
36
+ }
37
+ }
38
+ },
39
+ "format": "pack-quantized",
40
+ "ignore": [
41
+ "re:.*self_attn.*",
42
+ "re:.*shared_experts.*",
43
+ "re:.*mlp\\.(gate|up|gate_up|down)_proj.*",
44
+ "re:.*lm_head.*",
45
+ "re:.*vision_tower.*",
46
+ "re:.*mm_projector.*"
47
+ ],
48
+ "kv_cache_scheme": null,
49
+ "quant_method": "compressed-tensors",
50
+ "quantization_status": "compressed"
51
+ },
52
  "text_config": {
53
  "_name_or_path": "",
54
  "add_cross_attention": false,
 
63
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
64
  },
65
  "aux_loss_alpha": 0.001,
 
 
66
  "bos_token_id": 163584,
67
  "chunk_size_feed_forward": 0,
68
  "cross_attention_hidden_size": null,
69
  "decoder_start_token_id": null,
70
+ "torch_dtype": "bfloat16",
 
 
 
 
71
  "eos_token_id": 163586,
72
  "ep_size": 1,
 
73
  "finetuning_task": null,
74
  "first_k_dense_replace": 1,
 
 
75
  "hidden_act": "silu",
76
  "hidden_size": 7168,
77
  "id2label": {
 
87
  "LABEL_0": 0,
88
  "LABEL_1": 1
89
  },
 
 
90
  "max_position_embeddings": 262144,
91
+ "model_type": "deepseek_v3",
 
92
  "moe_intermediate_size": 2048,
93
  "moe_layer_freq": 1,
94
  "n_group": 1,
95
  "n_routed_experts": 384,
96
  "n_shared_experts": 1,
 
97
  "norm_topk_prob": true,
98
  "num_attention_heads": 64,
 
 
99
  "num_experts_per_tok": 8,
100
  "num_hidden_layers": 61,
101
  "num_key_value_heads": 64,
102
  "num_nextn_predict_layers": 0,
 
103
  "output_attentions": false,
104
  "output_hidden_states": false,
 
105
  "pad_token_id": 163839,
106
  "prefix": null,
107
  "pretraining_tp": 1,
 
145
  "quant_method": "compressed-tensors",
146
  "quantization_status": "compressed"
147
  },
 
 
148
  "return_dict": true,
 
149
  "rms_norm_eps": 1e-05,
150
+ "rope_parameters": {
151
  "beta_fast": 32.0,
152
  "beta_slow": 1.0,
153
  "factor": 64.0,
154
  "mscale": 1.0,
155
  "mscale_all_dim": 1.0,
156
  "original_max_position_embeddings": 4096,
157
+ "rope_theta": 50000.0,
158
+ "rope_type": "yarn",
159
  "type": "yarn"
160
  },
161
  "rope_theta": 50000.0,
 
163
  "scoring_func": "sigmoid",
164
  "sep_token_id": null,
165
  "seq_aux": true,
 
166
  "task_specific_params": null,
 
167
  "tf_legacy_loss": false,
168
  "tie_encoder_decoder": false,
169
  "tie_word_embeddings": false,
170
  "tokenizer_class": null,
 
 
171
  "topk_group": 1,
172
  "topk_method": "noaux_tc",
173
  "torchscript": false,
 
 
174
  "use_bfloat16": false,
175
  "use_cache": true,
176
  "v_head_dim": 128,
177
  "vocab_size": 163840
178
  },
179
  "tie_word_embeddings": false,
180
+ "transformers_version": "5.13.0.dev0",
181
+ "unsloth_fixed": true,
182
  "use_unified_vision_chunk": true,
183
  "video_placeholder": "<|kimi_k25_video_placeholder|>",
184
  "vision_config": {
 
185
  "init_pos_emb_height": 64,
186
  "init_pos_emb_time": 4,
187
  "init_pos_emb_width": 64,
 
192
  "merge_type": "sd2_tpool",
193
  "mm_hidden_size": 1152,
194
  "mm_projector_type": "patchmerger",
195
+ "model_type": "",
196
  "patch_size": 14,
197
  "pos_emb_type": "divided_fixed",
198
  "projector_hidden_act": "gelu",
processor_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "kimi_k25_processor.KimiK25Processor"
4
+ },
5
+ "image_processor": {
6
+ "auto_map": {
7
+ "AutoImageProcessor": "kimi_k25_vision_processing.KimiK25VisionProcessor",
8
+ "AutoProcessor": "kimi_k25_processor.KimiK25Processor"
9
+ },
10
+ "image_processor_type": "KimiK25VisionProcessor",
11
+ "media_proc_cfg": {
12
+ "config_type": "media_proc.processors.moonvit.MoonViTMediaProcessorConfig",
13
+ "fixed_output_tokens": null,
14
+ "image_mean": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "image_std": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "in_patch_limit": 16384,
25
+ "in_patch_limit_each_frame": 4096,
26
+ "in_patch_limit_video": 655360,
27
+ "max_num_frames_each_video": null,
28
+ "merge_kernel_size": 2,
29
+ "patch_limit_on_one_side": 512,
30
+ "patch_size": 14,
31
+ "sample_fps": 8.0,
32
+ "temporal_merge_kernel_size": 4,
33
+ "timestamp_mode": "hh:mm:ss.fff"
34
+ },
35
+ "num_frames_per_chunk": 4
36
+ },
37
+ "processor_class": "KimiK25Processor"
38
+ }
tokenizer_config.json CHANGED
@@ -185,7 +185,18 @@
185
  "special": true
186
  }
187
  },
188
- "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
189
  "<|im_end|>",
190
  "<|im_user|>",
191
  "<|im_assistant|>",
@@ -199,17 +210,13 @@
199
  "<|media_end|>",
200
  "<|media_pad|>"
201
  ],
202
- "bos_token": "[BOS]",
203
- "clean_up_tokenization_spaces": false,
204
- "eos_token": "[EOS]",
205
- "model_max_length": 1000000000000000019884624838656,
206
  "pad_token": "[PAD]",
207
- "unk_token": "[UNK]",
 
208
  "tokenizer_class": "TikTokenTokenizer",
209
- "auto_map": {
210
- "AutoTokenizer": [
211
- "tokenization_kimi.TikTokenTokenizer",
212
- null
213
- ]
214
- }
215
  }
 
185
  "special": true
186
  }
187
  },
188
+ "auto_map": {
189
+ "AutoProcessor": "kimi_k25_processor.KimiK25Processor",
190
+ "AutoTokenizer": [
191
+ "tokenization_kimi.TikTokenTokenizer",
192
+ null
193
+ ]
194
+ },
195
+ "backend": "custom",
196
+ "bos_token": "[BOS]",
197
+ "clean_up_tokenization_spaces": false,
198
+ "eos_token": "[EOS]",
199
+ "extra_special_tokens": [
200
  "<|im_end|>",
201
  "<|im_user|>",
202
  "<|im_assistant|>",
 
210
  "<|media_end|>",
211
  "<|media_pad|>"
212
  ],
213
+ "is_local": true,
214
+ "local_files_only": false,
215
+ "model_max_length": 262144,
 
216
  "pad_token": "[PAD]",
217
+ "padding_side": "left",
218
+ "processor_class": "KimiK25Processor",
219
  "tokenizer_class": "TikTokenTokenizer",
220
+ "unk_token": "[UNK]",
221
+ "chat_template": "{%- macro render_content(msg) -%}\n {%- set c = msg.get('content') -%}\n {%- if c is string -%}\n {{ c }}\n {%- elif c is not none -%}\n {% for content in c -%}\n {% if content['type'] == 'image' or content['type'] == 'image_url' -%}\n <|media_begin|>image<|media_content|><|media_pad|><|media_end|>\n {% elif content['type'] == 'video' or content['type']== 'video_url'-%}\n <|kimi_k25_video_placeholder|>\n {% else -%}\n {{ content['text'] }}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{% macro set_roles(message) -%}\n {%- set role_name = message.get('name') or message['role'] -%}\n {%- if message['role'] == 'user' -%}\n <|im_user|>{{role_name}}<|im_middle|>\n {%- elif message['role'] == 'assistant' -%}\n <|im_assistant|>{{role_name}}<|im_middle|>\n {%- else -%}\n <|im_system|>{{role_name}}<|im_middle|>\n {%- endif -%}\n{%- endmacro -%}\n\n\n{%- macro render_toolcalls(message) -%}\n <|tool_calls_section_begin|>\n {%- for tool_call in message['tool_calls'] -%}\n {%- set formatted_id = tool_call['id'] -%}\n <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>\n {%- endfor -%}\n <|tool_calls_section_end|>\n{%- endmacro -%}\n\n\n{%- set preserve_thinking = preserve_thinking | default(true) -%}\n{# Find last non-tool-call assistant message. If preserve_thinking, keep -1 so hist is empty and all msgs use suffix (retain reasoning). 
#}\n{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}\n{%- if not preserve_thinking -%}\n{%- for idx in range(messages|length-1, -1, -1) -%}\n {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}\n {%- set ns.last_non_tool_call_assistant_msg = idx -%}\n {%- break -%}\n {%- endif -%}\n{%- endfor -%}\n{%- endif -%}\n\n{# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}\n{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}\n{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}\n\n{%- if tools -%}\n {%- if tools_ts_str -%}\n <|im_system|>tool_declare<|im_middle|>{{ tools_ts_str }}<|im_end|>\n {%- else -%}\n <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>\n {%- endif -%}\n{%- endif -%}\n\n \n{%- for message in hist_msgs -%}\n {{set_roles(message)}}\n {%- if message['role'] == 'assistant' -%}\n <think></think>{{render_content(message)}}\n {%- if message.get('tool_calls') -%}\n {{render_toolcalls(message)}}\n {%- endif -%}\n {%- elif message['role'] == 'tool' -%}\n {%- set tool_call_id = message.tool_call_id -%}\n ## Return of {{ tool_call_id }}\n{{render_content(message)}}\n {%- elif message['content'] is not none -%}\n {{render_content(message)}}\n {%- endif -%}\n <|im_end|>\n{%- endfor -%}\n\n{%- for message in suffix_msgs -%}\n {{set_roles(message)}}\n {%- if message['role'] == 'assistant' -%}\n {%- if thinking is defined and thinking is false and preserve_thinking is false -%}\n <think></think>{{render_content(message)}}\n {%- else -%}\n {%- set rc = message.get('reasoning', message.get('reasoning_content', '')) -%}\n <think>{{rc}}</think>{{render_content(message)}}\n {%- endif -%}\n {%- if message.get('tool_calls') -%}\n {{render_toolcalls(message)}}\n {%- endif -%}\n {%- elif message['role'] == 'tool' -%}\n {%- set tool_call_id = message.tool_call_id -%}\n ## Return of {{ tool_call_id 
}}\n{{render_content(message)}}\n {%- elif message['content'] is not none -%}\n {{render_content(message)}}\n {%- endif -%}\n <|im_end|>\n{%- endfor -%}\n\n\n{%- if add_generation_prompt -%}\n <|im_assistant|>assistant<|im_middle|>\n {%- if thinking is defined and thinking is false -%}\n <think></think>\n {%- else -%}\n <think>\n {%- endif -%}\n{%- endif -%}"
 
 
 
 
222
  }