Bittoby1040 committed on
Commit
aeb922a
·
verified ·
1 Parent(s): ac2a6a5

Clean repository before upload

Browse files
.gitattributes DELETED
@@ -1,36 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adapter_config.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "alora_invocation_tokens": null,
3
- "alpha_pattern": {
4
- ".*\\.gate_up_proj": 256
5
- },
6
- "arrow_config": null,
7
- "auto_mapping": null,
8
- "bias": "none",
9
- "corda_config": null,
10
- "ensure_weight_tying": false,
11
- "eva_config": null,
12
- "exclude_modules": null,
13
- "fan_in_fan_out": false,
14
- "inference_mode": true,
15
- "init_lora_weights": true,
16
- "layer_replication": null,
17
- "layers_pattern": null,
18
- "layers_to_transform": null,
19
- "loftq_config": {},
20
- "lora_alpha": 128,
21
- "lora_bias": false,
22
- "lora_dropout": 0.0,
23
- "lora_ga_config": null,
24
- "megatron_config": null,
25
- "megatron_core": "megatron.core",
26
- "modules_to_save": null,
27
- "peft_type": "LORA",
28
- "peft_version": "0.19.1",
29
- "qalora_group_size": 16,
30
- "r": 64,
31
- "rank_pattern": {
32
- ".*\\.gate_up_proj": 128
33
- },
34
- "revision": null,
35
- "target_modules": [
36
- "kv_a_proj_with_mqa",
37
- "o_proj",
38
- "kv_b_proj",
39
- "q_proj"
40
- ],
41
- "target_parameters": [
42
- "gate_up_proj",
43
- "down_proj"
44
- ],
45
- "task_type": "CAUSAL_LM",
46
- "trainable_token_indices": null,
47
- "use_bdlora": null,
48
- "use_dora": false,
49
- "use_qalora": false,
50
- "use_rslora": false
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfb328732e96891eb7af56fb0267736111b9f18a16386cf30e97637eb51ac6e6
3
- size 5729906896
 
 
 
 
chat_template.jinja DELETED
@@ -1,112 +0,0 @@
1
- {%- macro render_content(msg) -%}
2
- {%- set c = msg.get('content') -%}
3
- {%- if c is string -%}
4
- {{ c }}
5
- {%- elif c is not none -%}
6
- {% for content in c -%}
7
- {% if content['type'] == 'image' or content['type'] == 'image_url' -%}
8
- <|media_begin|>image<|media_content|><|media_pad|><|media_end|>
9
- {% elif content['type'] == 'video' or content['type']== 'video_url'-%}
10
- <|kimi_k25_video_placeholder|>
11
- {% else -%}
12
- {{ content['text'] }}
13
- {%- endif -%}
14
- {%- endfor -%}
15
- {%- endif -%}
16
- {%- endmacro -%}
17
-
18
- {% macro set_roles(message) -%}
19
- {%- set role_name = message.get('name') or message['role'] -%}
20
- {%- if message['role'] == 'user' -%}
21
- <|im_user|>{{role_name}}<|im_middle|>
22
- {%- elif message['role'] == 'assistant' -%}
23
- <|im_assistant|>{{role_name}}<|im_middle|>
24
- {%- else -%}
25
- <|im_system|>{{role_name}}<|im_middle|>
26
- {%- endif -%}
27
- {%- endmacro -%}
28
-
29
-
30
- {%- macro render_toolcalls(message) -%}
31
- <|tool_calls_section_begin|>
32
- {%- for tool_call in message['tool_calls'] -%}
33
- {%- set formatted_id = tool_call['id'] -%}
34
- <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
35
- {%- endfor -%}
36
- <|tool_calls_section_end|>
37
- {%- endmacro -%}
38
-
39
-
40
- {%- set preserve_thinking = preserve_thinking | default(false) -%}
41
- {# Find last non-tool-call assistant message. If preserve_thinking, keep -1 so hist is empty and all msgs use suffix (retain reasoning). #}
42
- {%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
43
- {%- if not preserve_thinking -%}
44
- {%- for idx in range(messages|length-1, -1, -1) -%}
45
- {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
46
- {%- set ns.last_non_tool_call_assistant_msg = idx -%}
47
- {%- break -%}
48
- {%- endif -%}
49
- {%- endfor -%}
50
- {%- endif -%}
51
-
52
- {# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}
53
- {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
54
- {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
55
-
56
- {%- if tools -%}
57
- {%- if tools_ts_str -%}
58
- <|im_system|>tool_declare<|im_middle|>{{ tools_ts_str }}<|im_end|>
59
- {%- else -%}
60
- <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
61
- {%- endif -%}
62
- {%- endif -%}
63
-
64
-
65
- {%- for message in hist_msgs -%}
66
- {{set_roles(message)}}
67
- {%- if message['role'] == 'assistant' -%}
68
- <think></think>{{render_content(message)}}
69
- {%- if message.get('tool_calls') -%}
70
- {{render_toolcalls(message)}}
71
- {%- endif -%}
72
- {%- elif message['role'] == 'tool' -%}
73
- {%- set tool_call_id = message.tool_call_id -%}
74
- ## Return of {{ tool_call_id }}
75
- {{render_content(message)}}
76
- {%- elif message['content'] is not none -%}
77
- {{render_content(message)}}
78
- {%- endif -%}
79
- <|im_end|>
80
- {%- endfor -%}
81
-
82
- {%- for message in suffix_msgs -%}
83
- {{set_roles(message)}}
84
- {%- if message['role'] == 'assistant' -%}
85
- {%- if thinking is defined and thinking is false and preserve_thinking is false -%}
86
- <think></think>{{render_content(message)}}
87
- {%- else -%}
88
- {%- set rc = message.get('reasoning', message.get('reasoning_content', '')) -%}
89
- <think>{{rc}}</think>{{render_content(message)}}
90
- {%- endif -%}
91
- {%- if message.get('tool_calls') -%}
92
- {{render_toolcalls(message)}}
93
- {%- endif -%}
94
- {%- elif message['role'] == 'tool' -%}
95
- {%- set tool_call_id = message.tool_call_id -%}
96
- ## Return of {{ tool_call_id }}
97
- {{render_content(message)}}
98
- {%- elif message['content'] is not none -%}
99
- {{render_content(message)}}
100
- {%- endif -%}
101
- <|im_end|>
102
- {%- endfor -%}
103
-
104
-
105
- {%- if add_generation_prompt -%}
106
- <|im_assistant|>assistant<|im_middle|>
107
- {%- if thinking is defined and thinking is false -%}
108
- <think></think>
109
- {%- else -%}
110
- <think>
111
- {%- endif -%}
112
- {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "architectures": [
3
- "DeepseekV3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "aux_loss_alpha": 0.001,
8
- "bos_token_id": 163584,
9
- "dtype": "bfloat16",
10
- "eos_token_id": 163585,
11
- "ep_size": 1,
12
- "first_k_dense_replace": 1,
13
- "head_dim": 64,
14
- "hidden_act": "silu",
15
- "hidden_size": 2048,
16
- "initializer_range": 0.02,
17
- "intermediate_size": 11264,
18
- "kv_lora_rank": 512,
19
- "max_position_embeddings": 131072,
20
- "model_type": "deepseek_v3",
21
- "moe_intermediate_size": 1408,
22
- "moe_layer_freq": 1,
23
- "n_group": 1,
24
- "n_routed_experts": 64,
25
- "n_shared_experts": 2,
26
- "norm_topk_prob": true,
27
- "num_attention_heads": 16,
28
- "num_experts_per_tok": 6,
29
- "num_hidden_layers": 27,
30
- "num_key_value_heads": 16,
31
- "num_local_experts": 64,
32
- "num_nextn_predict_layers": 1,
33
- "num_shared_experts": 2,
34
- "pad_token_id": 163839,
35
- "pretraining_tp": 1,
36
- "q_lora_rank": null,
37
- "qk_head_dim": 192,
38
- "qk_nope_head_dim": 128,
39
- "qk_rope_head_dim": 64,
40
- "rms_norm_eps": 1e-05,
41
- "rope_interleave": true,
42
- "rope_parameters": {
43
- "rope_theta": 800000.0,
44
- "rope_type": "default"
45
- },
46
- "rope_scaling": null,
47
- "rope_theta": 10000.0,
48
- "routed_scaling_factor": 2.446,
49
- "scoring_func": "sigmoid",
50
- "seq_aux": true,
51
- "tie_word_embeddings": false,
52
- "topk_group": 1,
53
- "topk_method": "noaux_tc",
54
- "transformers_version": "4.57.6",
55
- "use_cache": false,
56
- "v_head_dim": 128,
57
- "vocab_size": 163840
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 163584,
4
- "eos_token_id": 163585,
5
- "pad_token_id": 163839,
6
- "transformers_version": "4.57.6"
7
- }
 
 
 
 
 
 
 
 
model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3e799b09d78222b73276c2e0443a21644844b7516ae1fe01c11c7b67f3713fc
3
- size 31920890384
 
 
 
 
special_tokens_map.json DELETED
@@ -1,44 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<|im_end|>",
4
- "<|im_user|>",
5
- "<|im_assistant|>",
6
- "<|start_header_id|>",
7
- "<|end_header_id|>",
8
- "[EOT]",
9
- "<|im_system|>",
10
- "<|im_middle|>",
11
- "<|media_begin|>",
12
- "<|media_content|>",
13
- "<|media_end|>",
14
- "<|media_pad|>"
15
- ],
16
- "bos_token": {
17
- "content": "[BOS]",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "eos_token": {
24
- "content": "[EOS]",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "[PAD]",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "unk_token": {
38
- "content": "[UNK]",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false
43
- }
44
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b39c9bd00faa509721177f36bdd132f52924f67ed12394bf38b222de87aeee91
3
- size 19545546
 
 
 
 
tokenizer_config.json DELETED
@@ -1,209 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "163584": {
4
- "content": "[BOS]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "163585": {
12
- "content": "[EOS]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "163586": {
20
- "content": "<|im_end|>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "163587": {
28
- "content": "<|im_user|>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "163588": {
36
- "content": "<|im_assistant|>",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "163590": {
44
- "content": "<|start_header_id|>",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "163591": {
52
- "content": "<|end_header_id|>",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "163593": {
60
- "content": "[EOT]",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "163594": {
68
- "content": "<|im_system|>",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "163595": {
76
- "content": "<|tool_calls_section_begin|>",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": false
82
- },
83
- "163596": {
84
- "content": "<|tool_calls_section_end|>",
85
- "lstrip": false,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": false
90
- },
91
- "163597": {
92
- "content": "<|tool_call_begin|>",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": false
98
- },
99
- "163598": {
100
- "content": "<|tool_call_argument_begin|>",
101
- "lstrip": false,
102
- "normalized": false,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": false
106
- },
107
- "163599": {
108
- "content": "<|tool_call_end|>",
109
- "lstrip": false,
110
- "normalized": false,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": false
114
- },
115
- "163601": {
116
- "content": "<|im_middle|>",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "163602": {
124
- "content": "<|media_begin|>",
125
- "lstrip": false,
126
- "normalized": false,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": true
130
- },
131
- "163603": {
132
- "content": "<|media_content|>",
133
- "lstrip": false,
134
- "normalized": false,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": true
138
- },
139
- "163604": {
140
- "content": "<|media_end|>",
141
- "lstrip": false,
142
- "normalized": false,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": true
146
- },
147
- "163605": {
148
- "content": "<|media_pad|>",
149
- "lstrip": false,
150
- "normalized": false,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": true
154
- },
155
- "163606": {
156
- "content": "<think>",
157
- "lstrip": false,
158
- "normalized": false,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": false
162
- },
163
- "163607": {
164
- "content": "</think>",
165
- "lstrip": false,
166
- "normalized": false,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": false
170
- },
171
- "163838": {
172
- "content": "[UNK]",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": true
178
- },
179
- "163839": {
180
- "content": "[PAD]",
181
- "lstrip": false,
182
- "normalized": false,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": true
186
- }
187
- },
188
- "additional_special_tokens": [
189
- "<|im_end|>",
190
- "<|im_user|>",
191
- "<|im_assistant|>",
192
- "<|start_header_id|>",
193
- "<|end_header_id|>",
194
- "[EOT]",
195
- "<|im_system|>",
196
- "<|im_middle|>",
197
- "<|media_begin|>",
198
- "<|media_content|>",
199
- "<|media_end|>",
200
- "<|media_pad|>"
201
- ],
202
- "bos_token": "[BOS]",
203
- "clean_up_tokenization_spaces": false,
204
- "eos_token": "[EOS]",
205
- "extra_special_tokens": {},
206
- "model_max_length": 1000000000000000019884624838656,
207
- "pad_token": "[PAD]",
208
- "unk_token": "[UNK]"
209
- }