Bittoby1040 committed on
Commit
a382ddf
·
verified ·
1 Parent(s): ffab428

Clean repository before upload

Browse files
.gitattributes DELETED
@@ -1,36 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adapter_config.json DELETED
@@ -1,44 +0,0 @@
1
- {
2
- "alora_invocation_tokens": null,
3
- "alpha_pattern": {},
4
- "arrow_config": null,
5
- "auto_mapping": null,
6
- "bias": "none",
7
- "corda_config": null,
8
- "ensure_weight_tying": false,
9
- "eva_config": null,
10
- "exclude_modules": null,
11
- "fan_in_fan_out": false,
12
- "inference_mode": true,
13
- "init_lora_weights": true,
14
- "layer_replication": null,
15
- "layers_pattern": null,
16
- "layers_to_transform": null,
17
- "loftq_config": {},
18
- "lora_alpha": 128,
19
- "lora_bias": false,
20
- "lora_dropout": 0.05,
21
- "lora_ga_config": null,
22
- "megatron_config": null,
23
- "megatron_core": "megatron.core",
24
- "modules_to_save": null,
25
- "peft_type": "LORA",
26
- "peft_version": "0.19.1",
27
- "qalora_group_size": 16,
28
- "r": 64,
29
- "rank_pattern": {},
30
- "revision": null,
31
- "target_modules": [
32
- "q_proj",
33
- "k_proj",
34
- "v_proj",
35
- "o_proj"
36
- ],
37
- "target_parameters": [],
38
- "task_type": "CAUSAL_LM",
39
- "trainable_token_indices": null,
40
- "use_bdlora": null,
41
- "use_dora": false,
42
- "use_qalora": false,
43
- "use_rslora": false
44
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2f5fed994f04a29ee259760ae900b1c61b388ef3c0132044376ecdd6453bc4b
3
- size 31865104
 
 
 
 
chat_template.jinja DELETED
@@ -1,112 +0,0 @@
1
- {%- macro render_content(msg) -%}
2
- {%- set c = msg.get('content') -%}
3
- {%- if c is string -%}
4
- {{ c }}
5
- {%- elif c is not none -%}
6
- {% for content in c -%}
7
- {% if content['type'] == 'image' or content['type'] == 'image_url' -%}
8
- <|media_begin|>image<|media_content|><|media_pad|><|media_end|>
9
- {% elif content['type'] == 'video' or content['type']== 'video_url'-%}
10
- <|kimi_k25_video_placeholder|>
11
- {% else -%}
12
- {{ content['text'] }}
13
- {%- endif -%}
14
- {%- endfor -%}
15
- {%- endif -%}
16
- {%- endmacro -%}
17
-
18
- {% macro set_roles(message) -%}
19
- {%- set role_name = message.get('name') or message['role'] -%}
20
- {%- if message['role'] == 'user' -%}
21
- <|im_user|>{{role_name}}<|im_middle|>
22
- {%- elif message['role'] == 'assistant' -%}
23
- <|im_assistant|>{{role_name}}<|im_middle|>
24
- {%- else -%}
25
- <|im_system|>{{role_name}}<|im_middle|>
26
- {%- endif -%}
27
- {%- endmacro -%}
28
-
29
-
30
- {%- macro render_toolcalls(message) -%}
31
- <|tool_calls_section_begin|>
32
- {%- for tool_call in message['tool_calls'] -%}
33
- {%- set formatted_id = tool_call['id'] -%}
34
- <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
35
- {%- endfor -%}
36
- <|tool_calls_section_end|>
37
- {%- endmacro -%}
38
-
39
-
40
- {%- set preserve_thinking = preserve_thinking | default(false) -%}
41
- {# Find last non-tool-call assistant message. If preserve_thinking, keep -1 so hist is empty and all msgs use suffix (retain reasoning). #}
42
- {%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
43
- {%- if not preserve_thinking -%}
44
- {%- for idx in range(messages|length-1, -1, -1) -%}
45
- {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
46
- {%- set ns.last_non_tool_call_assistant_msg = idx -%}
47
- {%- break -%}
48
- {%- endif -%}
49
- {%- endfor -%}
50
- {%- endif -%}
51
-
52
- {# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}
53
- {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
54
- {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
55
-
56
- {%- if tools -%}
57
- {%- if tools_ts_str -%}
58
- <|im_system|>tool_declare<|im_middle|>{{ tools_ts_str }}<|im_end|>
59
- {%- else -%}
60
- <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
61
- {%- endif -%}
62
- {%- endif -%}
63
-
64
-
65
- {%- for message in hist_msgs -%}
66
- {{set_roles(message)}}
67
- {%- if message['role'] == 'assistant' -%}
68
- <think></think>{{render_content(message)}}
69
- {%- if message.get('tool_calls') -%}
70
- {{render_toolcalls(message)}}
71
- {%- endif -%}
72
- {%- elif message['role'] == 'tool' -%}
73
- {%- set tool_call_id = message.tool_call_id -%}
74
- ## Return of {{ tool_call_id }}
75
- {{render_content(message)}}
76
- {%- elif message['content'] is not none -%}
77
- {{render_content(message)}}
78
- {%- endif -%}
79
- <|im_end|>
80
- {%- endfor -%}
81
-
82
- {%- for message in suffix_msgs -%}
83
- {{set_roles(message)}}
84
- {%- if message['role'] == 'assistant' -%}
85
- {%- if thinking is defined and thinking is false and preserve_thinking is false -%}
86
- <think></think>{{render_content(message)}}
87
- {%- else -%}
88
- {%- set rc = message.get('reasoning', message.get('reasoning_content', '')) -%}
89
- <think>{{rc}}</think>{{render_content(message)}}
90
- {%- endif -%}
91
- {%- if message.get('tool_calls') -%}
92
- {{render_toolcalls(message)}}
93
- {%- endif -%}
94
- {%- elif message['role'] == 'tool' -%}
95
- {%- set tool_call_id = message.tool_call_id -%}
96
- ## Return of {{ tool_call_id }}
97
- {{render_content(message)}}
98
- {%- elif message['content'] is not none -%}
99
- {{render_content(message)}}
100
- {%- endif -%}
101
- <|im_end|>
102
- {%- endfor -%}
103
-
104
-
105
- {%- if add_generation_prompt -%}
106
- <|im_assistant|>assistant<|im_middle|>
107
- {%- if thinking is defined and thinking is false -%}
108
- <think></think>
109
- {%- else -%}
110
- <think>
111
- {%- endif -%}
112
- {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,55 +0,0 @@
1
- {
2
- "architectures": [
3
- "DeepseekV3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "aux_loss_alpha": 0.001,
8
- "bos_token_id": 163584,
9
- "dtype": "bfloat16",
10
- "eos_token_id": 163585,
11
- "ep_size": 1,
12
- "first_k_dense_replace": 1,
13
- "head_dim": 64,
14
- "hidden_act": "silu",
15
- "hidden_size": 2048,
16
- "initializer_range": 0.02,
17
- "intermediate_size": 11264,
18
- "kv_lora_rank": 512,
19
- "max_position_embeddings": 131072,
20
- "model_type": "deepseek_v3",
21
- "moe_intermediate_size": 1408,
22
- "moe_layer_freq": 1,
23
- "n_group": 1,
24
- "n_routed_experts": 64,
25
- "n_shared_experts": 2,
26
- "norm_topk_prob": true,
27
- "num_attention_heads": 16,
28
- "num_experts_per_tok": 6,
29
- "num_hidden_layers": 27,
30
- "num_key_value_heads": 16,
31
- "num_nextn_predict_layers": 1,
32
- "num_shared_experts": 2,
33
- "pad_token_id": 163839,
34
- "pretraining_tp": 1,
35
- "q_lora_rank": null,
36
- "qk_head_dim": 192,
37
- "qk_nope_head_dim": 128,
38
- "qk_rope_head_dim": 64,
39
- "rms_norm_eps": 1e-05,
40
- "rope_interleave": true,
41
- "rope_parameters": {
42
- "rope_theta": 800000.0,
43
- "rope_type": "default"
44
- },
45
- "routed_scaling_factor": 2.446,
46
- "scoring_func": "sigmoid",
47
- "seq_aux": true,
48
- "tie_word_embeddings": false,
49
- "topk_group": 1,
50
- "topk_method": "noaux_tc",
51
- "transformers_version": "5.8.1",
52
- "use_cache": false,
53
- "v_head_dim": 128,
54
- "vocab_size": 163840
55
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 163584,
4
- "eos_token_id": 163585,
5
- "pad_token_id": 163839,
6
- "transformers_version": "5.8.1"
7
- }
 
 
 
 
 
 
 
 
model-00001-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:11844c30848c15289a9cc545bb98b578066bc926d4c35443c6b3f9f64a75581d
3
- size 4996317112
 
 
 
 
model-00002-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:98f7e4b6aec31dd0741d5c04895bcdcb83770cb758f44abfe78b8d1718b3a31c
3
- size 4699860696
 
 
 
 
model-00003-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c4f445af2a42b8abb356bd9383db0e42940b68889bcaa417c05027f5bf4cdbe
3
- size 4678885128
 
 
 
 
model-00004-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4181dae1acbaa54e8d2d512f3e0a5e4bee6c6f49479714a9cb97af007eb5075
3
- size 4678885544
 
 
 
 
model-00005-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:03312ba05824052bc517148fe094a9fcd81c3a3fc88c376bb3481d24a9948855
3
- size 4678885544
 
 
 
 
model-00006-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7016b61807291e25dc538589601c7960b67ea04afa59cb466219045abb7ee006
3
- size 4678885544
 
 
 
 
model-00007-of-00007.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:265c32ba15aadd7b74a7094be82f57ddc058f7e27f7b7e9e58ecb65d26b34d46
3
- size 3509164048
 
 
 
 
model.safetensors.index.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:261590fd5234433c558e867ea7d9b8d8a29ebc6bf51d9d43a9d714e4793db0e9
3
- size 19545255
 
 
 
 
tokenizer_config.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "backend": "tokenizers",
3
- "bos_token": "[BOS]",
4
- "clean_up_tokenization_spaces": false,
5
- "eos_token": "[EOS]",
6
- "is_local": true,
7
- "local_files_only": false,
8
- "model_max_length": 1000000000000000019884624838656,
9
- "pad_token": "[PAD]",
10
- "tokenizer_class": "TokenizersBackend",
11
- "unk_token": "[UNK]"
12
- }