diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..aa7aacd0134a92c3c1943fdecc75cd8b7420cce6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..2ab98ef068d62829d17c5ade1827b9f013fa2bbf --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6bb963aa623712467bfffb2752308606e2f85207 --- /dev/null +++ b/config.json @@ -0,0 +1,171 @@ +{ + "architectures": [ + "Glm4MoeForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "first_k_dense_replace": 3, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 12288, + "max_position_embeddings": 202752, + "model_type": "glm4_moe", + "moe_intermediate_size": 1536, + "n_group": 1, + "n_routed_experts": 160, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 96, + "num_experts_per_tok": 8, + "num_hidden_layers": 92, + "num_key_value_heads": 8, + "num_nextn_predict_layers": 1, + "pad_token_id": 151329, + "partial_rotary_factor": 0.5, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000, + "routed_scaling_factor": 2.5, + "tie_word_embeddings": false, + "topk_group": 1, + "transformers_version": "4.57.1", + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 151552, + "quantization_config": { + "config_groups": { + "group_0": { + "input_activations": { + "dynamic": false, + "num_bits": 4, + "type": "float", + "group_size": 16 + }, + "weights": { + "dynamic": false, + "num_bits": 4, + "type": "float", + "group_size": 16 + }, + "targets": [ + "Linear" + ] + } + }, + "ignore": [ + "lm_head", + "model.layers.0.self_attn*", + "model.layers.1.self_attn*", + "model.layers.10.self_attn*", + "model.layers.11.self_attn*", + "model.layers.12.self_attn*", + "model.layers.13.self_attn*", + "model.layers.14.self_attn*", + "model.layers.15.self_attn*", + "model.layers.16.self_attn*", + "model.layers.17.self_attn*", + "model.layers.18.self_attn*", + "model.layers.19.self_attn*", + "model.layers.2.self_attn*", + "model.layers.20.self_attn*", + "model.layers.21.self_attn*", + "model.layers.22.self_attn*", + "model.layers.23.self_attn*", + "model.layers.24.self_attn*", + "model.layers.25.self_attn*", + "model.layers.26.self_attn*", + "model.layers.27.self_attn*", + "model.layers.28.self_attn*", + "model.layers.29.self_attn*", + "model.layers.3.self_attn*", + "model.layers.30.self_attn*", + "model.layers.31.self_attn*", + "model.layers.32.self_attn*", + "model.layers.33.self_attn*", + "model.layers.34.self_attn*", + "model.layers.35.self_attn*", + "model.layers.36.self_attn*", + "model.layers.37.self_attn*", + "model.layers.38.self_attn*", + "model.layers.39.self_attn*", + "model.layers.4.self_attn*", + "model.layers.40.self_attn*", + "model.layers.41.self_attn*", + "model.layers.42.self_attn*", + "model.layers.43.self_attn*", + "model.layers.44.self_attn*", + "model.layers.45.self_attn*", + "model.layers.46.self_attn*", + "model.layers.47.self_attn*", + "model.layers.48.self_attn*", + "model.layers.49.self_attn*", + "model.layers.5.self_attn*", + "model.layers.50.self_attn*", + "model.layers.51.self_attn*", + "model.layers.52.self_attn*", + "model.layers.53.self_attn*", + "model.layers.54.self_attn*", + "model.layers.55.self_attn*", + "model.layers.56.self_attn*", + "model.layers.57.self_attn*", + "model.layers.58.self_attn*", + "model.layers.59.self_attn*", + "model.layers.6.self_attn*", + "model.layers.60.self_attn*", + "model.layers.61.self_attn*", + "model.layers.62.self_attn*", + "model.layers.63.self_attn*", + "model.layers.64.self_attn*", + "model.layers.65.self_attn*", + "model.layers.66.self_attn*", + "model.layers.67.self_attn*", + "model.layers.68.self_attn*", + "model.layers.69.self_attn*", + "model.layers.7.self_attn*", + "model.layers.70.self_attn*", + "model.layers.71.self_attn*", + "model.layers.72.self_attn*", + "model.layers.73.self_attn*", + "model.layers.74.self_attn*", + "model.layers.75.self_attn*", + "model.layers.76.self_attn*", + "model.layers.77.self_attn*", + "model.layers.78.self_attn*", + "model.layers.79.self_attn*", + "model.layers.8.self_attn*", + "model.layers.80.self_attn*", + "model.layers.81.self_attn*", + "model.layers.82.self_attn*", + "model.layers.83.self_attn*", + "model.layers.84.self_attn*", + "model.layers.85.self_attn*", + "model.layers.86.self_attn*", + "model.layers.87.self_attn*", + "model.layers.88.self_attn*", + "model.layers.89.self_attn*", + "model.layers.9.self_attn*", + "model.layers.90.self_attn*", + "model.layers.91.self_attn*", + "model.layers.92*" + ], + "quant_algo": "NVFP4", + "kv_cache_scheme": { + "dynamic": false, + "num_bits": 8, + "type": "float" + }, + "producer": { + "name": "modelopt", + "version": "0.41.0" + }, + "quant_method": "modelopt" + } +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f51194759eb31dde6fbc75a28e3fb7036f68161a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "pad_token_id": 151329, + "temperature": 1.0, + "transformers_version": "4.56.2" +} diff --git a/hf_quant_config.json b/hf_quant_config.json new file mode 100644 index 0000000000000000000000000000000000000000..910772fc39797df6be92d37b6fd0412d06c23d62 --- /dev/null +++ b/hf_quant_config.json @@ -0,0 +1,107 @@ +{ + "producer": { + "name": "modelopt", + "version": "0.41.0" + }, + "quantization": { + "quant_algo": "NVFP4", + "kv_cache_quant_algo": "FP8", + "group_size": 16, + "exclude_modules": [ + "lm_head", + "model.layers.0.self_attn*", + "model.layers.1.self_attn*", + "model.layers.10.self_attn*", + "model.layers.11.self_attn*", + "model.layers.12.self_attn*", + "model.layers.13.self_attn*", + "model.layers.14.self_attn*", + "model.layers.15.self_attn*", + "model.layers.16.self_attn*", + "model.layers.17.self_attn*", + "model.layers.18.self_attn*", + "model.layers.19.self_attn*", + "model.layers.2.self_attn*", + "model.layers.20.self_attn*", + "model.layers.21.self_attn*", + "model.layers.22.self_attn*", + "model.layers.23.self_attn*", + "model.layers.24.self_attn*", + "model.layers.25.self_attn*", + "model.layers.26.self_attn*", + "model.layers.27.self_attn*", + "model.layers.28.self_attn*", + "model.layers.29.self_attn*", + "model.layers.3.self_attn*", + "model.layers.30.self_attn*", + "model.layers.31.self_attn*", + "model.layers.32.self_attn*", + "model.layers.33.self_attn*", + "model.layers.34.self_attn*", + "model.layers.35.self_attn*", + "model.layers.36.self_attn*", + "model.layers.37.self_attn*", + "model.layers.38.self_attn*", + "model.layers.39.self_attn*", + "model.layers.4.self_attn*", + "model.layers.40.self_attn*", + "model.layers.41.self_attn*", + "model.layers.42.self_attn*", + "model.layers.43.self_attn*", + "model.layers.44.self_attn*", + "model.layers.45.self_attn*", + "model.layers.46.self_attn*", + "model.layers.47.self_attn*", + "model.layers.48.self_attn*", + "model.layers.49.self_attn*", + "model.layers.5.self_attn*", + "model.layers.50.self_attn*", + "model.layers.51.self_attn*", + "model.layers.52.self_attn*", + "model.layers.53.self_attn*", + "model.layers.54.self_attn*", + "model.layers.55.self_attn*", + "model.layers.56.self_attn*", + "model.layers.57.self_attn*", + "model.layers.58.self_attn*", + "model.layers.59.self_attn*", + "model.layers.6.self_attn*", + "model.layers.60.self_attn*", + "model.layers.61.self_attn*", + "model.layers.62.self_attn*", + "model.layers.63.self_attn*", + "model.layers.64.self_attn*", + "model.layers.65.self_attn*", + "model.layers.66.self_attn*", + "model.layers.67.self_attn*", + "model.layers.68.self_attn*", + "model.layers.69.self_attn*", + "model.layers.7.self_attn*", + "model.layers.70.self_attn*", + "model.layers.71.self_attn*", + "model.layers.72.self_attn*", + "model.layers.73.self_attn*", + "model.layers.74.self_attn*", + "model.layers.75.self_attn*", + "model.layers.76.self_attn*", + "model.layers.77.self_attn*", + "model.layers.78.self_attn*", + "model.layers.79.self_attn*", + "model.layers.8.self_attn*", + "model.layers.80.self_attn*", + "model.layers.81.self_attn*", + "model.layers.82.self_attn*", + "model.layers.83.self_attn*", + "model.layers.84.self_attn*", + "model.layers.85.self_attn*", + "model.layers.86.self_attn*", + "model.layers.87.self_attn*", + "model.layers.88.self_attn*", + "model.layers.89.self_attn*", + "model.layers.9.self_attn*", + "model.layers.90.self_attn*", + "model.layers.91.self_attn*", + "model.layers.92*" + ] + } +} \ No newline at end of file diff --git a/model-00001-of-00044.safetensors b/model-00001-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de4b8187d0316da7660fab5139b30750af167ef2 --- /dev/null +++ b/model-00001-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c39f7efc803ccfa2f647875fffdd6fed8ed0365b614af4650e6b9773c10b320 +size 5000150024 diff --git a/model-00002-of-00044.safetensors b/model-00002-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51823c79084efd9a709e18b177f7c3176f2d49ab --- /dev/null +++ b/model-00002-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e84c2ffeb814d3ff4bd6fa2b92f8ae8d64657a4b2affd2742a5137cb0d89b3 +size 4921861160 diff --git a/model-00003-of-00044.safetensors b/model-00003-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d9ea746bc09d2d7b2593b27cc113114ba96c630 --- /dev/null +++ b/model-00003-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d282fbf39cf62dbbea39e85a74a0971f43fddece9d21d703940818271b47974 +size 4969207952 diff --git a/model-00004-of-00044.safetensors b/model-00004-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f1394b78e17c179912333564d2b44c36c67811d --- /dev/null +++ b/model-00004-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068a4483a887fbdc63a71a012456f582c804e701564fdefd0938c595c98570a1 +size 4996873824 diff --git a/model-00005-of-00044.safetensors b/model-00005-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b349653af9612ceaefd325595cd78be667293429 --- /dev/null +++ b/model-00005-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301c2d61e32958bbe8e86543984b560acd68ca37a48844a65467d3fdd61307e9 +size 4999347728 diff --git a/model-00006-of-00044.safetensors b/model-00006-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4b5e34ed000193a3390bda846cce379d32b8f7c --- /dev/null +++ b/model-00006-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a22253abc988074bccf81256d9043ed857d06b537c35306e785a0f8d9c6f909 +size 4999347808 diff --git a/model-00007-of-00044.safetensors b/model-00007-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6652267a28771b87d5448eed2de07c01997545b9 --- /dev/null +++ b/model-00007-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1a835bb0309d72ccae04613cdac17e856a366f143e629b45856157c912fbd5 +size 4999347800 diff --git a/model-00008-of-00044.safetensors b/model-00008-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdaf1c498a3efe53a7de03bcbdb269949e9f7a4a --- /dev/null +++ b/model-00008-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dcf77b8273b1665d6801424bbfbc510e9d0632df68046856997d963823247f2 +size 4999347808 diff --git a/model-00009-of-00044.safetensors b/model-00009-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7a877bc61b026a37bdfabdb8869afbc3224f096 --- /dev/null +++ b/model-00009-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bad915b1c9c1b10b018201638e23d30311b4477acc19ddda4b8988a0b453c98 +size 4999347808 diff --git a/model-00010-of-00044.safetensors b/model-00010-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..448fd7ca5a5a9629799f025d2e87f0cfd42fe10f --- /dev/null +++ b/model-00010-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc2731b43889d9a9823dd46c4e5dce2c1fe35bdba940b8eae7760294850d245 +size 4999347800 diff --git a/model-00011-of-00044.safetensors b/model-00011-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bec9ce0f454911b413318aef656cee12f72f6e8c --- /dev/null +++ b/model-00011-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd5c2ec5d66baececa953acabcf2549e741a8abf61f97fdc73133fd46511a4d +size 4999347808 diff --git a/model-00012-of-00044.safetensors b/model-00012-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0e6e49755fcec2991666451f015fae1457f0d85 --- /dev/null +++ b/model-00012-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d128aa164e063854b3814028c3a5704c42c29586b1d33ea10f9bde9624a850f3 +size 4999347928 diff --git a/model-00013-of-00044.safetensors b/model-00013-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..183127c411ec32bdfa5d2b2e53f8de9a3a43f049 --- /dev/null +++ b/model-00013-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226113283ed63c1d8530344050ca4d5cf38a8168cf76659bbf5bc23cd94b9397 +size 4999347960 diff --git a/model-00014-of-00044.safetensors b/model-00014-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6844840c10b310c96e6ea90b4808b3be96fdf98f --- /dev/null +++ b/model-00014-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c4b6361fcdce16836b616e450b89ab87445d822d658b460e464693530f3ac4 +size 4999348024 diff --git a/model-00015-of-00044.safetensors b/model-00015-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c201a9b0f0caef57f19c1d520b0e19000e18236 --- /dev/null +++ b/model-00015-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874ac3e7c39442f673167112e4165889a36d43784e5dc8084c5a51a31d66f4b0 +size 4999348192 diff --git a/model-00016-of-00044.safetensors b/model-00016-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e664af4d6aa0028c0f36e4fb47af4b1be7997ac0 --- /dev/null +++ b/model-00016-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65a597176392b9032587a4e7ef34a7d65b0fa1dbbfefe44183149c70c14b1d0 +size 4965614944 diff --git a/model-00017-of-00044.safetensors b/model-00017-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7c23520f9f99e2e24c704421963d4c85442f3ec --- /dev/null +++ b/model-00017-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00c00604a5901ee25157c5776e043ed95e54978cabc9386f94fee73220c5faa +size 4969211856 diff --git a/model-00018-of-00044.safetensors b/model-00018-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19de9c4b4eb03020509728ea68617075ab9b319e --- /dev/null +++ b/model-00018-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89947bcc9d0b5e88abae04aeee6e88910f10c12fe266f2e070261e358651a8f +size 4996877712 diff --git a/model-00019-of-00044.safetensors b/model-00019-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be9a519e8e30f4ddbb311a7c7b2663a42db77206 --- /dev/null +++ b/model-00019-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed602dc348768afd1aa68d4438f92a6fc92c7cfca0e18c5e5040e56329b0964 +size 4999347728 diff --git a/model-00020-of-00044.safetensors b/model-00020-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2027d512a6ad430870730298a7824999f9fb7a2 --- /dev/null +++ b/model-00020-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26422eb5989fe86dc3cda8d8d6f93451c6699b0b8578198da7ba22bdeb25db08 +size 4999347808 diff --git a/model-00021-of-00044.safetensors b/model-00021-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20a2f964bf6071e26ee2a1033deaad1ca9a33abe --- /dev/null +++ b/model-00021-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b98767c7869041f2b958e80508cebc9fc389ac5ec3b2af247e29231b01340e9 +size 4999347800 diff --git a/model-00022-of-00044.safetensors b/model-00022-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b65cce94fd63f6b51bfbf76402911a50c057d289 --- /dev/null +++ b/model-00022-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae0eacc27afb4ae1b2e212a89e45c65c1aff0a5601c856679d6c25fba0729868 +size 4999347808 diff --git a/model-00023-of-00044.safetensors b/model-00023-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..563b2c901f733c4c345a73729185471ba436f98d --- /dev/null +++ b/model-00023-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325914f55b6e2cf3a367029e36099c022e740a2169ca9f83dfd02c33d109884d +size 4999347808 diff --git a/model-00024-of-00044.safetensors b/model-00024-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85645a47a6fdcb046b79a534aba92eafc3edc9a2 --- /dev/null +++ b/model-00024-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cefbf420e6b3d106e3d80686eb0dd156fd7b6ade8b0307272593b31c7977cc8 +size 4999347800 diff --git a/model-00025-of-00044.safetensors b/model-00025-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e377b4641d8d747f154434babab2a3c0e49f9f40 --- /dev/null +++ b/model-00025-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f42893a9a0b43b88ac65f25d3dfbf9d067a2b7be0d4ebd8ea73d6415d566bd +size 4999347808 diff --git a/model-00026-of-00044.safetensors b/model-00026-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a5dbc5a80486bcac25fe71eada8aec7b7d4b8f9 --- /dev/null +++ b/model-00026-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b6d3db0225511e9f28f62132b0c5cd624348eb2624c54b955b20c731162926 +size 4999347928 diff --git a/model-00027-of-00044.safetensors b/model-00027-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1dbec8571ffc83aa5481b9668586247586446799 --- /dev/null +++ b/model-00027-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e01ef81166589cd5ac604a577ff2197561f21da778a98b2bcd1519be161a950 +size 4999347960 diff --git a/model-00028-of-00044.safetensors b/model-00028-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cffb401f5d3a455d865d443fc79e5f1a7b5c6768 --- /dev/null +++ b/model-00028-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2dcddfdefff92fa6ca4dd2c06b83bf2bb04d41faa9fc93251906b9b0a59b3d +size 4999348024 diff --git a/model-00029-of-00044.safetensors b/model-00029-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71e2f3ee740124c64cdc5d0d5d956454f0051dc9 --- /dev/null +++ b/model-00029-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85b6b9961a65aeeb1e6158c0693db0dfe8775d162714ed21f30b9622a6af9fc +size 4999348192 diff --git a/model-00030-of-00044.safetensors b/model-00030-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eeb18cf1e771921dfbb398bec468e79f46fdb550 --- /dev/null +++ b/model-00030-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dea0b184987260b031107e63c8478d899edd98cfa7c0c2262ef3c560a95a0b4 +size 4965614944 diff --git a/model-00031-of-00044.safetensors b/model-00031-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e55817e05f55fc1c69611fc499e310bdd8b937fe --- /dev/null +++ b/model-00031-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959858266ed17686b4635b30b417e3939d49ea4afe85d98c7d195fbee953c213 +size 4969211856 diff --git a/model-00032-of-00044.safetensors b/model-00032-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..791bd5a4354fa6c91481175e5c622a9ac6800abf --- /dev/null +++ b/model-00032-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b04b59da6879b1aaa3bcc60c6dfc56192342859d3f818ea6a0870c75559f3e +size 4996877712 diff --git a/model-00033-of-00044.safetensors b/model-00033-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29bc7a8accd674a07c4867ce0459c6abee898187 --- /dev/null +++ b/model-00033-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c61a2ef4b910109136d3319d900eeb158e7f61ad34ee7ba630d60ee3ee51c3 +size 4999347728 diff --git a/model-00034-of-00044.safetensors b/model-00034-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..606f0f9af44daf0693175e37a44c4cf48a56a4e2 --- /dev/null +++ b/model-00034-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b864c46e32818810e13b03956baf304fd5cb0ae38e9fd17b81a3c316b878d1 +size 4999347808 diff --git a/model-00035-of-00044.safetensors b/model-00035-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df0d9de7a48a9967792aabde79f9a27bcdb18e6f --- /dev/null +++ b/model-00035-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242c936229d08e9e8df91810c291fe29ec85f57edd505bd3bc641b4e2c0d2570 +size 4999347800 diff --git a/model-00036-of-00044.safetensors b/model-00036-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64fb3befc2da63a076df3ab46eabe2f3caf2c585 --- /dev/null +++ b/model-00036-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a179f3784b91b87e83eb66a6fa2b850e6edd623df8b601064e5fba8a9310db27 +size 4999347808 diff --git a/model-00037-of-00044.safetensors b/model-00037-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b6c3ccd0e71eecf57d008b4533d2e10bf78ff90 --- /dev/null +++ b/model-00037-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796e3f3783088396453ea0d4f3768537734994d76f275b78786da789fcf97d23 +size 4999347808 diff --git a/model-00038-of-00044.safetensors b/model-00038-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fbeafecf7ec4cbc5a495994c5dbf5458375466a --- /dev/null +++ b/model-00038-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20e6b09a57371777ddf2b57278935b0ef7e0c8566eef8474056f9e7e456f093 +size 4999347800 diff --git a/model-00039-of-00044.safetensors b/model-00039-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5f5f88b2eece5e1c4a55fc84ad12d7545ad9ce3 --- /dev/null +++ b/model-00039-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee800faa6c132d69c1270c6e60a778a4c08322b3d8ae244c0e1722ed69deca2c +size 4999347808 diff --git a/model-00040-of-00044.safetensors b/model-00040-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d716f4b5cfee009ff84808cd23bac80d2eb3dff0 --- /dev/null +++ b/model-00040-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dbbba1d224183fa2e76a94ad8b2b7cd615a5a8c8930275d1e82b7de21bb31c +size 4999347928 diff --git a/model-00041-of-00044.safetensors b/model-00041-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f78c770e81cb93106321f076112e51c5238c752c --- /dev/null +++ b/model-00041-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196df17946086145f36e302ce5c7fc36ad74ce1b316980f12b3088dd9aca1bcb +size 4999347960 diff --git a/model-00042-of-00044.safetensors b/model-00042-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ab5f54492157efc8e76e04e6bb74f579fd3cadb --- /dev/null +++ b/model-00042-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a687e235a38c2a4f45cace4a2e4cca3815610c3a285c911c55ac1c1572116e49 +size 4999348024 diff --git a/model-00043-of-00044.safetensors b/model-00043-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e21ef0e4ba4cd773cc1e6d04d6488b046d3c8755 --- /dev/null +++ b/model-00043-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ff36e81a17536c9c0612d39281ad8d05e070fa1e142e013c088dfe83a5149b +size 4999348192 diff --git a/model-00044-of-00044.safetensors b/model-00044-of-00044.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e330ff08118e7eb359a6aa957fb9715027f5926e --- /dev/null +++ b/model-00044-of-00044.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fabd3e7e639765b857ec9004f23fe1322812d45b5efaeacbee164dc8b35096a7 +size 4106327920 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..acb34ea0e1e6f4c42bfaac16e950a734ec57b9dd --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b6de452023890532d59e0ec6b8570b5ed9bc7a130c0f105debed443d576bfd +size 16547896 diff --git a/mtp.safetensors b/mtp.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..338451fe52e9417e8811679d614cb27315a1723e --- /dev/null +++ b/mtp.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ad4e6152da40142d21eb935fea628c500d7529508c86c3c01c1dbce0f34d1a +size 11079987712 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..7645faed5973c324a314e6c1157a946a960f284c --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|endoftext|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3ed3c66baf1ec4de61840b0abf02142687bfed8 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda8e2146c3bb7b7e0fc96dcc4f0aeff041c6c27952e3ace0665663ebff346ba +size 19970700 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..75e11cfb2e0cc09f19391ec2278b4825a4c3fae9 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,325 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151343": { + "content": "<|begin_of_audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151344": { + "content": "<|end_of_audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151345": { + "content": "<|begin_of_transcription|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151346": { + "content": "<|end_of_transcription|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151347": { + "content": "<|code_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151348": { + "content": "<|code_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151349": { + "content": "<|code_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151360": { + "content": "/nothink", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151361": { + "content": "<|begin_of_box|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151362": { + "content": "<|end_of_box|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151363": { + "content": "<|image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151364": { + "content": "<|video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": {}, + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +}