Upload 5 files
Browse files- chat_template.jinja +24 -0
- config.json +68 -0
- generation_config.json +11 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
chat_template.jinja
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if not add_generation_prompt is defined -%}
|
| 2 |
+
{%- set add_generation_prompt = true -%}
|
| 3 |
+
{%- endif -%}
|
| 4 |
+
{%- if not cls_token is defined -%}
|
| 5 |
+
{%- set cls_token = "<|begin_of_sentence|>" -%}
|
| 6 |
+
{%- endif -%}
|
| 7 |
+
{%- if not sep_token is defined -%}
|
| 8 |
+
{%- set sep_token = "<|end_of_sentence|>" -%}
|
| 9 |
+
{%- endif -%}
|
| 10 |
+
{{- cls_token -}}
|
| 11 |
+
{%- for message in messages -%}
|
| 12 |
+
{%- if message["role"] == "user" -%}
|
| 13 |
+
{{- "User: " + message["content"] + "
|
| 14 |
+
" -}}
|
| 15 |
+
{%- elif message["role"] == "assistant" -%}
|
| 16 |
+
{{- "Assistant: " + message["content"] + sep_token -}}
|
| 17 |
+
{%- elif message["role"] == "system" -%}
|
| 18 |
+
{{- message["content"] + "
|
| 19 |
+
" -}}
|
| 20 |
+
{%- endif -%}
|
| 21 |
+
{%- endfor -%}
|
| 22 |
+
{%- if add_generation_prompt -%}
|
| 23 |
+
{{- "Assistant: " -}}
|
| 24 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_tail_layers": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"Ernie4_5_ForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.0,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"compression_ratio": 1.0,
|
| 9 |
+
"dtype": "bfloat16",
|
| 10 |
+
"enable_mtp_magic_send": false,
|
| 11 |
+
"eos_token_id": 2,
|
| 12 |
+
"fuse_gate_detach_matmul": true,
|
| 13 |
+
"fuse_linear": false,
|
| 14 |
+
"fuse_ln": false,
|
| 15 |
+
"fuse_rms_norm": true,
|
| 16 |
+
"fuse_rope": true,
|
| 17 |
+
"fuse_swiglu": true,
|
| 18 |
+
"global_aux_loss": false,
|
| 19 |
+
"head_dim": 128,
|
| 20 |
+
"hidden_act": "silu",
|
| 21 |
+
"hidden_dropout_prob": 0.0,
|
| 22 |
+
"hidden_size": 1024,
|
| 23 |
+
"ignored_index": -100,
|
| 24 |
+
"initializer_range": 0.018041293779826325,
|
| 25 |
+
"intermediate_size": 3072,
|
| 26 |
+
"max_position_embeddings": 131072,
|
| 27 |
+
"model_type": "ernie4_5",
|
| 28 |
+
"moe_aux_loss_lambda": 1e-05,
|
| 29 |
+
"moe_capacity": [],
|
| 30 |
+
"moe_dense_experts_token_type_id": 3,
|
| 31 |
+
"moe_gate": "topk",
|
| 32 |
+
"moe_gate_act": "softmax",
|
| 33 |
+
"moe_group_experts": false,
|
| 34 |
+
"moe_group_orthogonal_loss": true,
|
| 35 |
+
"moe_intermediate_size": 0,
|
| 36 |
+
"moe_k": 2,
|
| 37 |
+
"moe_layer_end_index": 17,
|
| 38 |
+
"moe_layer_interval": 2,
|
| 39 |
+
"moe_layer_start_index": 0,
|
| 40 |
+
"moe_norm_gate_logits": true,
|
| 41 |
+
"moe_num_experts": null,
|
| 42 |
+
"moe_num_shared_experts": 0,
|
| 43 |
+
"moe_orthogonal_loss_lambda": 0.0,
|
| 44 |
+
"moe_reverse_token_drop": false,
|
| 45 |
+
"moe_use_aux_free": false,
|
| 46 |
+
"moe_use_hard_gate": false,
|
| 47 |
+
"moe_z_loss_lambda": 0.0,
|
| 48 |
+
"multi_token_pred_lambda": 0.3,
|
| 49 |
+
"num_attention_heads": 16,
|
| 50 |
+
"num_hidden_layers": 18,
|
| 51 |
+
"num_key_value_heads": 2,
|
| 52 |
+
"num_nextn_predict_layers": 0,
|
| 53 |
+
"pad_token_id": 0,
|
| 54 |
+
"paddleformers_version": "0.1",
|
| 55 |
+
"rms_norm_eps": 1e-05,
|
| 56 |
+
"rope_theta": 500000,
|
| 57 |
+
"sinkhorn_2gate": true,
|
| 58 |
+
"sinkhorn_temp": 0.03,
|
| 59 |
+
"tensor_parallel_degree": -1,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"token_balance_loss": false,
|
| 62 |
+
"token_balance_seqlen": false,
|
| 63 |
+
"use_bias": false,
|
| 64 |
+
"use_recompute_mtp": false,
|
| 65 |
+
"use_rmsnorm": true,
|
| 66 |
+
"vocab_size": 103424,
|
| 67 |
+
"weight_share_add_bias": true
|
| 68 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"top_p": 0.8,
|
| 4 |
+
"temperature": 0.8,
|
| 5 |
+
"bos_token_id": 1,
|
| 6 |
+
"eos_token_id": 2,
|
| 7 |
+
"pad_token_id": 0,
|
| 8 |
+
"repetition_penalty": 1.0,
|
| 9 |
+
"frequency_penalty": 0.0,
|
| 10 |
+
"presence_penalty": 0.0
|
| 11 |
+
}
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34ef7db83df785924fb83d7b887b6e822a031c56e15cff40aaf9b982988180df
|
| 3 |
+
size 1614363
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|