cloud19 committed
Commit 7645120 · verified · 1 Parent(s): 2359572

Add TRT-LLM converted checkpoint (ready for build)

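The files below are in TensorRT-LLM's converted-checkpoint format, so the one remaining step is compiling them into an engine. A minimal sketch of that build step, assuming the `trtllm-build` CLI from TensorRT-LLM is on PATH; the output directory and sizing limits here are illustrative, and the flag names should be checked against the installed version:

```python
import subprocess

# Compile the converted checkpoint into a TensorRT engine.
# Flag names follow common TensorRT-LLM usage; verify against
# `trtllm-build --help` for the installed version.
subprocess.run(
    [
        "trtllm-build",
        "--checkpoint_dir", "trtllm_checkpoint",
        "--output_dir", "engine_dir",   # assumed output location
        "--max_batch_size", "8",        # illustrative sizing limits
        "--max_input_len", "4096",
    ],
    check=True,
)
```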
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+trtllm_checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
trtllm_checkpoint/chat_template.jinja ADDED
@@ -0,0 +1,3 @@
+{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] | trim + '
+
+' %}{% set messages = messages[1:] %}{% else %}{% set system_message = '' %}{% endif %}{{- bos_token + system_message}}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST]' + message['content'] | trim + '[/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] | trim + eos_token }}{% endif %}{% endfor %}
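The added template implements Mistral-style [INST] framing, with an optional leading system message folded in right after the BOS token. A quick way to sanity-check it is to render it directly with jinja2; the messages and token strings below are illustrative, and the real special tokens are recorded in special_tokens_map.json further down:

```python
from jinja2 import Template

# Render the chat template above to verify the [INST] ... [/INST] framing.
template = Template(open("trtllm_checkpoint/chat_template.jinja").read())

prompt = template.render(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi there."},
    ],
    bos_token="<s>",   # assumed values; the actual tokens live in
    eos_token="</s>",  # special_tokens_map.json below
)
print(prompt)
# Expected shape: <s>You are a helpful assistant.\n\n[INST]Hello![/INST]Hi there.</s>
```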
trtllm_checkpoint/config.json ADDED
@@ -0,0 +1,90 @@
+{
+    "producer": {
+        "name": "modelopt",
+        "version": "0.37.0"
+    },
+    "architecture": "MistralForCausalLM",
+    "dtype": "bfloat16",
+    "logits_dtype": "float16",
+    "num_hidden_layers": 40,
+    "num_attention_heads": 32,
+    "num_key_value_heads": 8,
+    "hidden_size": 5120,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 131072,
+    "max_position_embeddings": 1024000,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "head_size": 128,
+    "intermediate_size": 14336,
+    "position_embedding_type": "rope_gpt_neox",
+    "share_embedding_table": false,
+    "residual_mlp": false,
+    "bias": false,
+    "rotary_pct": 1.0,
+    "rank": 0,
+    "decoder": "mistral",
+    "rmsnorm": true,
+    "lm_head_bias": false,
+    "mlp_bias": false,
+    "attn_bias": false,
+    "rotary_base": 1000000.0,
+    "rotary_scaling": null,
+    "disable_weight_only_quant_plugin": false,
+    "moe": {
+        "num_experts": 0,
+        "shared_expert_intermediate_size": 0,
+        "top_k": 0,
+        "normalization_mode": null,
+        "sparse_mixer_epsilon": 0.01,
+        "tp_mode": 0,
+        "device_limited_n_group": 0,
+        "device_limited_topk_group": 0,
+        "device_limited_routed_scaling_factor": 1.0
+    },
+    "remove_duplicated_kv_heads": false,
+    "fc_after_embed": false,
+    "use_input_layernorm_in_first_layer": true,
+    "use_last_layernorm": true,
+    "layer_idx_offset": 0,
+    "embedding_multiplier": 1.0,
+    "attention_multiplier": 1.0,
+    "residual_multiplier": 1.0,
+    "output_multiplier_scale": 1.0,
+    "has_partial_lora_mask": false,
+    "runtime_defaults": null,
+    "mapping": {
+        "world_size": 1,
+        "gpus_per_node": 8,
+        "cp_size": 1,
+        "tp_size": 1,
+        "pp_size": 1,
+        "moe_tp_size": 1,
+        "moe_cluster_size": 1,
+        "moe_ep_size": 1,
+        "attn_tp_size": 1,
+        "attn_cp_size": 1,
+        "cp_config": {},
+        "auto_parallel": false,
+        "enable_attention_dp": false,
+        "enable_lm_head_tp_in_adp": false
+    },
+    "quantization": {
+        "quant_algo": "NVFP4",
+        "kv_cache_quant_algo": null,
+        "group_size": 16,
+        "smoothquant_val": 0.5,
+        "clamp_val": null,
+        "use_meta_recipe": false,
+        "has_zero_point": false,
+        "pre_quant_scale": false,
+        "exclude_modules": [
+            "lm_head"
+        ],
+        "mamba_ssm_cache_dtype": null
+    },
+    "qk_layernorm": false,
+    "rotary_embedding_dim": 128,
+    "tie_word_embeddings": false
+}
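Since everything downstream keys off this config, a quick sanity check before building is cheap insurance: the conversion above records NVFP4 weight quantization (group size 16, lm_head excluded), a single-rank mapping, and a GQA layout of 32 query heads over 8 KV heads. A small stdlib-only sketch:

```python
import json

with open("trtllm_checkpoint/config.json") as f:
    cfg = json.load(f)

# Key facts recorded by the conversion above.
assert cfg["architecture"] == "MistralForCausalLM"
assert cfg["quantization"]["quant_algo"] == "NVFP4"
assert cfg["mapping"]["tp_size"] == 1 and cfg["mapping"]["pp_size"] == 1

# GQA layout: 32 query heads sharing 8 KV heads, 128 dims each.
print(cfg["num_attention_heads"], cfg["num_key_value_heads"], cfg["head_size"])
```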
trtllm_checkpoint/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:464bcb3334684b3061d00c016b267ce129f72c11a103ad7569b9969bd8e9c0cc
+size 8819460504
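The weights themselves are stored as a Git LFS pointer, so after fetching the real object (e.g. with `git lfs pull`) it can be verified against the oid and size recorded above. A minimal stdlib sketch:

```python
import hashlib
import os

path = "trtllm_checkpoint/rank0.safetensors"

# Size recorded in the LFS pointer above.
assert os.path.getsize(path) == 8819460504

# SHA-256 oid from the pointer, computed in 1 MiB chunks to bound memory.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == "464bcb3334684b3061d00c016b267ce129f72c11a103ad7569b9969bd8e9c0cc"
```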
trtllm_checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+    "bos_token": {
+        "content": "<s>",
+        "lstrip": false,
+        "normalized": false,
+        "rstrip": false,
+        "single_word": false
+    },
+    "eos_token": {
+        "content": "</s>",
+        "lstrip": false,
+        "normalized": false,
+        "rstrip": false,
+        "single_word": false
+    },
+    "pad_token": {
+        "content": "<pad>",
+        "lstrip": false,
+        "normalized": false,
+        "rstrip": false,
+        "single_word": false
+    },
+    "unk_token": {
+        "content": "<unk>",
+        "lstrip": false,
+        "normalized": false,
+        "rstrip": false,
+        "single_word": false
+    }
+}
trtllm_checkpoint/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8771d3c14b4206fd6b54c60d037eeafce2f37382046c2369c36cd2edd2f099d7
+size 17078391
trtllm_checkpoint/tokenizer_config.json ADDED
The diff for this file is too large to render.
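With tokenizer.json, tokenizer_config.json, special_tokens_map.json, and the chat template all committed, the checkpoint directory should load as an ordinary Hugging Face tokenizer. A sketch, assuming a transformers version recent enough to pick up a standalone chat_template.jinja file:

```python
from transformers import AutoTokenizer

# Load the tokenizer files committed alongside the checkpoint.
tok = AutoTokenizer.from_pretrained("trtllm_checkpoint")

# Should reflect special_tokens_map.json: <s>, </s>, <pad>, <unk>.
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)

# apply_chat_template renders the committed Jinja template.
text = tok.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
)
print(text)  # e.g. "<s>[INST]Hello![/INST]"
```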