Dasuperhub committed on
Commit
e710a90
·
verified ·
1 Parent(s): 79b5e8b

Soussou v2

Browse files
mlc-chat-config.json CHANGED
@@ -10,31 +10,31 @@
10
  "num_attention_heads": 16,
11
  "num_hidden_layers": 28,
12
  "num_key_value_heads": 8,
13
- "rms_norm_eps": 9.999999974752427e-07,
14
- "rope_theta": 1000000.0,
15
  "vocab_size": 151936,
16
  "tie_word_embeddings": true,
17
- "context_window_size": 4096,
18
- "prefill_chunk_size": 1024,
19
  "tensor_parallel_shards": 1,
20
  "head_dim": 128,
21
- "dtype": "float32",
22
  "max_batch_size": 128,
23
  "weight_block_size": null
24
  },
25
  "vocab_size": 151936,
26
- "context_window_size": 4096,
27
  "sliding_window_size": -1,
28
- "prefill_chunk_size": 1024,
29
  "attention_sink_size": -1,
30
  "tensor_parallel_shards": 1,
31
  "pipeline_parallel_stages": 1,
32
  "active_vocab_size": 151669,
33
- "temperature": 1.0,
34
  "presence_penalty": 0.0,
35
  "frequency_penalty": 0.0,
36
  "repetition_penalty": 1.0,
37
- "top_p": 1.0,
38
  "tokenizer_files": [
39
  "tokenizer.json",
40
  "vocab.json",
@@ -48,9 +48,9 @@
48
  "strip_space_in_decode": false
49
  },
50
  "conv_template": {
51
- "name": "qwen2",
52
  "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
53
- "system_message": "You are a helpful assistant.",
54
  "system_prefix_token_ids": null,
55
  "add_role_after_system_message": true,
56
  "roles": {
@@ -69,17 +69,18 @@
69
  "role_content_sep": "\n",
70
  "role_empty_sep": "\n",
71
  "stop_str": [
72
- "<|endoftext|>",
73
  "<|im_end|>"
74
  ],
75
  "stop_token_ids": [
76
- 151643,
77
- 151645
78
  ],
79
  "function_string": "",
80
  "use_function_calling": false
81
  },
82
- "pad_token_id": 151654,
83
- "bos_token_id": 1,
84
- "eos_token_id": 151645
 
 
 
85
  }
 
10
  "num_attention_heads": 16,
11
  "num_hidden_layers": 28,
12
  "num_key_value_heads": 8,
13
+ "rms_norm_eps": 1e-06,
14
+ "rope_theta": 1000000,
15
  "vocab_size": 151936,
16
  "tie_word_embeddings": true,
17
+ "context_window_size": 2048,
18
+ "prefill_chunk_size": 2048,
19
  "tensor_parallel_shards": 1,
20
  "head_dim": 128,
21
+ "dtype": "float16",
22
  "max_batch_size": 128,
23
  "weight_block_size": null
24
  },
25
  "vocab_size": 151936,
26
+ "context_window_size": 2048,
27
  "sliding_window_size": -1,
28
+ "prefill_chunk_size": 2048,
29
  "attention_sink_size": -1,
30
  "tensor_parallel_shards": 1,
31
  "pipeline_parallel_stages": 1,
32
  "active_vocab_size": 151669,
33
+ "temperature": 0.6,
34
  "presence_penalty": 0.0,
35
  "frequency_penalty": 0.0,
36
  "repetition_penalty": 1.0,
37
+ "top_p": 0.95,
38
  "tokenizer_files": [
39
  "tokenizer.json",
40
  "vocab.json",
 
48
  "strip_space_in_decode": false
49
  },
50
  "conv_template": {
51
+ "name": "chatml",
52
  "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
53
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
54
  "system_prefix_token_ids": null,
55
  "add_role_after_system_message": true,
56
  "roles": {
 
69
  "role_content_sep": "\n",
70
  "role_empty_sep": "\n",
71
  "stop_str": [
 
72
  "<|im_end|>"
73
  ],
74
  "stop_token_ids": [
75
+ 2
 
76
  ],
77
  "function_string": "",
78
  "use_function_calling": false
79
  },
80
+ "pad_token_id": 151643,
81
+ "bos_token_id": 151643,
82
+ "eos_token_id": [
83
+ 151645,
84
+ 151643
85
+ ]
86
  }
params_shard_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:807e91704ca3f7202f2166e9400c0d099403c965859c21f7a49143159cda5060
3
  size 77791232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:647af449534630e6845b04a4a8536c37e2f6958ffe897aeef39a3c049e7027e0
3
  size 77791232
params_shard_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e6253d9e49771c27651a2e41d99c0bafe37bc1d7ec26b5ccef661a23d11f587
3
  size 32740608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff72fc9490bcc53cd30a37a782a71c9cd97288e1a2a0da9379292dfda74fc0b
3
  size 32740608
params_shard_2.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d96dd9ae8fe652516669bfc46ac1b80b809811983e9a56d2153215119a567a0
3
  size 31866624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91130cf45c2caeb70924f8dbfbdd8de0bfd3d9fd9b2b9218a26d2b8d8ef6e5ee
3
  size 31866624
params_shard_3.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e48ffc5ab79c161b5ab4c9043651a4bcdcfc620d36123867c320a920bcea1b80
3
  size 33505024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81216a1d537e0a18bc175440ba1743976946e2d0bd451f538dea18b6f90d0d9b
3
  size 33505024
params_shard_4.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63d5c2655a3d793f79422e5073dc6c1120297431e0aa3fa8c06dba4c4780f97f
3
  size 32000000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f0736791e1f8a1342d0b64ddd3d90e5e152a3593386f2f97f4dd7df91bbef3
3
  size 32000000
params_shard_5.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e61269a3ea4128730394356fee28ca088cc4e1af2ade5357397598c0ba00596b
3
  size 31866624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2e3193fdd1613db01d9dda9efa00d1bc1c752e60507d6e25520422dbbc0da1
3
  size 31866624
params_shard_6.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22ac02543c849a953310c17b358851fa3957317a183dbf7e958d90339cdb3897
3
  size 33505024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c5340fb0b8c35492ca8a99334a5cd34e1b4992f8d6bc9c8232c4254584e152
3
  size 33505024
params_shard_7.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e91eb9269f01921f3406ecbfbad3d7d1e1d6d81d612d7b75611e807db0ea000b
3
  size 32000000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b74d8cf1f7774327e4a25729f116741a9ba1bd279629c3941310d975e19ab23
3
  size 32000000
params_shard_8.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67b734a36f2e3ce330537b7422f346e4447c88b1d361e4515bf4a2b93336f19b
3
  size 30097152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9192b74f36c39b4501b7230da15b84ef176b60463bf34ec27b42fecee617c03
3
  size 30097152
tensor-cache.json CHANGED
@@ -22,7 +22,7 @@
22
  "byteOffset": 0
23
  }
24
  ],
25
- "md5sum": "429401a184d8234c32a93e952c5a258d"
26
  },
27
  {
28
  "dataPath": "params_shard_1.bin",
@@ -371,7 +371,7 @@
371
  "byteOffset": 32740352
372
  }
373
  ],
374
- "md5sum": "d4af3fa314e39230f409de3592410b36"
375
  },
376
  {
377
  "dataPath": "params_shard_2.bin",
@@ -849,7 +849,7 @@
849
  "byteOffset": 31670016
850
  }
851
  ],
852
- "md5sum": "3145759e5e2fd359fc93295190b6825f"
853
  },
854
  {
855
  "dataPath": "params_shard_3.bin",
@@ -1316,7 +1316,7 @@
1316
  "byteOffset": 32456448
1317
  }
1318
  ],
1319
- "md5sum": "a6d13ad9a22333f43f4aae688ef37246"
1320
  },
1321
  {
1322
  "dataPath": "params_shard_4.bin",
@@ -1803,7 +1803,7 @@
1803
  "byteOffset": 31999744
1804
  }
1805
  ],
1806
- "md5sum": "9c65acce9affb7954a325f7ce4889170"
1807
  },
1808
  {
1809
  "dataPath": "params_shard_5.bin",
@@ -2281,7 +2281,7 @@
2281
  "byteOffset": 31670016
2282
  }
2283
  ],
2284
- "md5sum": "0c5f7533bde5bdd10feb650f4e9bca51"
2285
  },
2286
  {
2287
  "dataPath": "params_shard_6.bin",
@@ -2748,7 +2748,7 @@
2748
  "byteOffset": 32456448
2749
  }
2750
  ],
2751
- "md5sum": "ad4d145aaceee10dbae50f42b47b1004"
2752
  },
2753
  {
2754
  "dataPath": "params_shard_7.bin",
@@ -3235,7 +3235,7 @@
3235
  "byteOffset": 31999744
3236
  }
3237
  ],
3238
- "md5sum": "f06fa646a3bdde00f0402981d6a4fb12"
3239
  },
3240
  {
3241
  "dataPath": "params_shard_8.bin",
@@ -3691,7 +3691,7 @@
3691
  "byteOffset": 30095104
3692
  }
3693
  ],
3694
- "md5sum": "a8cea4efe95302d934dfbd14695842ab"
3695
  }
3696
  ]
3697
  }
 
22
  "byteOffset": 0
23
  }
24
  ],
25
+ "md5sum": "8f172d4f1858833a0911bda0af198120"
26
  },
27
  {
28
  "dataPath": "params_shard_1.bin",
 
371
  "byteOffset": 32740352
372
  }
373
  ],
374
+ "md5sum": "a556c7f021cd6f062425e1f3e9b9a5fe"
375
  },
376
  {
377
  "dataPath": "params_shard_2.bin",
 
849
  "byteOffset": 31670016
850
  }
851
  ],
852
+ "md5sum": "98e85e3d6a849d08935f711122e81d38"
853
  },
854
  {
855
  "dataPath": "params_shard_3.bin",
 
1316
  "byteOffset": 32456448
1317
  }
1318
  ],
1319
+ "md5sum": "48a1e492f4d57541166382a94fbc3a82"
1320
  },
1321
  {
1322
  "dataPath": "params_shard_4.bin",
 
1803
  "byteOffset": 31999744
1804
  }
1805
  ],
1806
+ "md5sum": "d2003b8056ec7df7175d0e145e967247"
1807
  },
1808
  {
1809
  "dataPath": "params_shard_5.bin",
 
2281
  "byteOffset": 31670016
2282
  }
2283
  ],
2284
+ "md5sum": "b6dc0e96e3cad740c88b100d36c71176"
2285
  },
2286
  {
2287
  "dataPath": "params_shard_6.bin",
 
2748
  "byteOffset": 32456448
2749
  }
2750
  ],
2751
+ "md5sum": "034365735675813486b117a86a302bd8"
2752
  },
2753
  {
2754
  "dataPath": "params_shard_7.bin",
 
3235
  "byteOffset": 31999744
3236
  }
3237
  ],
3238
+ "md5sum": "b7a4ceb7c6bee651598c2c10a2ec1daa"
3239
  },
3240
  {
3241
  "dataPath": "params_shard_8.bin",
 
3691
  "byteOffset": 30095104
3692
  }
3693
  ],
3694
+ "md5sum": "1c195e2831e9ac36402b9b4b68d04729"
3695
  }
3696
  ]
3697
  }