Dasuperhub committed on
Commit
9cb9f0e
·
verified ·
1 Parent(s): e50d446

Guinius Giro 002.14.26 — 11,950 examples, fresh Qwen3-0.6B, mega engine v3

Browse files
mlc-chat-config.json CHANGED
@@ -14,7 +14,7 @@
14
  "rope_theta": 1000000,
15
  "vocab_size": 151936,
16
  "tie_word_embeddings": true,
17
- "context_window_size": 2048,
18
  "prefill_chunk_size": 2048,
19
  "tensor_parallel_shards": 1,
20
  "head_dim": 128,
@@ -23,7 +23,7 @@
23
  "weight_block_size": null
24
  },
25
  "vocab_size": 151936,
26
- "context_window_size": 2048,
27
  "sliding_window_size": -1,
28
  "prefill_chunk_size": 2048,
29
  "attention_sink_size": -1,
@@ -48,9 +48,9 @@
48
  "strip_space_in_decode": false
49
  },
50
  "conv_template": {
51
- "name": "chatml",
52
  "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
53
- "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
54
  "system_prefix_token_ids": null,
55
  "add_role_after_system_message": true,
56
  "roles": {
@@ -69,10 +69,12 @@
69
  "role_content_sep": "\n",
70
  "role_empty_sep": "\n",
71
  "stop_str": [
 
72
  "<|im_end|>"
73
  ],
74
  "stop_token_ids": [
75
- 2
 
76
  ],
77
  "function_string": "",
78
  "use_function_calling": false
 
14
  "rope_theta": 1000000,
15
  "vocab_size": 151936,
16
  "tie_word_embeddings": true,
17
+ "context_window_size": 40960,
18
  "prefill_chunk_size": 2048,
19
  "tensor_parallel_shards": 1,
20
  "head_dim": 128,
 
23
  "weight_block_size": null
24
  },
25
  "vocab_size": 151936,
26
+ "context_window_size": 40960,
27
  "sliding_window_size": -1,
28
  "prefill_chunk_size": 2048,
29
  "attention_sink_size": -1,
 
48
  "strip_space_in_decode": false
49
  },
50
  "conv_template": {
51
+ "name": "qwen2",
52
  "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
53
+ "system_message": "You are a helpful assistant.",
54
  "system_prefix_token_ids": null,
55
  "add_role_after_system_message": true,
56
  "roles": {
 
69
  "role_content_sep": "\n",
70
  "role_empty_sep": "\n",
71
  "stop_str": [
72
+ "<|endoftext|>",
73
  "<|im_end|>"
74
  ],
75
  "stop_token_ids": [
76
+ 151643,
77
+ 151645
78
  ],
79
  "function_string": "",
80
  "use_function_calling": false
params_shard_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02f9c200445b294d1a3240a016878b3d1c350531bbdaa54234ae59a28a423f7c
3
  size 32740608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d22ca55dfddcb1c72d72448e898be381acbdf7a98f91e83494deb9ea2f16532d
3
  size 32740608
params_shard_2.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63c1bb2bd3e8a1c6438dd805053b8b88ead73242172bb9e58907571445e0d3ae
3
  size 31866624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220d64af94f86f942f406ce100ae8aafc23ae19d5a24b52b58956e7616bef8f1
3
  size 31866624
params_shard_3.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8438278c7a6dce7501d24782435c464c2bfe76d630eea096e653346c6e3d47d
3
  size 33505024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:365cfc4b4e943bc48e8221e0fa51109d33c1c5eb139bc5f65dd89ec58a8f9d3f
3
  size 33505024
params_shard_4.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a707ffcd1d7a4d79bc93f33575c0e84e0b1e30312f4a5c889c78721a6b7cec18
3
  size 32000000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81d0b7beee8280e60595b4088c1c954857b43f081d408aad98b2725d87c4f986
3
  size 32000000
params_shard_5.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b7027bc8f618ed4092cc4451e7e2ee6117021fc5a29617a5e94ebd473b900ca
3
  size 31866624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326d77ac87c7cb2f1c04be82c4f9f31fffecb8acd826a64f00b93287476c8f27
3
  size 31866624
params_shard_6.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb9fa45fa933d6534ffeecf7b7d8e628443731370a1b47feadeb61920ae3e705
3
  size 33505024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd482e2cea91a8d8b3ba0734afe1e8adb95488fe02921fa59bc6f11b87998e7
3
  size 33505024
params_shard_7.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af54a139bca844b549219f7d22672b0ee7f7b6ead37cd628bc68ef6fd0081fc1
3
  size 32000000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5008178aa4d503208cbdedcdf9a2fc4ad7486b9c64ea88b33cf0164b988c85a5
3
  size 32000000
params_shard_8.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab0c526c003d12d3eea6f65383c486baaeef145c8c921da69004277de85e119
3
  size 30097152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:773c7a6edcb21dd22f8b13842c9ff99e688000f80d8c2ae5b531764821213d97
3
  size 30097152
tensor-cache.json CHANGED
@@ -371,7 +371,7 @@
371
  "byteOffset": 32740352
372
  }
373
  ],
374
- "md5sum": "2a5c35e098e4b349642351a85c1159c4"
375
  },
376
  {
377
  "dataPath": "params_shard_2.bin",
@@ -849,7 +849,7 @@
849
  "byteOffset": 31670016
850
  }
851
  ],
852
- "md5sum": "0196d572e69c4eeacc5c278701ac9055"
853
  },
854
  {
855
  "dataPath": "params_shard_3.bin",
@@ -1316,7 +1316,7 @@
1316
  "byteOffset": 32456448
1317
  }
1318
  ],
1319
- "md5sum": "2718bb150cfe7427bd7e1ebc0d7f271f"
1320
  },
1321
  {
1322
  "dataPath": "params_shard_4.bin",
@@ -1803,7 +1803,7 @@
1803
  "byteOffset": 31999744
1804
  }
1805
  ],
1806
- "md5sum": "bca8507cfb9c4237cb29eeb0e8312751"
1807
  },
1808
  {
1809
  "dataPath": "params_shard_5.bin",
@@ -2281,7 +2281,7 @@
2281
  "byteOffset": 31670016
2282
  }
2283
  ],
2284
- "md5sum": "20b3f87dd71e1fc43784a711e7abc581"
2285
  },
2286
  {
2287
  "dataPath": "params_shard_6.bin",
@@ -2748,7 +2748,7 @@
2748
  "byteOffset": 32456448
2749
  }
2750
  ],
2751
- "md5sum": "cdc024bc8641803155333f54fbaa57ec"
2752
  },
2753
  {
2754
  "dataPath": "params_shard_7.bin",
@@ -3235,7 +3235,7 @@
3235
  "byteOffset": 31999744
3236
  }
3237
  ],
3238
- "md5sum": "6e4cb1503a258515bdacce4e348848ba"
3239
  },
3240
  {
3241
  "dataPath": "params_shard_8.bin",
@@ -3691,7 +3691,7 @@
3691
  "byteOffset": 30095104
3692
  }
3693
  ],
3694
- "md5sum": "8aaeaeef9928133cc53701d3af235f33"
3695
  }
3696
  ]
3697
  }
 
371
  "byteOffset": 32740352
372
  }
373
  ],
374
+ "md5sum": "37a319196de8632e0350c71e79af9830"
375
  },
376
  {
377
  "dataPath": "params_shard_2.bin",
 
849
  "byteOffset": 31670016
850
  }
851
  ],
852
+ "md5sum": "5f8af7ca75856ddb27973c72dd0c6beb"
853
  },
854
  {
855
  "dataPath": "params_shard_3.bin",
 
1316
  "byteOffset": 32456448
1317
  }
1318
  ],
1319
+ "md5sum": "d25b760cebf3448975dd0e4180068fed"
1320
  },
1321
  {
1322
  "dataPath": "params_shard_4.bin",
 
1803
  "byteOffset": 31999744
1804
  }
1805
  ],
1806
+ "md5sum": "e9822aee8fd1a54f134a2f3020ce82cb"
1807
  },
1808
  {
1809
  "dataPath": "params_shard_5.bin",
 
2281
  "byteOffset": 31670016
2282
  }
2283
  ],
2284
+ "md5sum": "64e80d7e07771a6ab88eed4dcb3cd7dd"
2285
  },
2286
  {
2287
  "dataPath": "params_shard_6.bin",
 
2748
  "byteOffset": 32456448
2749
  }
2750
  ],
2751
+ "md5sum": "abe158fd2db8670885a749a747f41d1b"
2752
  },
2753
  {
2754
  "dataPath": "params_shard_7.bin",
 
3235
  "byteOffset": 31999744
3236
  }
3237
  ],
3238
+ "md5sum": "4b6290fd58a55ade50174f3d5d1e2ca6"
3239
  },
3240
  {
3241
  "dataPath": "params_shard_8.bin",
 
3691
  "byteOffset": 30095104
3692
  }
3693
  ],
3694
+ "md5sum": "53cd04b508f80194e315ad451673d67a"
3695
  }
3696
  ]
3697
  }
tokenizer_config.json CHANGED
@@ -231,8 +231,9 @@
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
- "model_max_length": 131072,
235
- "pad_token": "<|endoftext|>",
 
236
  "split_special_tokens": false,
237
  "tokenizer_class": "Qwen2Tokenizer",
238
  "unk_token": null
 
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
+ "model_max_length": 40960,
235
+ "pad_token": "<|vision_pad|>",
236
+ "padding_side": "left",
237
  "split_special_tokens": false,
238
  "tokenizer_class": "Qwen2Tokenizer",
239
  "unk_token": null