Update context_window_size & prefill_chunk_size
Browse files
- mlc-chat-config.json +4 -4
mlc-chat-config.json
CHANGED
|
@@ -9,8 +9,8 @@
|
|
| 9 |
"rms_norm_eps": 1e-05,
|
| 10 |
"vocab_size": 77209,
|
| 11 |
"position_embedding_base": 500000.0,
|
| 12 |
-
"context_window_size":
|
| 13 |
-
"prefill_chunk_size":
|
| 14 |
"num_key_value_heads": 8,
|
| 15 |
"head_dim": 128,
|
| 16 |
"tensor_parallel_shards": 1,
|
|
@@ -731,9 +731,9 @@
|
|
| 731 |
"max_batch_size": 1
|
| 732 |
},
|
| 733 |
"vocab_size": 77209,
|
| 734 |
-
"context_window_size":
|
| 735 |
"sliding_window_size": -1,
|
| 736 |
-
"prefill_chunk_size":
|
| 737 |
"attention_sink_size": -1,
|
| 738 |
"tensor_parallel_shards": 1,
|
| 739 |
"max_batch_size": 80,
|
|
|
|
| 9 |
"rms_norm_eps": 1e-05,
|
| 10 |
"vocab_size": 77209,
|
| 11 |
"position_embedding_base": 500000.0,
|
| 12 |
+
"context_window_size": 768,
|
| 13 |
+
"prefill_chunk_size": 768,
|
| 14 |
"num_key_value_heads": 8,
|
| 15 |
"head_dim": 128,
|
| 16 |
"tensor_parallel_shards": 1,
|
|
|
|
| 731 |
"max_batch_size": 1
|
| 732 |
},
|
| 733 |
"vocab_size": 77209,
|
| 734 |
+
"context_window_size": 768,
|
| 735 |
"sliding_window_size": -1,
|
| 736 |
+
"prefill_chunk_size": 768,
|
| 737 |
"attention_sink_size": -1,
|
| 738 |
"tensor_parallel_shards": 1,
|
| 739 |
"max_batch_size": 80,
|