Initial commit
Browse files- added_tokens.json +3 -0
- logs.txt +105 -0
- merges.txt +0 -0
- mlc-chat-config.json +40 -0
- tokenizer.json +0 -0
- tokenizer_config.json +32 -0
- vocab.json +0 -0
added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[PAD]": 49152
|
| 3 |
+
}
|
logs.txt
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/junrushao/micromamba/envs/python311/bin/python -m mlc_chat gen_config /home/junrushao/tmp/tmp5aeqhx5w/repo --quantization q4f32_1 --conv-template wizard_coder_or_math --output /home/junrushao/tmp/tmpblhqsgfd --context-window-size 8192
|
| 2 |
+
[2023-12-29 13:45:49] INFO auto_config.py:115: [92mFound[0m model configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
|
| 3 |
+
[2023-12-29 13:45:49] INFO auto_config.py:151: [92mFound[0m model type: [1mgpt_bigcode[0m. Use `--model-type` to override.
|
| 4 |
+
[2023-12-29 13:45:49] INFO gpt_bigcode_model.py:41: [1mcontext_window_size[0m not found in config.json. Falling back to [1mn_positions[0m (8192)
|
| 5 |
+
[2023-12-29 13:45:49] INFO flags_model_config_override.py:63: Default [1mprefill_chunk_size[0m to [1mcontext_window_size[0m (8192) because it is not provided
|
| 6 |
+
[2023-12-29 13:45:49] INFO flags_model_config_override.py:112: Overriding [1mcontext_window_size[0m from 8192 to 8192
|
| 7 |
+
[2023-12-29 13:45:49] INFO flags_model_config_override.py:112: Overriding [1mprefill_chunk_size[0m from 8192 to 8192
|
| 8 |
+
[2023-12-29 13:45:49] INFO gen_config.py:115: [generation_config.json] Setting [1mbos_token_id[0m: 0
|
| 9 |
+
[2023-12-29 13:45:49] INFO gen_config.py:115: [generation_config.json] Setting [1meos_token_id[0m: 0
|
| 10 |
+
[2023-12-29 13:45:49] INFO gen_config.py:129: [91mNot found[0m tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer.model
|
| 11 |
+
[2023-12-29 13:45:49] INFO gen_config.py:127: [92mFound[0m tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer.json. Copying to [1m/home/junrushao/tmp/tmpblhqsgfd/tokenizer.json[0m
|
| 12 |
+
[2023-12-29 13:45:49] INFO gen_config.py:127: [92mFound[0m tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/vocab.json. Copying to [1m/home/junrushao/tmp/tmpblhqsgfd/vocab.json[0m
|
| 13 |
+
[2023-12-29 13:45:49] INFO gen_config.py:127: [92mFound[0m tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/merges.txt. Copying to [1m/home/junrushao/tmp/tmpblhqsgfd/merges.txt[0m
|
| 14 |
+
[2023-12-29 13:45:49] INFO gen_config.py:127: [92mFound[0m tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/added_tokens.json. Copying to [1m/home/junrushao/tmp/tmpblhqsgfd/added_tokens.json[0m
|
| 15 |
+
[2023-12-29 13:45:49] INFO gen_config.py:127: [92mFound[0m tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer_config.json. Copying to [1m/home/junrushao/tmp/tmpblhqsgfd/tokenizer_config.json[0m
|
| 16 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mpad_token_id[0m: 0
|
| 17 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mtemperature[0m: 0.7
|
| 18 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mrepetition_penalty[0m: 1.0
|
| 19 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mtop_p[0m: 0.95
|
| 20 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mmean_gen_len[0m: 128
|
| 21 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mmax_gen_len[0m: 512
|
| 22 |
+
[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting [1mshift_fill_factor[0m: 0.3
|
| 23 |
+
[2023-12-29 13:45:49] INFO gen_config.py:157: Dumping configuration file to: [1m/home/junrushao/tmp/tmpblhqsgfd/mlc-chat-config.json[0m
|
| 24 |
+
/home/junrushao/micromamba/envs/python311/bin/python -m mlc_chat convert_weight /home/junrushao/tmp/tmp5aeqhx5w/repo --quantization q4f32_1 --source-format auto --output /home/junrushao/tmp/tmpblhqsgfd
|
| 25 |
+
[2023-12-29 13:45:50] INFO auto_config.py:115: [92mFound[0m model configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
|
| 26 |
+
[2023-12-29 13:45:51] INFO auto_device.py:76: [92mFound[0m device: cuda:0
|
| 27 |
+
[2023-12-29 13:45:51] INFO auto_device.py:76: [92mFound[0m device: cuda:1
|
| 28 |
+
[2023-12-29 13:45:51] INFO auto_device.py:76: [92mFound[0m device: cuda:2
|
| 29 |
+
[2023-12-29 13:45:51] INFO auto_device.py:76: [92mFound[0m device: cuda:3
|
| 30 |
+
[2023-12-29 13:45:51] INFO auto_device.py:85: [91mNot found[0m device: rocm:0
|
| 31 |
+
[2023-12-29 13:45:52] INFO auto_device.py:85: [91mNot found[0m device: metal:0
|
| 32 |
+
[2023-12-29 13:45:52] INFO auto_device.py:85: [91mNot found[0m device: vulkan:0
|
| 33 |
+
[2023-12-29 13:45:52] INFO auto_device.py:85: [91mNot found[0m device: opencl:0
|
| 34 |
+
[2023-12-29 13:45:52] INFO auto_device.py:33: Using device: [1mcuda:0[0m
|
| 35 |
+
[2023-12-29 13:45:52] INFO auto_weight.py:70: Finding weights in: /home/junrushao/tmp/tmp5aeqhx5w/repo
|
| 36 |
+
[2023-12-29 13:45:52] INFO auto_weight.py:129: [92mFound[0m source weight format: huggingface-torch. Source configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin
|
| 37 |
+
[2023-12-29 13:45:52] INFO auto_weight.py:149: [91mNot found[0m Huggingface Safetensor
|
| 38 |
+
[2023-12-29 13:45:52] INFO auto_weight.py:106: Using source weight configuration: [1m/home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin[0m. Use `--source` to override.
|
| 39 |
+
[2023-12-29 13:45:52] INFO auto_weight.py:110: Using source weight format: [1mhuggingface-torch[0m. Use `--source-format` to override.
|
| 40 |
+
[2023-12-29 13:45:52] INFO auto_config.py:151: [92mFound[0m model type: [1mgpt_bigcode[0m. Use `--model-type` to override.
|
| 41 |
+
[2023-12-29 13:45:52] INFO gpt_bigcode_model.py:41: [1mcontext_window_size[0m not found in config.json. Falling back to [1mn_positions[0m (8192)
|
| 42 |
+
Traceback (most recent call last):
|
| 43 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
| 44 |
+
File "<frozen runpy>", line 88, in _run_code
|
| 45 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/__main__.py", line 39, in <module>
|
| 46 |
+
main()
|
| 47 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/__main__.py", line 28, in main
|
| 48 |
+
cli.main(sys.argv[2:])
|
| 49 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/cli/convert_weight.py", line 87, in main
|
| 50 |
+
convert_weight(
|
| 51 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/interface/convert_weight.py", line 147, in convert_weight
|
| 52 |
+
_convert_args(args)
|
| 53 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/interface/convert_weight.py", line 64, in _convert_args
|
| 54 |
+
model, quantize_map = args.model.quantize[args.quantization.kind](
|
| 55 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 56 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/model/gpt_bigcode/gpt_bigcode_quantization.py", line 21, in group_quant
|
| 57 |
+
model = quantization.quantize_model(
|
| 58 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 59 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 117, in quantize_model
|
| 60 |
+
model = mutator.visit(name_prefix, model)
|
| 61 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 62 |
+
File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
|
| 63 |
+
setattr(node, key, self.visit_module(_get_child_name(name, key), value))
|
| 64 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 65 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
|
| 66 |
+
return self.visit(name, node)
|
| 67 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
| 68 |
+
File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 138, in visit
|
| 69 |
+
setattr(node, key, self.visit_modulelist(_get_child_name(name, key), value))
|
| 70 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 71 |
+
File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 98, in visit_modulelist
|
| 72 |
+
return self.visit(name, node)
|
| 73 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
| 74 |
+
File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 130, in visit
|
| 75 |
+
node[i] = self.visit_module(f"{name}.{i}", node[i])
|
| 76 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 77 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
|
| 78 |
+
return self.visit(name, node)
|
| 79 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
| 80 |
+
File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
|
| 81 |
+
setattr(node, key, self.visit_module(_get_child_name(name, key), value))
|
| 82 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 83 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
|
| 84 |
+
return self.visit(name, node)
|
| 85 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
| 86 |
+
File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
|
| 87 |
+
setattr(node, key, self.visit_module(_get_child_name(name, key), value))
|
| 88 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 89 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 107, in visit_module
|
| 90 |
+
return GroupQuantizeLinear.from_linear(node, self.config)
|
| 91 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 92 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 323, in from_linear
|
| 93 |
+
_apply_sharding(shard, f"{shard.name}_q_weight", quantized_linear.q_weight)
|
| 94 |
+
File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 444, in _apply_sharding
|
| 95 |
+
assert weight.shape[0] == sum(shard.rows)
|
| 96 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 97 |
+
AssertionError
|
| 98 |
+
[1mWeight conversion with arguments:[0m
|
| 99 |
+
[1m--config[0m /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
|
| 100 |
+
[1m--quantization[0m GroupQuantize(name='q4f32_1', kind='group-quant', group_size=32, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', num_elem_per_storage=8, num_storage_per_group=4, max_int_value=7)
|
| 101 |
+
[1m--model-type[0m gpt_bigcode
|
| 102 |
+
[1m--device[0m cuda:0
|
| 103 |
+
[1m--source[0m /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin
|
| 104 |
+
[1m--source-format[0m huggingface-torch
|
| 105 |
+
[1m--output[0m /home/junrushao/tmp/tmpblhqsgfd
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mlc-chat-config.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "gpt_bigcode",
|
| 3 |
+
"quantization": "q4f32_1",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"n_embd": 6144,
|
| 6 |
+
"n_inner": 24576,
|
| 7 |
+
"n_head": 48,
|
| 8 |
+
"n_layer": 40,
|
| 9 |
+
"n_positions": 8192,
|
| 10 |
+
"layer_norm_epsilon": 1e-05,
|
| 11 |
+
"vocab_size": 49153,
|
| 12 |
+
"context_window_size": 8192,
|
| 13 |
+
"prefill_chunk_size": 8192,
|
| 14 |
+
"tensor_parallel_shards": 1
|
| 15 |
+
},
|
| 16 |
+
"vocab_size": 49153,
|
| 17 |
+
"context_window_size": 8192,
|
| 18 |
+
"sliding_window_size": -1,
|
| 19 |
+
"prefill_chunk_size": 8192,
|
| 20 |
+
"attention_sink_size": -1,
|
| 21 |
+
"tensor_parallel_shards": 1,
|
| 22 |
+
"mean_gen_len": 128,
|
| 23 |
+
"max_gen_len": 512,
|
| 24 |
+
"shift_fill_factor": 0.3,
|
| 25 |
+
"temperature": 0.7,
|
| 26 |
+
"repetition_penalty": 1.0,
|
| 27 |
+
"top_p": 0.95,
|
| 28 |
+
"conv_template": "wizard_coder_or_math",
|
| 29 |
+
"pad_token_id": 0,
|
| 30 |
+
"bos_token_id": 0,
|
| 31 |
+
"eos_token_id": 0,
|
| 32 |
+
"tokenizer_files": [
|
| 33 |
+
"tokenizer.json",
|
| 34 |
+
"vocab.json",
|
| 35 |
+
"merges.txt",
|
| 36 |
+
"added_tokens.json",
|
| 37 |
+
"tokenizer_config.json"
|
| 38 |
+
],
|
| 39 |
+
"version": "0.1.0"
|
| 40 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"additional_special_tokens": [
|
| 4 |
+
"<|endoftext|>",
|
| 5 |
+
"<fim_prefix>",
|
| 6 |
+
"<fim_middle>",
|
| 7 |
+
"<fim_suffix>",
|
| 8 |
+
"<fim_pad>",
|
| 9 |
+
"<filename>",
|
| 10 |
+
"<gh_stars>",
|
| 11 |
+
"<issue_start>",
|
| 12 |
+
"<issue_comment>",
|
| 13 |
+
"<issue_closed>",
|
| 14 |
+
"<jupyter_start>",
|
| 15 |
+
"<jupyter_text>",
|
| 16 |
+
"<jupyter_code>",
|
| 17 |
+
"<jupyter_output>",
|
| 18 |
+
"<empty_output>",
|
| 19 |
+
"<commit_before>",
|
| 20 |
+
"<commit_msg>",
|
| 21 |
+
"<commit_after>",
|
| 22 |
+
"<reponame>"
|
| 23 |
+
],
|
| 24 |
+
"bos_token": "<|endoftext|>",
|
| 25 |
+
"clean_up_tokenization_spaces": true,
|
| 26 |
+
"eos_token": "<|endoftext|>",
|
| 27 |
+
"model_max_length": 2048,
|
| 28 |
+
"padding_side": "right",
|
| 29 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 30 |
+
"unk_token": "<|endoftext|>",
|
| 31 |
+
"vocab_size": 49152
|
| 32 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|