Initial commit

Browse files

Files changed (7) hide show

added_tokens.json +3 -0
logs.txt +105 -0
merges.txt +0 -0
mlc-chat-config.json +40 -0
tokenizer.json +0 -0
tokenizer_config.json +32 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 49152
+}

logs.txt ADDED Viewed

	@@ -0,0 +1,105 @@

+/home/junrushao/micromamba/envs/python311/bin/python -m mlc_chat gen_config /home/junrushao/tmp/tmp5aeqhx5w/repo --quantization q4f32_1 --conv-template wizard_coder_or_math --output /home/junrushao/tmp/tmpblhqsgfd --context-window-size 8192
+[2023-12-29 13:45:49] INFO auto_config.py:115: Found model configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
+[2023-12-29 13:45:49] INFO auto_config.py:151: Found model type: gpt_bigcode. Use `--model-type` to override.
+[2023-12-29 13:45:49] INFO gpt_bigcode_model.py:41: context_window_size not found in config.json. Falling back to n_positions (8192)
+[2023-12-29 13:45:49] INFO flags_model_config_override.py:63: Default prefill_chunk_size to context_window_size (8192) because it is not provided
+[2023-12-29 13:45:49] INFO flags_model_config_override.py:112: Overriding context_window_size from 8192 to 8192
+[2023-12-29 13:45:49] INFO flags_model_config_override.py:112: Overriding prefill_chunk_size from 8192 to 8192
+[2023-12-29 13:45:49] INFO gen_config.py:115: [generation_config.json] Setting bos_token_id: 0
+[2023-12-29 13:45:49] INFO gen_config.py:115: [generation_config.json] Setting eos_token_id: 0
+[2023-12-29 13:45:49] INFO gen_config.py:129: Not found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer.model
+[2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/tokenizer.json
+[2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/vocab.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/vocab.json
+[2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/merges.txt. Copying to /home/junrushao/tmp/tmpblhqsgfd/merges.txt
+[2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/added_tokens.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/added_tokens.json
+[2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer_config.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/tokenizer_config.json
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting pad_token_id: 0
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting temperature: 0.7
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting repetition_penalty: 1.0
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting top_p: 0.95
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting mean_gen_len: 128
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting max_gen_len: 512
+[2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting shift_fill_factor: 0.3
+[2023-12-29 13:45:49] INFO gen_config.py:157: Dumping configuration file to: /home/junrushao/tmp/tmpblhqsgfd/mlc-chat-config.json
+/home/junrushao/micromamba/envs/python311/bin/python -m mlc_chat convert_weight /home/junrushao/tmp/tmp5aeqhx5w/repo --quantization q4f32_1 --source-format auto --output /home/junrushao/tmp/tmpblhqsgfd
+[2023-12-29 13:45:50] INFO auto_config.py:115: Found model configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
+[2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:0
+[2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:1
+[2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:2
+[2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:3
+[2023-12-29 13:45:51] INFO auto_device.py:85: Not found device: rocm:0
+[2023-12-29 13:45:52] INFO auto_device.py:85: Not found device: metal:0
+[2023-12-29 13:45:52] INFO auto_device.py:85: Not found device: vulkan:0
+[2023-12-29 13:45:52] INFO auto_device.py:85: Not found device: opencl:0
+[2023-12-29 13:45:52] INFO auto_device.py:33: Using device: cuda:0
+[2023-12-29 13:45:52] INFO auto_weight.py:70: Finding weights in: /home/junrushao/tmp/tmp5aeqhx5w/repo
+[2023-12-29 13:45:52] INFO auto_weight.py:129: Found source weight format: huggingface-torch. Source configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin
+[2023-12-29 13:45:52] INFO auto_weight.py:149: Not found Huggingface Safetensor
+[2023-12-29 13:45:52] INFO auto_weight.py:106: Using source weight configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin. Use `--source` to override.
+[2023-12-29 13:45:52] INFO auto_weight.py:110: Using source weight format: huggingface-torch. Use `--source-format` to override.
+[2023-12-29 13:45:52] INFO auto_config.py:151: Found model type: gpt_bigcode. Use `--model-type` to override.
+[2023-12-29 13:45:52] INFO gpt_bigcode_model.py:41: context_window_size not found in config.json. Falling back to n_positions (8192)
+Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/__main__.py", line 39, in <module>
+    main()
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/__main__.py", line 28, in main
+    cli.main(sys.argv[2:])
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/cli/convert_weight.py", line 87, in main
+    convert_weight(
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/interface/convert_weight.py", line 147, in convert_weight
+    _convert_args(args)
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/interface/convert_weight.py", line 64, in _convert_args
+    model, quantize_map = args.model.quantize[args.quantization.kind](
+                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/model/gpt_bigcode/gpt_bigcode_quantization.py", line 21, in group_quant
+    model = quantization.quantize_model(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 117, in quantize_model
+    model = mutator.visit(name_prefix, model)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
+    setattr(node, key, self.visit_module(_get_child_name(name, key), value))
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
+    return self.visit(name, node)
+           ^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 138, in visit
+    setattr(node, key, self.visit_modulelist(_get_child_name(name, key), value))
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 98, in visit_modulelist
+    return self.visit(name, node)
+           ^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 130, in visit
+    node[i] = self.visit_module(f"{name}.{i}", node[i])
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
+    return self.visit(name, node)
+           ^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
+    setattr(node, key, self.visit_module(_get_child_name(name, key), value))
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
+    return self.visit(name, node)
+           ^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
+    setattr(node, key, self.visit_module(_get_child_name(name, key), value))
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 107, in visit_module
+    return GroupQuantizeLinear.from_linear(node, self.config)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 323, in from_linear
+    _apply_sharding(shard, f"{shard.name}_q_weight", quantized_linear.q_weight)
+  File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 444, in _apply_sharding
+    assert weight.shape[0] == sum(shard.rows)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+AssertionError
+Weight conversion with arguments:
+  --config          /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
+  --quantization    GroupQuantize(name='q4f32_1', kind='group-quant', group_size=32, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', num_elem_per_storage=8, num_storage_per_group=4, max_int_value=7)
+  --model-type      gpt_bigcode
+  --device          cuda:0
+  --source          /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin
+  --source-format   huggingface-torch
+  --output          /home/junrushao/tmp/tmpblhqsgfd

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

mlc-chat-config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "model_type": "gpt_bigcode",
+  "quantization": "q4f32_1",
+  "model_config": {
+    "n_embd": 6144,
+    "n_inner": 24576,
+    "n_head": 48,
+    "n_layer": 40,
+    "n_positions": 8192,
+    "layer_norm_epsilon": 1e-05,
+    "vocab_size": 49153,
+    "context_window_size": 8192,
+    "prefill_chunk_size": 8192,
+    "tensor_parallel_shards": 1
+  },
+  "vocab_size": 49153,
+  "context_window_size": 8192,
+  "sliding_window_size": -1,
+  "prefill_chunk_size": 8192,
+  "attention_sink_size": -1,
+  "tensor_parallel_shards": 1,
+  "mean_gen_len": 128,
+  "max_gen_len": 512,
+  "shift_fill_factor": 0.3,
+  "temperature": 0.7,
+  "repetition_penalty": 1.0,
+  "top_p": 0.95,
+  "conv_template": "wizard_coder_or_math",
+  "pad_token_id": 0,
+  "bos_token_id": 0,
+  "eos_token_id": 0,
+  "tokenizer_files": [
+    "tokenizer.json",
+    "vocab.json",
+    "merges.txt",
+    "added_tokens.json",
+    "tokenizer_config.json"
+  ],
+  "version": "0.1.0"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "add_prefix_space": false,
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<filename>",
+    "<gh_stars>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<empty_output>",
+    "<commit_before>",
+    "<commit_msg>",
+    "<commit_after>",
+    "<reponame>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "padding_side": "right",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff