junrushao committed on
Commit
c38c9fa
·
1 Parent(s): 57346fe

Initial commit

Browse files
Files changed (7) hide show
  1. added_tokens.json +3 -0
  2. logs.txt +105 -0
  3. merges.txt +0 -0
  4. mlc-chat-config.json +40 -0
  5. tokenizer.json +0 -0
  6. tokenizer_config.json +32 -0
  7. vocab.json +0 -0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 49152
3
+ }
logs.txt ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/junrushao/micromamba/envs/python311/bin/python -m mlc_chat gen_config /home/junrushao/tmp/tmp5aeqhx5w/repo --quantization q4f32_1 --conv-template wizard_coder_or_math --output /home/junrushao/tmp/tmpblhqsgfd --context-window-size 8192
2
+ [2023-12-29 13:45:49] INFO auto_config.py:115: Found model configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
3
+ [2023-12-29 13:45:49] INFO auto_config.py:151: Found model type: gpt_bigcode. Use `--model-type` to override.
4
+ [2023-12-29 13:45:49] INFO gpt_bigcode_model.py:41: context_window_size not found in config.json. Falling back to n_positions (8192)
5
+ [2023-12-29 13:45:49] INFO flags_model_config_override.py:63: Default prefill_chunk_size to context_window_size (8192) because it is not provided
6
+ [2023-12-29 13:45:49] INFO flags_model_config_override.py:112: Overriding context_window_size from 8192 to 8192
7
+ [2023-12-29 13:45:49] INFO flags_model_config_override.py:112: Overriding prefill_chunk_size from 8192 to 8192
8
+ [2023-12-29 13:45:49] INFO gen_config.py:115: [generation_config.json] Setting bos_token_id: 0
9
+ [2023-12-29 13:45:49] INFO gen_config.py:115: [generation_config.json] Setting eos_token_id: 0
10
+ [2023-12-29 13:45:49] INFO gen_config.py:129: Not found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer.model
11
+ [2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/tokenizer.json
12
+ [2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/vocab.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/vocab.json
13
+ [2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/merges.txt. Copying to /home/junrushao/tmp/tmpblhqsgfd/merges.txt
14
+ [2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/added_tokens.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/added_tokens.json
15
+ [2023-12-29 13:45:49] INFO gen_config.py:127: Found tokenizer config: /home/junrushao/tmp/tmp5aeqhx5w/repo/tokenizer_config.json. Copying to /home/junrushao/tmp/tmpblhqsgfd/tokenizer_config.json
16
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting pad_token_id: 0
17
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting temperature: 0.7
18
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting repetition_penalty: 1.0
19
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting top_p: 0.95
20
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting mean_gen_len: 128
21
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting max_gen_len: 512
22
+ [2023-12-29 13:45:49] INFO gen_config.py:69: [System default] Setting shift_fill_factor: 0.3
23
+ [2023-12-29 13:45:49] INFO gen_config.py:157: Dumping configuration file to: /home/junrushao/tmp/tmpblhqsgfd/mlc-chat-config.json
24
+ /home/junrushao/micromamba/envs/python311/bin/python -m mlc_chat convert_weight /home/junrushao/tmp/tmp5aeqhx5w/repo --quantization q4f32_1 --source-format auto --output /home/junrushao/tmp/tmpblhqsgfd
25
+ [2023-12-29 13:45:50] INFO auto_config.py:115: Found model configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
26
+ [2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:0
27
+ [2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:1
28
+ [2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:2
29
+ [2023-12-29 13:45:51] INFO auto_device.py:76: Found device: cuda:3
30
+ [2023-12-29 13:45:51] INFO auto_device.py:85: Not found device: rocm:0
31
+ [2023-12-29 13:45:52] INFO auto_device.py:85: Not found device: metal:0
32
+ [2023-12-29 13:45:52] INFO auto_device.py:85: Not found device: vulkan:0
33
+ [2023-12-29 13:45:52] INFO auto_device.py:85: Not found device: opencl:0
34
+ [2023-12-29 13:45:52] INFO auto_device.py:33: Using device: cuda:0
35
+ [2023-12-29 13:45:52] INFO auto_weight.py:70: Finding weights in: /home/junrushao/tmp/tmp5aeqhx5w/repo
36
+ [2023-12-29 13:45:52] INFO auto_weight.py:129: Found source weight format: huggingface-torch. Source configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin
37
+ [2023-12-29 13:45:52] INFO auto_weight.py:149: Not found Huggingface Safetensor
38
+ [2023-12-29 13:45:52] INFO auto_weight.py:106: Using source weight configuration: /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin. Use `--source` to override.
39
+ [2023-12-29 13:45:52] INFO auto_weight.py:110: Using source weight format: huggingface-torch. Use `--source-format` to override.
40
+ [2023-12-29 13:45:52] INFO auto_config.py:151: Found model type: gpt_bigcode. Use `--model-type` to override.
41
+ [2023-12-29 13:45:52] INFO gpt_bigcode_model.py:41: context_window_size not found in config.json. Falling back to n_positions (8192)
42
+ Traceback (most recent call last):
43
+ File "<frozen runpy>", line 198, in _run_module_as_main
44
+ File "<frozen runpy>", line 88, in _run_code
45
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/__main__.py", line 39, in <module>
46
+ main()
47
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/__main__.py", line 28, in main
48
+ cli.main(sys.argv[2:])
49
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/cli/convert_weight.py", line 87, in main
50
+ convert_weight(
51
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/interface/convert_weight.py", line 147, in convert_weight
52
+ _convert_args(args)
53
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/interface/convert_weight.py", line 64, in _convert_args
54
+ model, quantize_map = args.model.quantize[args.quantization.kind](
55
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
56
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/model/gpt_bigcode/gpt_bigcode_quantization.py", line 21, in group_quant
57
+ model = quantization.quantize_model(
58
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
59
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 117, in quantize_model
60
+ model = mutator.visit(name_prefix, model)
61
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
62
+ File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
63
+ setattr(node, key, self.visit_module(_get_child_name(name, key), value))
64
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
65
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
66
+ return self.visit(name, node)
67
+ ^^^^^^^^^^^^^^^^^^^^^^
68
+ File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 138, in visit
69
+ setattr(node, key, self.visit_modulelist(_get_child_name(name, key), value))
70
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
71
+ File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 98, in visit_modulelist
72
+ return self.visit(name, node)
73
+ ^^^^^^^^^^^^^^^^^^^^^^
74
+ File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 130, in visit
75
+ node[i] = self.visit_module(f"{name}.{i}", node[i])
76
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
77
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
78
+ return self.visit(name, node)
79
+ ^^^^^^^^^^^^^^^^^^^^^^
80
+ File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
81
+ setattr(node, key, self.visit_module(_get_child_name(name, key), value))
82
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
83
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 113, in visit_module
84
+ return self.visit(name, node)
85
+ ^^^^^^^^^^^^^^^^^^^^^^
86
+ File "/home/junrushao/Projects/tvm-dev/python/tvm/relax/frontend/nn/visitor.py", line 140, in visit
87
+ setattr(node, key, self.visit_module(_get_child_name(name, key), value))
88
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
89
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 107, in visit_module
90
+ return GroupQuantizeLinear.from_linear(node, self.config)
91
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
92
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 323, in from_linear
93
+ _apply_sharding(shard, f"{shard.name}_q_weight", quantized_linear.q_weight)
94
+ File "/home/junrushao/Projects/mlc-llm/python/mlc_chat/quantization/group_quantization.py", line 444, in _apply_sharding
95
+ assert weight.shape[0] == sum(shard.rows)
96
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
97
+ AssertionError
98
+ Weight conversion with arguments:
99
+ --config /home/junrushao/tmp/tmp5aeqhx5w/repo/config.json
100
+ --quantization GroupQuantize(name='q4f32_1', kind='group-quant', group_size=32, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', num_elem_per_storage=8, num_storage_per_group=4, max_int_value=7)
101
+ --model-type gpt_bigcode
102
+ --device cuda:0
103
+ --source /home/junrushao/tmp/tmp5aeqhx5w/repo/pytorch_model.bin
104
+ --source-format huggingface-torch
105
+ --output /home/junrushao/tmp/tmpblhqsgfd
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "gpt_bigcode",
3
+ "quantization": "q4f32_1",
4
+ "model_config": {
5
+ "n_embd": 6144,
6
+ "n_inner": 24576,
7
+ "n_head": 48,
8
+ "n_layer": 40,
9
+ "n_positions": 8192,
10
+ "layer_norm_epsilon": 1e-05,
11
+ "vocab_size": 49153,
12
+ "context_window_size": 8192,
13
+ "prefill_chunk_size": 8192,
14
+ "tensor_parallel_shards": 1
15
+ },
16
+ "vocab_size": 49153,
17
+ "context_window_size": 8192,
18
+ "sliding_window_size": -1,
19
+ "prefill_chunk_size": 8192,
20
+ "attention_sink_size": -1,
21
+ "tensor_parallel_shards": 1,
22
+ "mean_gen_len": 128,
23
+ "max_gen_len": 512,
24
+ "shift_fill_factor": 0.3,
25
+ "temperature": 0.7,
26
+ "repetition_penalty": 1.0,
27
+ "top_p": 0.95,
28
+ "conv_template": "wizard_coder_or_math",
29
+ "pad_token_id": 0,
30
+ "bos_token_id": 0,
31
+ "eos_token_id": 0,
32
+ "tokenizer_files": [
33
+ "tokenizer.json",
34
+ "vocab.json",
35
+ "merges.txt",
36
+ "added_tokens.json",
37
+ "tokenizer_config.json"
38
+ ],
39
+ "version": "0.1.0"
40
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "<|endoftext|>",
5
+ "<fim_prefix>",
6
+ "<fim_middle>",
7
+ "<fim_suffix>",
8
+ "<fim_pad>",
9
+ "<filename>",
10
+ "<gh_stars>",
11
+ "<issue_start>",
12
+ "<issue_comment>",
13
+ "<issue_closed>",
14
+ "<jupyter_start>",
15
+ "<jupyter_text>",
16
+ "<jupyter_code>",
17
+ "<jupyter_output>",
18
+ "<empty_output>",
19
+ "<commit_before>",
20
+ "<commit_msg>",
21
+ "<commit_after>",
22
+ "<reponame>"
23
+ ],
24
+ "bos_token": "<|endoftext|>",
25
+ "clean_up_tokenization_spaces": true,
26
+ "eos_token": "<|endoftext|>",
27
+ "model_max_length": 2048,
28
+ "padding_side": "right",
29
+ "tokenizer_class": "GPT2Tokenizer",
30
+ "unk_token": "<|endoftext|>",
31
+ "vocab_size": 49152
32
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff