; Model configuration for ChatGLM-6B (FasterTransformer-style config.ini)
[glm6b]
model_name = chatglm-6b
head_num = 32
size_per_head = 128
inter_size = 16384
max_pos_seq_len = 2048
num_layer = 28
vocab_size = 130528
start_id = 130004
end_id = 130005
weight_data_type = fp16
tensor_para_size = 1
layernorm_eps = 1e-5