; Model configuration for ziya-llama (appears to be a 13B-class LLaMA variant:
; 40 heads x 128 = 5120 hidden, 40 layers — verify against the checkpoint).
[llama]
model_name = ziya-llama
head_num = 40
size_per_head = 128
inter_size = 13824
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 39424
; BOS token id
start_id = 1
; EOS token id
end_id = 2
weight_data_type = fp16
tensor_para_size = 1