| [belle] | |
| model_name= | |
| num_layer=30 | |
| head_num=32 | |
| inter_size=16384 | |
| size_per_head=128 | |
| vocab_size=250880 | |
| tensor_para_size=1 | |
| weight_data_type=fp16 | |
| model_variant=bloom-pre | |
| layernorm_eps=1e-05 | |
| layernorm_type=pre_layernorm | |
| activation_type=Gelu | |
| has_positional_encoding=False | |
| has_pre_decoder_layernorm=True | |
| has_post_decoder_layernorm=True | |
| use_attention_linear_bias=True | |
| start_id=1 | |
| end_id=2 | |