Bimba / config.json
OldestSalt's picture
Push model using huggingface_hub.
9a11b89 verified
Raw
History Blame Contribute Delete
450 Bytes
{
"cfg": {
"d_conv": 4,
"d_model": 512,
"d_state": 128,
"dim_feedforward": 2048,
"dropout": 0.1,
"eos_token_id": 2,
"expand": 2,
"headdim": 64,
"max_source_len": 64,
"max_target_len": 64,
"ngroups": 1,
"nhead": 8,
"num_decoder_layers": 6,
"num_encoder_layers": 6,
"pad_token_id": 1,
"separate_source_target_embeddings": true,
"tie_embeddings": true,
"vocab_size": 256204
}
}