basenji / config.json
ZhiyuanChen's picture
Upload folder using huggingface_hub
535e94b verified
{
"architectures": [
"BasenjiForTokenPrediction"
],
"batch_norm_eps": 0.001,
"batch_norm_momentum": 0.1,
"blocks": {
"bottleneck_size": 384,
"dilation": 1,
"dilation_rate": 1.5,
"dropout": 0.3,
"kernel_size": 3,
"num_blocks": 11,
"round_dilation": true
},
"bos_token_id": null,
"conv_tower_channels": [
339,
399,
470,
554,
652,
768
],
"conv_tower_kernel_size": 5,
"crop_bins": 64,
"dtype": "float32",
"eos_token_id": null,
"head": {
"act": null,
"bias": true,
"dropout": 0.0,
"hidden_size": null,
"layer_norm_eps": 1e-12,
"loss_weight": null,
"num_labels": null,
"output_name": null,
"problem_type": "regression",
"transform": null,
"transform_act": "gelu",
"type": null
},
"head_act": "softplus",
"head_hidden_size": 1536,
"hidden_act": "gelu_new",
"hidden_dropout": 0.05,
"id2label": null,
"label2id": null,
"mask_token_id": null,
"model_type": "basenji",
"null_token_id": null,
"num_labels": 5313,
"output_contexts": false,
"pad_token_id": 0,
"sequence_length": 131072,
"stem_channels": 288,
"stem_kernel_size": 15,
"stem_pool_size": 2,
"tie_word_embeddings": true,
"transformers_version": "5.7.0",
"unk_token_id": 3,
"vocab_size": 5
}