rd_hubert / config.json
Hguimaraes's picture
Upload model
8ff90c3 verified
{
"activation_dropout": 0.1,
"activation_fn": "gelu",
"architectures": [
"RDDistillerModel"
],
"attention_dropout": 0.1,
"attention_type": "original",
"auto_map": {
"AutoConfig": "configuration_distiller.DistillerConfig",
"AutoModel": "distiller_model.RDDistillerModel"
},
"conv_pos": 128,
"conv_pos_groups": 16,
"cosine_loss": 1.0,
"dropout": 0.1,
"dtype": "float32",
"encoder_attention_heads": 12,
"encoder_embed_dim": 768,
"encoder_ffn_embed_dim": 3072,
"encoder_layerdrop": 0.0,
"encoder_layers": 2,
"extractor_conv_feature_layers": "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2",
"extractor_dropout": 0.0,
"extractor_mode": "default",
"feat_pen_loss": 0.0,
"feature_grad_mult": 0.1,
"final_dim": 768,
"init_teacher_conv_layers": true,
"init_teacher_encoder_layers": true,
"layer_emb_size": 0,
"layer_norm_first": false,
"loss_type": "l1",
"model_type": "rd_distiller",
"n_tasks": 3,
"out_layer_inter_dim": -1,
"out_layer_type": "expand-last",
"pred_layer_id": [
4,
8,
12
],
"task_emb_size": 0,
"task_emb_type": "expand-last",
"transformers_version": "5.1.0"
}