{
"activation_dropout": 0.1,
"activation_fn": "gelu",
"architectures": [
"RDDistillerModel"
],
"attention_dropout": 0.1,
"attention_type": "original",
"auto_map": {
"AutoConfig": "configuration_distiller.DistillerConfig",
"AutoModel": "distiller_model.RDDistillerModel"
},
"conv_pos": 128,
"conv_pos_groups": 16,
"cosine_loss": 1.0,
"dropout": 0.1,
"dtype": "float32",
"encoder_attention_heads": 12,
"encoder_embed_dim": 768,
"encoder_ffn_embed_dim": 3072,
"encoder_layerdrop": 0.0,
"encoder_layers": 2,
"extractor_conv_feature_layers": "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2",
"extractor_dropout": 0.0,
"extractor_mode": "default",
"feat_pen_loss": 0.0,
"feature_grad_mult": 0.1,
"final_dim": 768,
"init_teacher_conv_layers": true,
"init_teacher_encoder_layers": true,
"layer_emb_size": 0,
"layer_norm_first": false,
"loss_type": "l1",
"model_type": "rd_distiller",
"n_tasks": 3,
"out_layer_inter_dim": -1,
"out_layer_type": "expand-last",
"pred_layer_id": [
4,
8,
12
],
"task_emb_size": 0,
"task_emb_type": "expand-last",
"transformers_version": "5.1.0"
}