File size: 1,169 Bytes
8ff90c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
{
  "activation_dropout": 0.1,
  "activation_fn": "gelu",
  "architectures": [
    "RDDistillerModel"
  ],
  "attention_dropout": 0.1,
  "attention_type": "original",
  "auto_map": {
    "AutoConfig": "configuration_distiller.DistillerConfig",
    "AutoModel": "distiller_model.RDDistillerModel"
  },
  "conv_pos": 128,
  "conv_pos_groups": 16,
  "cosine_loss": 1.0,
  "dropout": 0.1,
  "dtype": "float32",
  "encoder_attention_heads": 12,
  "encoder_embed_dim": 768,
  "encoder_ffn_embed_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 2,
  "extractor_conv_feature_layers": "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2",
  "extractor_dropout": 0.0,
  "extractor_mode": "default",
  "feat_pen_loss": 0.0,
  "feature_grad_mult": 0.1,
  "final_dim": 768,
  "init_teacher_conv_layers": true,
  "init_teacher_encoder_layers": true,
  "layer_emb_size": 0,
  "layer_norm_first": false,
  "loss_type": "l1",
  "model_type": "rd_distiller",
  "n_tasks": 3,
  "out_layer_inter_dim": -1,
  "out_layer_type": "expand-last",
  "pred_layer_id": [
    4,
    8,
    12
  ],
  "task_emb_size": 0,
  "task_emb_type": "expand-last",
  "transformers_version": "5.1.0"
}