File size: 1,688 Bytes
65e1f21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
{
  "batch_size": 96,
  "dev": [
    "data/ca_ancora-corefud-minidev.conllu",
    "data/cs_pcedt-corefud-minidev.conllu",
    "data/cs_pdt-corefud-minidev.conllu",
    "data/cs_pdtsc-corefud-minidev.conllu",
    "data/cu_proiel-corefud-minidev.conllu",
    "data/es_ancora-corefud-minidev.conllu",
    "data/grc_proiel-corefud-minidev.conllu",
    "data/hu_korkor-corefud-minidev.conllu",
    "data/hu_szegedkoref-corefud-minidev.conllu",
    "data/pl_pcc-corefud-minidev.conllu",
    "data/tr_itcc-corefud-minidev.conllu"
  ],
  "dropout": 0.5,
  "enodes_per_head": 2,
  "epochs": 20,
  "epochs_frozen": 2,
  "exp": "ls0.0-tw0.3-b96-s7",
  "label_smoothing": 0.0,
  "lazy_adam": true,
  "learning_rate": 1e-05,
  "learning_rate_decay": "cos",
  "learning_rate_warmup": 5000,
  "load": null,
  "logdir": "logs/ls0.0-tw0.3-b96-s7-crac2026_empty_nodes_baseline-260122_233254",
  "max_train_sentence_len": 120,
  "prediction_threshold": 0.5,
  "save_model": true,
  "seed": 7,
  "steps_per_epoch": 5000,
  "tags_min_occurrences": 2,
  "tags_weight": 0.3,
  "task_dim": 512,
  "task_hidden_layer": 2048,
  "test": [],
  "threads": 4,
  "train": [
    "data/ca_ancora-corefud-train.conllu",
    "data/cs_pcedt-corefud-train.conllu",
    "data/cs_pdt-corefud-train.conllu",
    "data/cs_pdtsc-corefud-train.conllu",
    "data/cu_proiel-corefud-train.conllu",
    "data/es_ancora-corefud-train.conllu",
    "data/grc_proiel-corefud-train.conllu",
    "data/hu_korkor-corefud-train.conllu",
    "data/hu_szegedkoref-corefud-train.conllu",
    "data/pl_pcc-corefud-train.conllu",
    "data/tr_itcc-corefud-train.conllu"
  ],
  "train_sampling_exponent": 0.5,
  "transformer": "xlm-roberta-large"
}