Milan Straka
Initial upload of the model.
65e1f21
{
"batch_size": 96,
"dev": [
"data/ca_ancora-corefud-minidev.conllu",
"data/cs_pcedt-corefud-minidev.conllu",
"data/cs_pdt-corefud-minidev.conllu",
"data/cs_pdtsc-corefud-minidev.conllu",
"data/cu_proiel-corefud-minidev.conllu",
"data/es_ancora-corefud-minidev.conllu",
"data/grc_proiel-corefud-minidev.conllu",
"data/hu_korkor-corefud-minidev.conllu",
"data/hu_szegedkoref-corefud-minidev.conllu",
"data/pl_pcc-corefud-minidev.conllu",
"data/tr_itcc-corefud-minidev.conllu"
],
"dropout": 0.5,
"enodes_per_head": 2,
"epochs": 20,
"epochs_frozen": 2,
"exp": "ls0.0-tw0.3-b96-s7",
"label_smoothing": 0.0,
"lazy_adam": true,
"learning_rate": 1e-05,
"learning_rate_decay": "cos",
"learning_rate_warmup": 5000,
"load": null,
"logdir": "logs/ls0.0-tw0.3-b96-s7-crac2026_empty_nodes_baseline-260122_233254",
"max_train_sentence_len": 120,
"prediction_threshold": 0.5,
"save_model": true,
"seed": 7,
"steps_per_epoch": 5000,
"tags_min_occurrences": 2,
"tags_weight": 0.3,
"task_dim": 512,
"task_hidden_layer": 2048,
"test": [],
"threads": 4,
"train": [
"data/ca_ancora-corefud-train.conllu",
"data/cs_pcedt-corefud-train.conllu",
"data/cs_pdt-corefud-train.conllu",
"data/cs_pdtsc-corefud-train.conllu",
"data/cu_proiel-corefud-train.conllu",
"data/es_ancora-corefud-train.conllu",
"data/grc_proiel-corefud-train.conllu",
"data/hu_korkor-corefud-train.conllu",
"data/hu_szegedkoref-corefud-train.conllu",
"data/pl_pcc-corefud-train.conllu",
"data/tr_itcc-corefud-train.conllu"
],
"train_sampling_exponent": 0.5,
"transformer": "xlm-roberta-large"
}