| { | |
| "batch_size": 96, | |
| "dev": [ | |
| "data/ca_ancora-corefud-minidev.conllu", | |
| "data/cs_pcedt-corefud-minidev.conllu", | |
| "data/cs_pdt-corefud-minidev.conllu", | |
| "data/cs_pdtsc-corefud-minidev.conllu", | |
| "data/cu_proiel-corefud-minidev.conllu", | |
| "data/es_ancora-corefud-minidev.conllu", | |
| "data/grc_proiel-corefud-minidev.conllu", | |
| "data/hu_korkor-corefud-minidev.conllu", | |
| "data/hu_szegedkoref-corefud-minidev.conllu", | |
| "data/pl_pcc-corefud-minidev.conllu", | |
| "data/tr_itcc-corefud-minidev.conllu" | |
| ], | |
| "dropout": 0.5, | |
| "enodes_per_head": 2, | |
| "epochs": 20, | |
| "epochs_frozen": 2, | |
| "exp": "ls0.0-tw0.3-b96-s7", | |
| "label_smoothing": 0.0, | |
| "lazy_adam": true, | |
| "learning_rate": 1e-05, | |
| "learning_rate_decay": "cos", | |
| "learning_rate_warmup": 5000, | |
| "load": null, | |
| "logdir": "logs/ls0.0-tw0.3-b96-s7-crac2026_empty_nodes_baseline-260122_233254", | |
| "max_train_sentence_len": 120, | |
| "prediction_threshold": 0.5, | |
| "save_model": true, | |
| "seed": 7, | |
| "steps_per_epoch": 5000, | |
| "tags_min_occurrences": 2, | |
| "tags_weight": 0.3, | |
| "task_dim": 512, | |
| "task_hidden_layer": 2048, | |
| "test": [], | |
| "threads": 4, | |
| "train": [ | |
| "data/ca_ancora-corefud-train.conllu", | |
| "data/cs_pcedt-corefud-train.conllu", | |
| "data/cs_pdt-corefud-train.conllu", | |
| "data/cs_pdtsc-corefud-train.conllu", | |
| "data/cu_proiel-corefud-train.conllu", | |
| "data/es_ancora-corefud-train.conllu", | |
| "data/grc_proiel-corefud-train.conllu", | |
| "data/hu_korkor-corefud-train.conllu", | |
| "data/hu_szegedkoref-corefud-train.conllu", | |
| "data/pl_pcc-corefud-train.conllu", | |
| "data/tr_itcc-corefud-train.conllu" | |
| ], | |
| "train_sampling_exponent": 0.5, | |
| "transformer": "xlm-roberta-large" | |
| } |