local env = import "../env.jsonnet";
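
# env.jsonnet is not shown here; from its usage below it appears to expose two
# helpers backed by environment variables: str(name, default) returns the
# variable (or the default) as a string, while json(name, default) parses it
# as JSON, so "[0]" becomes an array and "1e-3" a number.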

local dataset_path = env.str("DATA_PATH", "data/ace/events");
local ontology_path = "data/ace/ontology.tsv";

local debug = false;

# embedding
local label_dim = 64;
local pretrained_model = env.str("ENCODER", "roberta-large");

# module
local dropout = 0.2;
local bio_dim = 512;
local bio_layers = 2;
local span_typing_dims = [256, 256];
# Both smoothing factors read the same SMOOTHING variable and default to 0.0
# (no label smoothing).
local event_smoothing_factor = env.json("SMOOTHING", "0.0");
local arg_smoothing_factor = env.json("SMOOTHING", "0.0");
# Passed to the "transformer" optimizer below; presumably the number of
# encoder layers to freeze (0 = none).
local layer_fix = 0;

# training
local typing_loss_factor = 8.0;
local grad_acc = env.json("GRAD_ACC", "1");
local max_training_tokens = 512;
local max_inference_tokens = 1024;
local lr = env.json("LR", "1e-3");
local cuda_devices = env.json("CUDA_DEVICES", "[0]");
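
# Example launch-time overrides (assuming an AllenNLP-style runner that
# forwards environment variables to Jsonnet):
#   LR=5e-4 GRAD_ACC=4 CUDA_DEVICES='[0,1]' allennlp train <this file> -s runs/ace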

{
    dataset_reader: {
        type: "concrete",
        debug: debug,
        pretrained_model: pretrained_model,
        ignore_label: false,
        # Cap the number of instances when running in debug mode.
        [if debug then "max_instances"]: 128,
        event_smoothing_factor: event_smoothing_factor,
        arg_smoothing_factor: arg_smoothing_factor,
    },
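
    # One archive per split; the location can be redirected with the DATA_PATH
    # variable above without editing this file.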
    train_data_path: dataset_path + "/train.tar.gz",
    validation_data_path: dataset_path + "/dev.tar.gz",
    test_data_path: dataset_path + "/test.tar.gz",

    datasets_for_vocab_creation: ["train"],

    data_loader: {
        batch_sampler: {
            type: "max_tokens_sampler",
            max_tokens: max_training_tokens,
            sorting_keys: ["tokens"],
        },
    },
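
    # Inference batches may hold more tokens than training batches because no
    # gradients or optimizer state are kept at validation time.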
    validation_data_loader: {
        batch_sampler: {
            type: "max_tokens_sampler",
            max_tokens: max_inference_tokens,
            sorting_keys: ["tokens"],
        },
    },

    model: {
        type: "span",
        word_embedding: {
            token_embedders: {
                pieces: {
                    type: "pretrained_transformer",
                    model_name: pretrained_model,
                },
            },
        },
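        # Span representations come from a "combo" extractor, which presumably
        # concatenates the outputs of its sub-extractors (self-attentive
        # pooling and bidirectional endpoint features).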
        span_extractor: {
            type: "combo",
            sub_extractors: [
                {
                    type: "self_attentive",
                },
                {
                    type: "bidirectional_endpoint",
                },
            ],
        },
        span_finder: {
            type: "bio",
            bio_encoder: {
                type: "lstm",
                hidden_size: bio_dim,
                num_layers: bio_layers,
                bidirectional: true,
                dropout: dropout,
            },
            no_label: false,
        },
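        # Detected spans are then labeled by an MLP head (two 256-unit hidden
        # layers, per span_typing_dims above).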
        span_typing: {
            type: "mlp",
            hidden_dims: span_typing_dims,
        },
        metrics: [{ type: "srl" }],

        ontology_path: ontology_path,
        typing_loss_factor: typing_loss_factor,
        label_dim: label_dim,
        max_decoding_spans: 128,
        # Depth 2 presumably covers event triggers plus one level of argument
        # children.
        max_recursion_depth: 2,
        debug: debug,
    },

    trainer: {
        num_epochs: 128,
        patience: null,
        [if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
        # "+" asks AllenNLP to maximize the metric; arg-c_f is likely the
        # argument-classification F1 reported by the SRL metric above.
        validation_metric: "+arg-c_f",
        num_gradient_accumulation_steps: grad_acc,
        optimizer: {
            type: "transformer",
            base: {
                type: "adam",
                lr: lr,
            },
            # Per-component learning rates: embeddings frozen, encoder and
            # pooler fine-tuned gently; other parameters presumably fall back
            # to the base Adam lr.
            embeddings_lr: 0.0,
            encoder_lr: 1e-5,
            pooler_lr: 1e-5,
            layer_fix: layer_fix,
        },
    },
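
    # Device handling: a single entry in CUDA_DEVICES selects one GPU and
    # enables test-set evaluation; multiple entries switch AllenNLP into
    # distributed training.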
    cuda_devices:: cuda_devices,  # hidden (::): not rendered into the final JSON
    [if std.length(cuda_devices) > 1 then "distributed"]: {
        cuda_devices: cuda_devices,
    },
    [if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true,
}
|
|