{
  "dataset_reader": {
    "type": "qasper",
    "for_training": true,
    "max_document_length": 15360,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
  },
  "model": {
    "type": "qasper_baseline",
    "attention_dropout": 0.1,
    "attention_window_size": 1536,
    "gradient_checkpointing": true,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/",
    "use_evidence_scaffold": true
  },
  "train_data_path": ".../ours_led_qa_binary/ours_qa_train.json",
  "validation_data_path": ".../ours_led_qa_binary/ours_qa_dev.json",
  "trainer": {
    "callbacks": [
      {
        "type": "tensorboard"
      }
    ],
    "cuda_device": 0,
    "enable_default_callbacks": false,
    "grad_clipping": 1,
    "learning_rate_scheduler": {
      "type": "slanted_triangular",
      "cut_frac": 0.1,
      "num_epochs": 5,
      "num_steps_per_epoch": 255
    },
    "num_epochs": 5,
    "num_gradient_accumulation_steps": 16,
    "optimizer": {
      "type": "adam",
      "lr": 3e-05
    },
    "patience": 2,
    "use_amp": true,
    "validation_metric": "+answer_f1"
  },
  "vocabulary": {
    "type": "empty"
  },
  "data_loader": {
    "batch_size": 1
  },
  "pytorch_seed": 15371,
  "validation_dataset_reader": {
    "type": "qasper",
    "for_training": false,
    "max_document_length": 15360,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
  }
}