{
  "dataset_reader": {
    "type": "qasper",
    "for_training": true,
    "max_document_length": 15360,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
  },
  "model": {
    "type": "qasper_baseline",
    "attention_dropout": 0.1,
    "attention_window_size": 1536,
    "gradient_checkpointing": true,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/",
    "use_evidence_scaffold": true
  },
  "train_data_path": ".../ours_led_qa_binary/ours_qa_train.json",
  "validation_data_path": ".../ours_led_qa_binary/ours_qa_dev.json",
  "trainer": {
    "callbacks": [
      {
        "type": "tensorboard"
      }
    ],
    "cuda_device": 0,
    "enable_default_callbacks": false,
    "grad_clipping": 1,
    "learning_rate_scheduler": {
      "type": "slanted_triangular",
      "cut_frac": 0.1,
      "num_epochs": 5,
      "num_steps_per_epoch": 255
    },
    "num_epochs": 5,
    "num_gradient_accumulation_steps": 16,
    "optimizer": {
      "type": "adam",
      "lr": 3e-05
    },
    "patience": 2,
    "use_amp": true,
    "validation_metric": "+answer_f1"
  },
  "vocabulary": {
    "type": "empty"
  },
  "data_loader": {
    "batch_size": 1
  },
  "pytorch_seed": 15371,
  "validation_dataset_reader": {
    "type": "qasper",
    "for_training": false,
    "max_document_length": 15360,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
  }
}