# Fine-tune and evaluate google-bert/bert-base-uncased on the `squad` dataset
# via run_qa.py, passing the TRP options (--apply-trp, --trp-depths, --trp-p,
# --trp-lambdas) through to the script.
# CUDA_VISIBLE_DEVICES=0 exposes only CUDA device 0 to the process.
#
# Reported f1/exact_match = 88.52/81.22 -> 88.6696/81.5137
# (presumably without TRP -> with TRP; confirm against the experiment log).
#
# NOTE(review): --output_dir is ./baseline even though TRP is enabled, and
# --overwrite_output_dir is set -- confirm the directory name is intended.
CUDA_VISIBLE_DEVICES=0 python run_qa.py \
  --model_name_or_path google-bert/bert-base-uncased \
  --dataset_name squad \
  --do_train \
  --do_eval \
  --per_device_train_batch_size 12 \
  --learning_rate 3e-5 \
  --num_train_epochs 2 \
  --max_seq_length 384 \
  --doc_stride 128 \
  --output_dir ./baseline \
  --overwrite_output_dir \
  --apply-trp --trp-depths 1 --trp-p 0.1 --trp-lambdas 0.4 0.2 0.1