{
  "model_name": "microsoft/deberta-v3-large",
  "lr": 6e-06,
  "epochs": 1,
  "weight_decay": 0.01,
  "train_bs": 3,
  "eval_bs": 8,
  "gradient_accumulation_steps": 8,
  "gradient_checkpointing": false,
  "eval_datasets": [
    {"path": "./data/all_eval/osu_eval.csv", "name": "osu"},
    {"path": "./data/all_eval/healthver_eval.csv", "name": "healthver"},
    {"path": "./data/all_eval/deepset_1_eval.csv", "name": "deepset_1"},
    {"path": "./data/all_eval/deepset_2_eval.csv", "name": "deepset_2"},
    {"path": "./data/all_eval/deepset_3_eval.csv", "name": "deepset_3"},
    {"path": "./data/all_eval/deepset_4_eval.csv", "name": "deepset_4"}
  ],
  "train_dataset_path": "./data/all_training/all_train_v2_pseudo.csv",
  "ensemble_model_predictions": [
    "mathislucka/deberta-large-hallucination-eval-v2",
    "mathislucka/deberta-base-hallucination-eval-v2",
    "models/albert-xxlarge-v2-optim-data-v1",
    "models/deberta-base-v3-no-atomic-wfc-nq",
    "models/deberta-v3-large-data-optim-v2"
  ],
  "half_precision": true
}