{
  "dataset_args": {
    "max_utterances": 20,
    "max_tokens": 512,
    "dataroot": "data/all/fact_full/nlu"
  },
  "task": "linking",
  "model_name_or_path": "microsoft/deberta-v3-large",
  "per_gpu_train_batch_size": 2,
  "per_gpu_eval_batch_size": 2,
  "gradient_accumulation_steps": 1,
  "learning_rate": 2e-07,
  "adam_epsilon": 1e-08,
  "max_grad_norm": 1.0,
  "num_train_epochs": 20,
  "warmup_steps": 0,
  "fp16": "",
  "seed": 47
}