{
  "model_name": "microsoft/deberta-v3-large",
  "num_labels": 2,
  "train_file": "train_windows.jsonl",
  "val_file": "val_windows.jsonl",
  "max_length": 512,
  "batch_size": 8,
  "gradient_accumulation_steps": 8,
  "num_epochs": 3,
  "learning_rate": 1e-06,
  "warmup_ratio": 0.1,
  "weight_decay": 0.01,
  "max_grad_norm": 1.0,
  "label_smoothing": 0.0,
  "device": "cuda",
  "num_workers": 0,
  "seed": 42,
  "bf16": true,
  "logging_steps": 1,
  "eval_steps": 5000,
  "save_steps": 10000,
  "output_dir": "./deberta_link_output",
  "wandb_project": "deberta-link-classification",
  "wandb_name": "deberta-v3-large-link-tokens",
  "patience": 2,
  "min_delta": 0.0001,
  "max_checkpoints": 5,
  "protect_latest_epoch_step": true
}