{
  "model": {
    "names": [
      "hf_text"
    ],
    "hf_text": {
      "checkpoint_name": "microsoft/deberta-v3-base",
      "gradient_checkpointing": false,
      "pooling_mode": "cls",
      "data_types": [
        "text"
      ],
      "tokenizer_name": "hf_auto",
      "max_text_len": 512,
      "insert_sep": true,
      "text_segment_num": 2,
      "stochastic_chunk": false,
      "text_aug_detect_length": 10,
      "text_trivial_aug_maxscale": 0,
      "text_train_augment_types": null
    }
  },
  "data": {
    "image": {
      "missing_value_strategy": "skip"
    },
    "text": null,
    "categorical": {
      "minimum_cat_count": 100,
      "maximum_num_cat": 20,
      "convert_to_text": true
    },
    "numerical": {
      "convert_to_text": false,
      "scaler_with_mean": true,
      "scaler_with_std": true
    },
    "label": {
      "numerical_label_preprocessing": "standardscaler"
    },
    "pos_label": null,
    "mixup": {
      "turn_on": false,
      "mixup_alpha": 0.8,
      "cutmix_alpha": 1,
      "cutmix_minmax": null,
      "prob": 1,
      "switch_prob": 0.5,
      "mode": "batch",
      "turn_off_epoch": 5,
      "label_smoothing": 0.1
    }
  },
  "optimization": {
    "optim_type": "adamw",
    "learning_rate": 0.0001,
    "weight_decay": 0.001,
    "lr_choice": "layerwise_decay",
    "lr_decay": 0.9,
    "lr_schedule": "cosine_decay",
    "max_epochs": 10,
    "max_steps": -1,
    "warmup_steps": 0.1,
    "end_lr": 0,
    "lr_mult": 1,
    "patience": 10,
    "val_check_interval": 0.5,
    "gradient_clip_val": 1,
    "gradient_clip_algorithm": "norm",
    "track_grad_norm": -1,
    "log_every_n_steps": 10,
    "top_k": 3,
    "top_k_average_method": "greedy_soup",
    "efficient_finetune": null,
    "lora": {
      "r": 8,
      "alpha": 8,
      "filter": [
        "query",
        "value",
        "^q$",
        "^v$",
        "^k$",
        "^o$"
      ]
    },
    "loss_function": "auto"
  },
  "env": {
    "num_gpus": -1,
    "num_nodes": 1,
    "batch_size": 128,
    "per_gpu_batch_size": 2,
    "eval_batch_size_ratio": 4,
    "per_gpu_batch_size_evaluation": null,
    "precision": 16,
    "num_workers": 2,
    "num_workers_evaluation": 2,
    "fast_dev_run": false,
    "deterministic": false,
    "auto_select_gpus": true,
    "strategy": "ddp_spawn"
  }
}