| model: | |
| names: | |
| - fusion_mlp | |
| - hf_text | |
| - numerical_mlp | |
| - timm_image | |
| numerical_mlp: | |
| hidden_size: 128 | |
| activation: leaky_relu | |
| num_layers: 1 | |
| drop_rate: 0.1 | |
| normalization: layer_norm | |
| d_token: 8 | |
| embedding_arch: null | |
| data_types: | |
| - numerical | |
| merge: concat | |
| hf_text: | |
| checkpoint_name: local://hf_text | |
| gradient_checkpointing: false | |
| pooling_mode: cls | |
| data_types: | |
| - text | |
| tokenizer_name: hf_auto | |
| max_text_len: 512 | |
| insert_sep: true | |
| low_cpu_mem_usage: false | |
| text_segment_num: 2 | |
| stochastic_chunk: false | |
| text_aug_detect_length: 10 | |
| text_trivial_aug_maxscale: 0.0 | |
| text_train_augment_types: null | |
| timm_image: | |
| checkpoint_name: swin_base_patch4_window7_224 | |
| mix_choice: all_logits | |
| data_types: | |
| - image | |
| train_transforms: | |
| - resize_shorter_side | |
| - center_crop | |
| - trivial_augment | |
| val_transforms: | |
| - resize_shorter_side | |
| - center_crop | |
| image_norm: imagenet | |
| image_size: null | |
| max_img_num_per_col: 2 | |
| fusion_mlp: | |
| weight: 0.1 | |
| adapt_in_features: max | |
| hidden_sizes: | |
| - 128 | |
| activation: leaky_relu | |
| drop_rate: 0.1 | |
| normalization: layer_norm | |
| data_types: null | |
| data: | |
| image: | |
| missing_value_strategy: zero | |
| text: | |
| normalize_text: false | |
| categorical: | |
| minimum_cat_count: 100 | |
| maximum_num_cat: 20 | |
| convert_to_text: true | |
| numerical: | |
| convert_to_text: false | |
| scaler_with_mean: true | |
| scaler_with_std: true | |
| document: | |
| missing_value_strategy: zero | |
| label: | |
| numerical_label_preprocessing: standardscaler | |
| pos_label: null | |
| mixup: | |
| turn_on: false | |
| mixup_alpha: 0.8 | |
| cutmix_alpha: 1.0 | |
| cutmix_minmax: null | |
| prob: 1.0 | |
| switch_prob: 0.5 | |
| mode: batch | |
| turn_off_epoch: 5 | |
| label_smoothing: 0.1 | |
| templates: | |
| turn_on: false | |
| num_templates: 30 | |
| template_length: 2048 | |
| preset_templates: | |
| - super_glue | |
| - rte | |
| custom_templates: null | |
| optimization: | |
| optim_type: adamw | |
| learning_rate: 0.0001 | |
| weight_decay: 0.001 | |
| lr_choice: layerwise_decay | |
| lr_decay: 0.9 | |
| lr_schedule: cosine_decay | |
| max_epochs: 10 | |
| max_steps: -1 | |
| warmup_steps: 0.1 | |
| end_lr: 0 | |
| lr_mult: 1 | |
| patience: 10 | |
| val_check_interval: 0.5 | |
| check_val_every_n_epoch: 1 | |
| skip_final_val: false | |
| gradient_clip_val: 1 | |
| gradient_clip_algorithm: norm | |
| track_grad_norm: -1 | |
| log_every_n_steps: 10 | |
| top_k: 3 | |
| top_k_average_method: greedy_soup | |
| efficient_finetune: null | |
| lora: | |
| module_filter: null | |
| filter: | |
| - query | |
| - value | |
| - ^q$ | |
| - ^v$ | |
| - ^k$ | |
| - ^o$ | |
| r: 8 | |
| alpha: 8 | |
| loss_function: auto | |
| focal_loss: | |
| alpha: null | |
| gamma: 2.0 | |
| reduction: mean | |
| env: | |
| num_gpus: 1 | |
| num_nodes: 1 | |
| batch_size: 128 | |
| per_gpu_batch_size: 8 | |
| eval_batch_size_ratio: 4 | |
| per_gpu_batch_size_evaluation: null | |
| precision: 16 | |
| num_workers: 2 | |
| num_workers_evaluation: 2 | |
| fast_dev_run: false | |
| deterministic: false | |
| auto_select_gpus: true | |
| strategy: null | |
| deepspeed_allgather_size: 1000000000.0 | |
| deepspeed_allreduce_size: 1000000000.0 | |