| data: | |
| data_dir: sqale_dataset_mined | |
| max_query_length: 128 | |
| max_table_length: 384 | |
| num_negatives_per_positive: 15 | |
| train_split: 0.9 | |
| val_split: 0.05 | |
| test_split: 0.05 | |
| seed: 666 | |
| max_samples: null | |
| max_val_samples: 10000 | |
| complexity_ratios: | |
| easy: 0.2 | |
| medium: 0.3 | |
| hard: 0.5 | |
| model: | |
| name: BAAI/bge-reranker-v2-m3 | |
| gradient_checkpointing: false | |
| torch_dtype: bfloat16 | |
| trust_remote_code: true | |
| training: | |
| output_dir: outputs/reranker_v2_curriculum | |
| batch_size: 4 | |
| gradient_accumulation_steps: 4 | |
| learning_rate: 2.0e-05 | |
| warmup_ratio: 0.1 | |
| num_epochs: 5 | |
| weight_decay: 0.01 | |
| max_grad_norm: 1.0 | |
| loss_type: focal | |
| temperature: 1.0 | |
| margin: 1.0 | |
| wandb_project: sqale-table-reranker | |
| wandb_run_name: null | |
| log_every_n_steps: 10 | |
| eval_every_n_steps: 250 | |
| save_every_n_steps: 500 | |
| device: cuda | |
| mixed_precision: true | |
| cudnn_benchmark: true | |
| tf32: true | |
| torch_compile: false | |
| torch_compile_mode: reduce-overhead | |
| flash_attention: false | |
| push_to_hub: true | |
| hub_model_id: Rubyando59/sqale-reranker-curriculum | |
| hub_private_repo: true | |
| hub_token: null | |
| eval: | |
| k_values: | |
| - 1 | |
| - 3 | |
| - 5 | |
| - 10 | |
| batch_size: 32 | |