---
# Training / evaluation configuration for a table reranker run.
# NOTE(review): the nesting below was restored from a single-line,
# whitespace-collapsed form of this file; confirm the placement of
# `complexity_ratios` (under `data`) and `eval` (top level) against the
# schema the consuming code expects.

# Dataset location, tokenization limits and split settings.
data:
  data_dir: sqale_dataset_mined
  max_query_length: 128
  max_table_length: 384
  num_negatives_per_positive: 15
  # The three split fractions sum to 1.0.
  train_split: 0.9
  val_split: 0.05
  test_split: 0.05
  seed: 666
  # null here presumably means "no cap" -- TODO confirm with the loader.
  max_samples: null
  max_val_samples: 10000
  # Per-difficulty proportions; the three values sum to 1.0.
  complexity_ratios:
    easy: 0.2
    medium: 0.3
    hard: 0.5

# Base model and how it is loaded.
model:
  name: BAAI/bge-reranker-v2-m3
  gradient_checkpointing: false
  torch_dtype: bfloat16
  trust_remote_code: true

# Optimization, logging, hardware and Hub publishing settings.
training:
  output_dir: outputs/reranker_v2_curriculum
  batch_size: 4
  gradient_accumulation_steps: 4
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders also resolve it as a float rather than a string.
  learning_rate: 2.0e-05
  warmup_ratio: 0.1
  num_epochs: 5
  weight_decay: 0.01
  max_grad_norm: 1.0
  loss_type: focal
  # NOTE(review): whether `temperature` and `margin` are used when
  # loss_type is `focal` depends on the trainer -- confirm.
  temperature: 1.0
  margin: 1.0
  wandb_project: sqale-table-reranker
  wandb_run_name: null
  log_every_n_steps: 10
  eval_every_n_steps: 250
  save_every_n_steps: 500
  device: cuda
  mixed_precision: true
  cudnn_benchmark: true
  tf32: true
  torch_compile: false
  torch_compile_mode: reduce-overhead
  flash_attention: false
  push_to_hub: true
  hub_model_id: Rubyando59/sqale-reranker-curriculum
  hub_private_repo: true
  # Left null so no credential is stored in this file; the token is
  # presumably supplied at runtime (env var / cached login) -- confirm.
  hub_token: null

# Evaluation settings.
eval:
  # Cut-offs k for the ranking metrics (presumably metric@k -- confirm).
  k_values:
    - 1
    - 3
    - 5
    - 10
  batch_size: 32