# Dataset settings: source directory, length caps, negative sampling,
# split fractions, seed and sample caps.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this config.
data:
  # Relative path; the base directory it is resolved against is not visible here.
  data_dir: sqale_dataset_mined
  # The two length caps add up to 512 (128 + 384).
  # Presumably measured in tokens — TODO confirm the unit.
  max_query_length: 128
  max_table_length: 384
  num_negatives_per_positive: 15
  # Split fractions add up to 1.0 (0.9 + 0.05 + 0.05).
  train_split: 0.9
  val_split: 0.05
  test_split: 0.05
  seed: 666
  # null here presumably means "no cap" — verify against the loader.
  max_samples: null
  max_val_samples: 10000
  # Ratios add up to 1.0 (0.2 + 0.3 + 0.5).
  complexity_ratios:
    easy: 0.2
    medium: 0.3
    hard: 0.5
# Base model selection and load options.
model:
  # Looks like a Hugging Face Hub repository id — TODO confirm how it is loaded.
  name: BAAI/bge-reranker-v2-m3
  gradient_checkpointing: false
  torch_dtype: bfloat16
  # NOTE(review): if this flag is forwarded to a Hugging Face loader, `true`
  # permits executing code shipped in the model repository — confirm it is
  # actually required for this model.
  trust_remote_code: true
# Training-run settings: output location, optimisation hyperparameters,
# loss selection, logging/eval/checkpoint cadence, hardware toggles and
# Hub upload options.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the trainer that consumes this config.
training:
  output_dir: outputs/reranker_v2_curriculum
  # batch_size x gradient_accumulation_steps = 16; presumably the effective
  # batch per optimizer step (per device) — TODO confirm.
  batch_size: 4
  gradient_accumulation_steps: 4
  learning_rate: 2.0e-05
  warmup_ratio: 0.1
  num_epochs: 5
  weight_decay: 0.01
  max_grad_norm: 1.0
  # Which of temperature/margin the selected loss actually reads is not
  # visible from this file.
  loss_type: focal
  temperature: 1.0
  margin: 1.0
  wandb_project: sqale-table-reranker
  # null: no explicit run name is configured in this file.
  wandb_run_name: null
  # Cadence values: save interval (500) is exactly twice the eval interval (250).
  log_every_n_steps: 10
  eval_every_n_steps: 250
  save_every_n_steps: 500
  device: cuda
  mixed_precision: true
  cudnn_benchmark: true
  tf32: true
  # torch_compile is off while a compile mode is still set; presumably the
  # mode is ignored unless torch_compile is true — TODO confirm.
  torch_compile: false
  torch_compile_mode: reduce-overhead
  flash_attention: false
  # Upload is enabled and no token is stored in this file (hub_token is null);
  # presumably credentials are taken from the environment — TODO confirm.
  push_to_hub: true
  hub_model_id: Rubyando59/sqale-reranker-curriculum
  hub_private_repo: true
  hub_token: null
# Evaluation settings: the list of k cut-offs and the evaluation batch size.
# NOTE(review): presumably the k values feed top-k ranking metrics — confirm
# against the evaluation code.
eval:
  k_values:
    - 1
    - 3
    - 5
    - 10
  # Larger than training.batch_size (32 vs 4).
  batch_size: 32