# Rubyando59's picture
# Best Recall@10: 0.8666 at step 250
# c5327d8 verified
# Dataset settings: source directory, length limits, sampling, and splits.
data:
  data_dir: sqale_dataset_mined
  # Length caps for query and table inputs.
  # NOTE(review): units are not shown here; presumably tokens -- confirm.
  max_query_length: 128
  max_table_length: 384
  num_negatives_per_positive: 15
  # Split fractions; the three values below sum to 1.0.
  train_split: 0.9
  val_split: 0.05
  test_split: 0.05
  seed: 666
  # Explicit null: no value is configured for max_samples.
  # NOTE(review): presumably means "no cap" -- confirm against the loader.
  max_samples: null
  max_val_samples: 10000
  # Per-difficulty ratios; the three values below sum to 1.0.
  complexity_ratios:
    easy: 0.2
    medium: 0.3
    hard: 0.5
# Base model identifier and load-time options.
model:
  name: BAAI/bge-reranker-v2-m3
  gradient_checkpointing: false
  torch_dtype: bfloat16
  # NOTE(review): if this flag is forwarded to a Hugging Face loader, it
  # permits running code shipped in the model repository -- confirm that it
  # is actually required for this model before keeping it enabled.
  trust_remote_code: true
# Training run settings: optimisation, loss, logging, hardware, and hub upload.
training:
  output_dir: outputs/reranker_v2_curriculum

  # Optimisation hyper-parameters.
  # NOTE(review): if accumulation is applied conventionally, 4 x 4 gives 16
  # samples per optimizer step -- confirm against the training loop.
  batch_size: 4
  gradient_accumulation_steps: 4
  learning_rate: 2.0e-05
  warmup_ratio: 0.1
  num_epochs: 5
  weight_decay: 0.01
  max_grad_norm: 1.0

  # Loss selection.
  # NOTE(review): temperature and margin may only apply to some loss types;
  # verify which of them the "focal" loss actually reads.
  loss_type: focal
  temperature: 1.0
  margin: 1.0

  # Experiment tracking and step intervals.
  wandb_project: sqale-table-reranker
  wandb_run_name: null
  log_every_n_steps: 10
  eval_every_n_steps: 250
  save_every_n_steps: 500

  # Device and performance switches.
  device: cuda
  mixed_precision: true
  cudnn_benchmark: true
  tf32: true
  torch_compile: false
  torch_compile_mode: reduce-overhead
  flash_attention: false

  # Hub upload. No token is stored in this file (hub_token is null).
  # NOTE(review): with push_to_hub enabled, credentials presumably come from
  # the environment or a cached login -- confirm.
  push_to_hub: true
  hub_model_id: Rubyando59/sqale-reranker-curriculum
  hub_private_repo: true
  hub_token: null
# Evaluation settings.
eval:
  # Cutoff values k used for evaluation metrics.
  # NOTE(review): presumably Recall@k -- confirm against the evaluator.
  k_values:
    - 1
    - 3
    - 5
    - 10
  batch_size: 32