# Global flag referenced below via `!cfg multi_ctx_training`.
# NOTE(review): presumably toggles multi-context training in
# LongContextEmbeddingModel — confirm against that class.
multi_ctx_training: True

# Shared base encoder, referenced by the model and every dataset
# builder below via `!cfg base_model`.
base_model:
  (): pylate.models.ColBERT
  model_name_or_path: "./models/GTE-ModernColBERT"
  model_kwargs:
    attn_implementation: "flash_attention_2"
    torch_dtype: !ext torch.bfloat16
  document_length: 8192
|
|
# Main training configuration object.
config:
  (): contextual_embeddings.training.contextual_training.ContextualTrainingConfig
  model:
    (): contextual_embeddings.models.long_context_model.LongContextEmbeddingModel
    base_model: !cfg base_model
    # NOTE(review): the original file listed this key twice with the same
    # value (a duplicate YAML mapping key); a single occurrence is kept.
    multi_ctx_training: !cfg multi_ctx_training
    lambda_seq: 0.1
    pooling_mode: "tokens"
    colbert_tokenize: True
    loss_type: "late_interaction"
  exp_name: "mc_base"
  n_gpus: 4
  output_dir: "./checkpoints/new_submission"
  train_dataset:
    (): contextual_embeddings.models.utils.get_long_context_dataset
    base_model: !cfg base_model
  # Evaluation splits, keyed by dataset name; all are built with the same
  # chunked-MLDR loader pointed at different paths/splits.
  eval_dataset:
    mldr:
      (): contextual_embeddings.models.utils.get_chunked_mldr_split
      path: "data_dir/chunked-mldr-big"
      split: "test"
      base_model: !cfg base_model
    squad:
      (): contextual_embeddings.models.utils.get_chunked_mldr_split
      path: "data_dir/squad"
      split: "validation"
      base_model: !cfg base_model
      all_queries: False
    narrative_qa:
      (): contextual_embeddings.models.utils.get_chunked_mldr_split
      path: "data_dir/narrative_qa"
      split: "test"
      base_model: !cfg base_model
      all_queries: False
  run_train: True
  training_args:
    (): sentence_transformers.SentenceTransformerTrainingArguments
    # output_dir is null here; presumably filled in at runtime from the
    # top-level output_dir/exp_name — confirm in the training code.
    output_dir: null
    overwrite_output_dir: True
    num_train_epochs: 2
    per_device_train_batch_size: 4
    per_device_eval_batch_size: 4
    fp16: False
    bf16: True  # matches the bfloat16 torch_dtype of the base model
    learning_rate: 5e-5
    warmup_steps: 55
    lr_scheduler_type: "cosine"
    eval_strategy: "steps"
    eval_on_start: True
    eval_steps: 100
    logging_steps: 10
    report_to: "wandb"
|
|