File size: 1,168 Bytes
52fa787
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0494379
52fa787
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Run identity: a label for the run, the starting model, and the directory
# that receives outputs.
# NOTE(review): model_name looks like a Hugging Face Hub repo id
# (owner/name) - confirm against the code that loads it.
run_name: bert-tiny-stage2-sbert
model_name: QuangDuy/bert-tiny-stage2-hf
output_dir: outputs/bert-tiny-stage2-sbert

# Data sources: two training datasets and one evaluation dataset.
# NOTE(review): how the two training datasets are combined is not visible
# in this file - check the training script.
# NOTE(review): "legel" in eval_dataset is kept verbatim because the value
# is an identifier read at runtime; confirm it matches the published name.
# NOTE(review): corpus / queries / data_ir are presumably sub-configuration
# names of eval_dataset - verify against the loader.
# NOTE(review): eval_split is "train", the same as train_split; presumably
# the evaluation dataset only publishes that split - confirm it is intended.
train_dataset:
  - batmangiaicuuthegioi/zalo-legal-triplets
  - QuangDuy/mmarco-vi-hard-negatives
train_split: train
eval_dataset: another-symato/VMTEB-Zalo-legel-retrieval-wseg
eval_corpus_config: corpus
eval_queries_config: queries
eval_labels_config: data_ir
eval_split: train

# Random seed plus embedding construction settings (pooling mode,
# normalisation, use of hard negatives).
# NOTE(review): max_seq_length is presumably measured in tokens - confirm.
# NOTE(review): which dataset columns count as "hard negatives" is decided
# by the consumer of this file, not here - verify.
seed: 42
max_seq_length: 512
pooling: mean
normalize_embeddings: true
include_hard_negatives: true

# Optimisation hyperparameters.
# learning_rate is written with both a decimal point and a signed exponent;
# keep that form, since YAML 1.1 loaders such as PyYAML only resolve
# exponent notation to a float when both are present (a bare "2e-5" would
# load as a string).
# NOTE(review): precision (bf16) and use_amp are both set; how the trainer
# reconciles the two is not visible here - confirm they do not conflict.
# NOTE(review): use_cached_mnrl presumably toggles a cached variant of the
# multiple-negatives ranking loss - confirm against the training code.
num_train_epochs: 5
train_batch_size: 64
learning_rate: 2.0e-5
warmup_ratio: 0.1
weight_decay: 0.01
precision: bf16
use_amp: true
use_cached_mnrl: false

# Validation, checkpointing, Hub upload, and post-training retrieval
# evaluation settings.
# evaluation_steps and checkpoint_save_steps are both 2000.
# NOTE(review): validation_size 0.05 is presumably a held-out fraction of
# the training data - confirm.
# NOTE(review): early_stopping_patience is presumably counted in evaluation
# rounds rather than steps or epochs - confirm.
# The three null values are explicit YAML nulls.
# NOTE(review): presumably null means "no subset / no limit" - confirm the
# consumer treats null that way.
# NOTE(review): hf_private is false and hf_push_on_save is true, so saves
# presumably upload to a public hf_repo_id - confirm this is intended.
validation_size: 0.05
validation_subset: null
evaluation_steps: 2000
checkpoint_save_steps: 2000
checkpoint_save_total_limit: 5
early_stopping_patience: 4
hf_repo_id: QuangDuy/bert-tiny-stage2-sbert
hf_private: false
hf_push_on_save: true
run_retrieval_eval_after_train: true
retrieval_eval_limit_queries: null
retrieval_eval_extra_corpus_docs: null

# Embedding dimensions for the Matryoshka setting, listed largest first.
# The same four values are repeated under truncate_dims.
# NOTE(review): 384 is presumably the model's full embedding width -
# confirm against the model configuration.
matryoshka_dims:
  - 384
  - 256
  - 128
  - 64

# Dimensions that embeddings are truncated to, listed largest first.
# Holds the same four values as matryoshka_dims.
# NOTE(review): presumably applied at evaluation time, one pass per
# dimension - confirm against the evaluation code.
truncate_dims:
  - 384
  - 256
  - 128
  - 64

# Cut-off values k, in ascending order.
# NOTE(review): presumably the k values at which retrieval metrics are
# reported - confirm which metrics consume this list.
top_k:
  - 1
  - 3
  - 5
  - 10

# NOTE(review): map_at_k is presumably the rank cut-off for mean average
# precision (MAP@100) - confirm.
# eval_batch_size (128) is twice train_batch_size (64).
map_at_k: 100
eval_batch_size: 128