Add finetuned model
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +4 -0
- 1_Pooling/config.json +10 -0
- README.md +0 -0
- checkpoint-196/1_Pooling/config.json +10 -0
- checkpoint-196/README.md +0 -0
- checkpoint-196/config.json +27 -0
- checkpoint-196/config_sentence_transformers.json +14 -0
- checkpoint-196/model.safetensors +3 -0
- checkpoint-196/modules.json +20 -0
- checkpoint-196/optimizer.pt +3 -0
- checkpoint-196/rng_state.pth +3 -0
- checkpoint-196/scheduler.pt +3 -0
- checkpoint-196/sentence_bert_config.json +4 -0
- checkpoint-196/sentencepiece.bpe.model +3 -0
- checkpoint-196/special_tokens_map.json +51 -0
- checkpoint-196/tokenizer.json +3 -0
- checkpoint-196/tokenizer_config.json +62 -0
- checkpoint-196/trainer_state.json +1611 -0
- checkpoint-196/training_args.bin +3 -0
- checkpoint-294/1_Pooling/config.json +10 -0
- checkpoint-294/README.md +0 -0
- checkpoint-294/config.json +27 -0
- checkpoint-294/config_sentence_transformers.json +14 -0
- checkpoint-294/model.safetensors +3 -0
- checkpoint-294/modules.json +20 -0
- checkpoint-294/optimizer.pt +3 -0
- checkpoint-294/rng_state.pth +3 -0
- checkpoint-294/scheduler.pt +3 -0
- checkpoint-294/sentence_bert_config.json +4 -0
- checkpoint-294/sentencepiece.bpe.model +3 -0
- checkpoint-294/special_tokens_map.json +51 -0
- checkpoint-294/tokenizer.json +3 -0
- checkpoint-294/tokenizer_config.json +62 -0
- checkpoint-294/trainer_state.json +2395 -0
- checkpoint-294/training_args.bin +3 -0
- checkpoint-98/1_Pooling/config.json +10 -0
- checkpoint-98/README.md +1621 -0
- checkpoint-98/config.json +27 -0
- checkpoint-98/config_sentence_transformers.json +14 -0
- checkpoint-98/model.safetensors +3 -0
- checkpoint-98/modules.json +20 -0
- checkpoint-98/optimizer.pt +3 -0
- checkpoint-98/rng_state.pth +3 -0
- checkpoint-98/scheduler.pt +3 -0
- checkpoint-98/sentence_bert_config.json +4 -0
- checkpoint-98/sentencepiece.bpe.model +3 -0
- checkpoint-98/special_tokens_map.json +51 -0
- checkpoint-98/tokenizer.json +3 -0
- checkpoint-98/tokenizer_config.json +62 -0
- checkpoint-98/trainer_state.json +827 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
checkpoint-196/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
checkpoint-294/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
checkpoint-98/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-196/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
checkpoint-196/README.md
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-196/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 4096,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "xlm-roberta",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 24,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 1,
|
| 21 |
+
"position_embedding_type": "absolute",
|
| 22 |
+
"torch_dtype": "float32",
|
| 23 |
+
"transformers_version": "4.51.3",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
checkpoint-196/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "SentenceTransformer",
|
| 3 |
+
"__version__": {
|
| 4 |
+
"sentence_transformers": "5.1.0",
|
| 5 |
+
"transformers": "4.51.3",
|
| 6 |
+
"pytorch": "2.8.0+cu126"
|
| 7 |
+
},
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
checkpoint-196/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86fee9b378922f1db9f68cf51a4941e02dfb183276ac89a16c8edbed98e30b9e
|
| 3 |
+
size 2239607176
|
checkpoint-196/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
checkpoint-196/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c03ddaba15f7dd601cacfb32507cfd2ceb73de3b6a0540a718091ec7eb2e678
|
| 3 |
+
size 4471067142
|
checkpoint-196/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa08bd9dd367cde376d15e8b982d14cd6729eae58ce75d651531d783eb6f5977
|
| 3 |
+
size 14645
|
checkpoint-196/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9df0dbf279a8d0448c4e72f58a6993855330283a91196a4153f7f62130d091c0
|
| 3 |
+
size 1465
|
checkpoint-196/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
checkpoint-196/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
checkpoint-196/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "<unk>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
checkpoint-196/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
|
| 3 |
+
size 17082987
|
checkpoint-196/tokenizer_config.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": true,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"max_length": 512,
|
| 51 |
+
"model_max_length": 512,
|
| 52 |
+
"pad_to_multiple_of": null,
|
| 53 |
+
"pad_token": "<pad>",
|
| 54 |
+
"pad_token_type_id": 0,
|
| 55 |
+
"padding_side": "right",
|
| 56 |
+
"sep_token": "</s>",
|
| 57 |
+
"stride": 0,
|
| 58 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 59 |
+
"truncation_side": "right",
|
| 60 |
+
"truncation_strategy": "longest_first",
|
| 61 |
+
"unk_token": "<unk>"
|
| 62 |
+
}
|
checkpoint-196/trainer_state.json
ADDED
|
@@ -0,0 +1,1611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 98,
|
| 3 |
+
"best_metric": 0.3312285498294292,
|
| 4 |
+
"best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 196,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.01020408163265306,
|
| 14 |
+
"grad_norm": 973.273681640625,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 15.8588,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.02040816326530612,
|
| 21 |
+
"grad_norm": 1016.8517456054688,
|
| 22 |
+
"learning_rate": 1.0204081632653061e-07,
|
| 23 |
+
"loss": 10.7411,
|
| 24 |
+
"step": 2
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.030612244897959183,
|
| 28 |
+
"grad_norm": 166.88465881347656,
|
| 29 |
+
"learning_rate": 2.0408163265306121e-07,
|
| 30 |
+
"loss": 1.3873,
|
| 31 |
+
"step": 3
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.04081632653061224,
|
| 35 |
+
"grad_norm": 108.06741333007812,
|
| 36 |
+
"learning_rate": 3.0612244897959183e-07,
|
| 37 |
+
"loss": 0.9088,
|
| 38 |
+
"step": 4
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.05102040816326531,
|
| 42 |
+
"grad_norm": 1.1959134340286255,
|
| 43 |
+
"learning_rate": 4.0816326530612243e-07,
|
| 44 |
+
"loss": 0.0077,
|
| 45 |
+
"step": 5
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.061224489795918366,
|
| 49 |
+
"grad_norm": 130.83908081054688,
|
| 50 |
+
"learning_rate": 5.102040816326531e-07,
|
| 51 |
+
"loss": 0.6016,
|
| 52 |
+
"step": 6
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.07142857142857142,
|
| 56 |
+
"grad_norm": 318.3863525390625,
|
| 57 |
+
"learning_rate": 6.122448979591837e-07,
|
| 58 |
+
"loss": 1.6714,
|
| 59 |
+
"step": 7
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.08163265306122448,
|
| 63 |
+
"grad_norm": 74.26002502441406,
|
| 64 |
+
"learning_rate": 7.142857142857143e-07,
|
| 65 |
+
"loss": 0.4211,
|
| 66 |
+
"step": 8
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.09183673469387756,
|
| 70 |
+
"grad_norm": 32.4500846862793,
|
| 71 |
+
"learning_rate": 8.163265306122449e-07,
|
| 72 |
+
"loss": 0.1996,
|
| 73 |
+
"step": 9
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.10204081632653061,
|
| 77 |
+
"grad_norm": 41.27345275878906,
|
| 78 |
+
"learning_rate": 9.183673469387756e-07,
|
| 79 |
+
"loss": 0.1895,
|
| 80 |
+
"step": 10
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.11224489795918367,
|
| 84 |
+
"grad_norm": 27.35291862487793,
|
| 85 |
+
"learning_rate": 1.0204081632653063e-06,
|
| 86 |
+
"loss": 0.1358,
|
| 87 |
+
"step": 11
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.12244897959183673,
|
| 91 |
+
"grad_norm": 103.75244903564453,
|
| 92 |
+
"learning_rate": 1.122448979591837e-06,
|
| 93 |
+
"loss": 0.5552,
|
| 94 |
+
"step": 12
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.1326530612244898,
|
| 98 |
+
"grad_norm": 155.97923278808594,
|
| 99 |
+
"learning_rate": 1.2244897959183673e-06,
|
| 100 |
+
"loss": 0.5141,
|
| 101 |
+
"step": 13
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.14285714285714285,
|
| 105 |
+
"grad_norm": 53.757484436035156,
|
| 106 |
+
"learning_rate": 1.3265306122448982e-06,
|
| 107 |
+
"loss": 0.1955,
|
| 108 |
+
"step": 14
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.15306122448979592,
|
| 112 |
+
"grad_norm": 175.17491149902344,
|
| 113 |
+
"learning_rate": 1.4285714285714286e-06,
|
| 114 |
+
"loss": 1.9114,
|
| 115 |
+
"step": 15
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.16326530612244897,
|
| 119 |
+
"grad_norm": 49.02252197265625,
|
| 120 |
+
"learning_rate": 1.5306122448979593e-06,
|
| 121 |
+
"loss": 0.2645,
|
| 122 |
+
"step": 16
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.17346938775510204,
|
| 126 |
+
"grad_norm": 999.3756103515625,
|
| 127 |
+
"learning_rate": 1.6326530612244897e-06,
|
| 128 |
+
"loss": 7.5545,
|
| 129 |
+
"step": 17
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.1836734693877551,
|
| 133 |
+
"grad_norm": 149.2627410888672,
|
| 134 |
+
"learning_rate": 1.7346938775510206e-06,
|
| 135 |
+
"loss": 0.4297,
|
| 136 |
+
"step": 18
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.19387755102040816,
|
| 140 |
+
"grad_norm": 204.95181274414062,
|
| 141 |
+
"learning_rate": 1.8367346938775512e-06,
|
| 142 |
+
"loss": 0.678,
|
| 143 |
+
"step": 19
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.20408163265306123,
|
| 147 |
+
"grad_norm": 103.94851684570312,
|
| 148 |
+
"learning_rate": 1.938775510204082e-06,
|
| 149 |
+
"loss": 0.4634,
|
| 150 |
+
"step": 20
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.21428571428571427,
|
| 154 |
+
"grad_norm": 536.7100219726562,
|
| 155 |
+
"learning_rate": 2.0408163265306125e-06,
|
| 156 |
+
"loss": 4.2252,
|
| 157 |
+
"step": 21
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.22448979591836735,
|
| 161 |
+
"grad_norm": 444.44805908203125,
|
| 162 |
+
"learning_rate": 2.1428571428571427e-06,
|
| 163 |
+
"loss": 3.9985,
|
| 164 |
+
"step": 22
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.23469387755102042,
|
| 168 |
+
"grad_norm": 170.50369262695312,
|
| 169 |
+
"learning_rate": 2.244897959183674e-06,
|
| 170 |
+
"loss": 1.9242,
|
| 171 |
+
"step": 23
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.24489795918367346,
|
| 175 |
+
"grad_norm": 626.5487060546875,
|
| 176 |
+
"learning_rate": 2.3469387755102044e-06,
|
| 177 |
+
"loss": 3.2716,
|
| 178 |
+
"step": 24
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.25510204081632654,
|
| 182 |
+
"grad_norm": 51.353050231933594,
|
| 183 |
+
"learning_rate": 2.4489795918367347e-06,
|
| 184 |
+
"loss": 0.123,
|
| 185 |
+
"step": 25
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.2653061224489796,
|
| 189 |
+
"grad_norm": 108.25341796875,
|
| 190 |
+
"learning_rate": 2.5510204081632657e-06,
|
| 191 |
+
"loss": 1.0011,
|
| 192 |
+
"step": 26
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.2755102040816326,
|
| 196 |
+
"grad_norm": 322.83502197265625,
|
| 197 |
+
"learning_rate": 2.6530612244897964e-06,
|
| 198 |
+
"loss": 3.5846,
|
| 199 |
+
"step": 27
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.2857142857142857,
|
| 203 |
+
"grad_norm": 203.38458251953125,
|
| 204 |
+
"learning_rate": 2.7551020408163266e-06,
|
| 205 |
+
"loss": 1.1365,
|
| 206 |
+
"step": 28
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.29591836734693877,
|
| 210 |
+
"grad_norm": 127.78427124023438,
|
| 211 |
+
"learning_rate": 2.8571428571428573e-06,
|
| 212 |
+
"loss": 0.7149,
|
| 213 |
+
"step": 29
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.30612244897959184,
|
| 217 |
+
"grad_norm": 283.67645263671875,
|
| 218 |
+
"learning_rate": 2.959183673469388e-06,
|
| 219 |
+
"loss": 1.2629,
|
| 220 |
+
"step": 30
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.3163265306122449,
|
| 224 |
+
"grad_norm": 82.65542602539062,
|
| 225 |
+
"learning_rate": 3.0612244897959185e-06,
|
| 226 |
+
"loss": 0.6459,
|
| 227 |
+
"step": 31
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.32653061224489793,
|
| 231 |
+
"grad_norm": 42.66185760498047,
|
| 232 |
+
"learning_rate": 3.1632653061224496e-06,
|
| 233 |
+
"loss": 0.1934,
|
| 234 |
+
"step": 32
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.336734693877551,
|
| 238 |
+
"grad_norm": 212.1294708251953,
|
| 239 |
+
"learning_rate": 3.2653061224489794e-06,
|
| 240 |
+
"loss": 1.4897,
|
| 241 |
+
"step": 33
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.3469387755102041,
|
| 245 |
+
"grad_norm": 188.0417022705078,
|
| 246 |
+
"learning_rate": 3.3673469387755105e-06,
|
| 247 |
+
"loss": 0.8561,
|
| 248 |
+
"step": 34
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.35714285714285715,
|
| 252 |
+
"grad_norm": 2.0467610359191895,
|
| 253 |
+
"learning_rate": 3.469387755102041e-06,
|
| 254 |
+
"loss": 0.0128,
|
| 255 |
+
"step": 35
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.3673469387755102,
|
| 259 |
+
"grad_norm": 283.3966979980469,
|
| 260 |
+
"learning_rate": 3.5714285714285718e-06,
|
| 261 |
+
"loss": 1.4952,
|
| 262 |
+
"step": 36
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.37755102040816324,
|
| 266 |
+
"grad_norm": 60.74869155883789,
|
| 267 |
+
"learning_rate": 3.6734693877551024e-06,
|
| 268 |
+
"loss": 0.3181,
|
| 269 |
+
"step": 37
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.3877551020408163,
|
| 273 |
+
"grad_norm": 824.6165771484375,
|
| 274 |
+
"learning_rate": 3.7755102040816327e-06,
|
| 275 |
+
"loss": 6.3681,
|
| 276 |
+
"step": 38
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 0.3979591836734694,
|
| 280 |
+
"grad_norm": 231.1636962890625,
|
| 281 |
+
"learning_rate": 3.877551020408164e-06,
|
| 282 |
+
"loss": 1.4487,
|
| 283 |
+
"step": 39
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 0.40816326530612246,
|
| 287 |
+
"grad_norm": 26.46611785888672,
|
| 288 |
+
"learning_rate": 3.979591836734694e-06,
|
| 289 |
+
"loss": 0.1702,
|
| 290 |
+
"step": 40
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 0.41836734693877553,
|
| 294 |
+
"grad_norm": 75.88525390625,
|
| 295 |
+
"learning_rate": 4.081632653061225e-06,
|
| 296 |
+
"loss": 0.2513,
|
| 297 |
+
"step": 41
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.42857142857142855,
|
| 301 |
+
"grad_norm": 465.83392333984375,
|
| 302 |
+
"learning_rate": 4.183673469387755e-06,
|
| 303 |
+
"loss": 4.1595,
|
| 304 |
+
"step": 42
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 0.4387755102040816,
|
| 308 |
+
"grad_norm": 306.2772521972656,
|
| 309 |
+
"learning_rate": 4.2857142857142855e-06,
|
| 310 |
+
"loss": 2.7347,
|
| 311 |
+
"step": 43
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 0.4489795918367347,
|
| 315 |
+
"grad_norm": 488.9759521484375,
|
| 316 |
+
"learning_rate": 4.3877551020408165e-06,
|
| 317 |
+
"loss": 2.3182,
|
| 318 |
+
"step": 44
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 0.45918367346938777,
|
| 322 |
+
"grad_norm": 355.1698913574219,
|
| 323 |
+
"learning_rate": 4.489795918367348e-06,
|
| 324 |
+
"loss": 1.3285,
|
| 325 |
+
"step": 45
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 0.46938775510204084,
|
| 329 |
+
"grad_norm": 263.558349609375,
|
| 330 |
+
"learning_rate": 4.591836734693878e-06,
|
| 331 |
+
"loss": 2.1155,
|
| 332 |
+
"step": 46
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 0.47959183673469385,
|
| 336 |
+
"grad_norm": 9.667963981628418,
|
| 337 |
+
"learning_rate": 4.693877551020409e-06,
|
| 338 |
+
"loss": 0.0645,
|
| 339 |
+
"step": 47
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"epoch": 0.4897959183673469,
|
| 343 |
+
"grad_norm": 957.79345703125,
|
| 344 |
+
"learning_rate": 4.795918367346939e-06,
|
| 345 |
+
"loss": 7.1283,
|
| 346 |
+
"step": 48
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 0.5,
|
| 350 |
+
"grad_norm": 160.0965118408203,
|
| 351 |
+
"learning_rate": 4.897959183673469e-06,
|
| 352 |
+
"loss": 0.711,
|
| 353 |
+
"step": 49
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 0.5102040816326531,
|
| 357 |
+
"grad_norm": 93.697265625,
|
| 358 |
+
"learning_rate": 5e-06,
|
| 359 |
+
"loss": 0.4716,
|
| 360 |
+
"step": 50
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 0.5204081632653061,
|
| 364 |
+
"grad_norm": 292.9518737792969,
|
| 365 |
+
"learning_rate": 5.1020408163265315e-06,
|
| 366 |
+
"loss": 2.2895,
|
| 367 |
+
"step": 51
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 0.5306122448979592,
|
| 371 |
+
"grad_norm": 335.4564514160156,
|
| 372 |
+
"learning_rate": 5.204081632653062e-06,
|
| 373 |
+
"loss": 1.9235,
|
| 374 |
+
"step": 52
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 0.5408163265306123,
|
| 378 |
+
"grad_norm": 138.63575744628906,
|
| 379 |
+
"learning_rate": 5.306122448979593e-06,
|
| 380 |
+
"loss": 0.8777,
|
| 381 |
+
"step": 53
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 0.5510204081632653,
|
| 385 |
+
"grad_norm": 1.011594533920288,
|
| 386 |
+
"learning_rate": 5.408163265306123e-06,
|
| 387 |
+
"loss": 0.0038,
|
| 388 |
+
"step": 54
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 0.5612244897959183,
|
| 392 |
+
"grad_norm": 506.25152587890625,
|
| 393 |
+
"learning_rate": 5.510204081632653e-06,
|
| 394 |
+
"loss": 1.5598,
|
| 395 |
+
"step": 55
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 0.5714285714285714,
|
| 399 |
+
"grad_norm": 2.2550530433654785,
|
| 400 |
+
"learning_rate": 5.6122448979591834e-06,
|
| 401 |
+
"loss": 0.0177,
|
| 402 |
+
"step": 56
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 0.5816326530612245,
|
| 406 |
+
"grad_norm": 13.93323802947998,
|
| 407 |
+
"learning_rate": 5.7142857142857145e-06,
|
| 408 |
+
"loss": 0.0837,
|
| 409 |
+
"step": 57
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 0.5918367346938775,
|
| 413 |
+
"grad_norm": 7.279649257659912,
|
| 414 |
+
"learning_rate": 5.816326530612246e-06,
|
| 415 |
+
"loss": 0.0429,
|
| 416 |
+
"step": 58
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 0.6020408163265306,
|
| 420 |
+
"grad_norm": 0.9923371076583862,
|
| 421 |
+
"learning_rate": 5.918367346938776e-06,
|
| 422 |
+
"loss": 0.0071,
|
| 423 |
+
"step": 59
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 0.6122448979591837,
|
| 427 |
+
"grad_norm": 743.8301391601562,
|
| 428 |
+
"learning_rate": 6.020408163265307e-06,
|
| 429 |
+
"loss": 2.7217,
|
| 430 |
+
"step": 60
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 0.6224489795918368,
|
| 434 |
+
"grad_norm": 227.04403686523438,
|
| 435 |
+
"learning_rate": 6.122448979591837e-06,
|
| 436 |
+
"loss": 3.9013,
|
| 437 |
+
"step": 61
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 0.6326530612244898,
|
| 441 |
+
"grad_norm": 193.12701416015625,
|
| 442 |
+
"learning_rate": 6.224489795918368e-06,
|
| 443 |
+
"loss": 1.417,
|
| 444 |
+
"step": 62
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 0.6428571428571429,
|
| 448 |
+
"grad_norm": 642.7814331054688,
|
| 449 |
+
"learning_rate": 6.326530612244899e-06,
|
| 450 |
+
"loss": 3.5854,
|
| 451 |
+
"step": 63
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 0.6530612244897959,
|
| 455 |
+
"grad_norm": 1007.544189453125,
|
| 456 |
+
"learning_rate": 6.4285714285714295e-06,
|
| 457 |
+
"loss": 12.918,
|
| 458 |
+
"step": 64
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 0.6632653061224489,
|
| 462 |
+
"grad_norm": 1310.942138671875,
|
| 463 |
+
"learning_rate": 6.530612244897959e-06,
|
| 464 |
+
"loss": 7.1566,
|
| 465 |
+
"step": 65
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 0.673469387755102,
|
| 469 |
+
"grad_norm": 810.1301879882812,
|
| 470 |
+
"learning_rate": 6.63265306122449e-06,
|
| 471 |
+
"loss": 3.9897,
|
| 472 |
+
"step": 66
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 0.6836734693877551,
|
| 476 |
+
"grad_norm": 513.1759643554688,
|
| 477 |
+
"learning_rate": 6.734693877551021e-06,
|
| 478 |
+
"loss": 8.1139,
|
| 479 |
+
"step": 67
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 0.6938775510204082,
|
| 483 |
+
"grad_norm": 1414.8878173828125,
|
| 484 |
+
"learning_rate": 6.836734693877551e-06,
|
| 485 |
+
"loss": 5.7005,
|
| 486 |
+
"step": 68
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 0.7040816326530612,
|
| 490 |
+
"grad_norm": 31.607126235961914,
|
| 491 |
+
"learning_rate": 6.938775510204082e-06,
|
| 492 |
+
"loss": 0.1219,
|
| 493 |
+
"step": 69
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.7142857142857143,
|
| 497 |
+
"grad_norm": 799.9751586914062,
|
| 498 |
+
"learning_rate": 7.0408163265306125e-06,
|
| 499 |
+
"loss": 5.7849,
|
| 500 |
+
"step": 70
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 0.7244897959183674,
|
| 504 |
+
"grad_norm": 132.71778869628906,
|
| 505 |
+
"learning_rate": 7.1428571428571436e-06,
|
| 506 |
+
"loss": 1.0726,
|
| 507 |
+
"step": 71
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 0.7346938775510204,
|
| 511 |
+
"grad_norm": 256.61041259765625,
|
| 512 |
+
"learning_rate": 7.244897959183675e-06,
|
| 513 |
+
"loss": 1.2599,
|
| 514 |
+
"step": 72
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 0.7448979591836735,
|
| 518 |
+
"grad_norm": 192.0435333251953,
|
| 519 |
+
"learning_rate": 7.346938775510205e-06,
|
| 520 |
+
"loss": 0.6473,
|
| 521 |
+
"step": 73
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 0.7551020408163265,
|
| 525 |
+
"grad_norm": 293.7915954589844,
|
| 526 |
+
"learning_rate": 7.448979591836736e-06,
|
| 527 |
+
"loss": 1.0397,
|
| 528 |
+
"step": 74
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 0.7653061224489796,
|
| 532 |
+
"grad_norm": 312.2645263671875,
|
| 533 |
+
"learning_rate": 7.551020408163265e-06,
|
| 534 |
+
"loss": 1.5555,
|
| 535 |
+
"step": 75
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 0.7755102040816326,
|
| 539 |
+
"grad_norm": 1.417815923690796,
|
| 540 |
+
"learning_rate": 7.653061224489796e-06,
|
| 541 |
+
"loss": 0.0078,
|
| 542 |
+
"step": 76
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 0.7857142857142857,
|
| 546 |
+
"grad_norm": 1.4391653537750244,
|
| 547 |
+
"learning_rate": 7.755102040816327e-06,
|
| 548 |
+
"loss": 0.0048,
|
| 549 |
+
"step": 77
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 0.7959183673469388,
|
| 553 |
+
"grad_norm": 5.628185749053955,
|
| 554 |
+
"learning_rate": 7.857142857142858e-06,
|
| 555 |
+
"loss": 0.0323,
|
| 556 |
+
"step": 78
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 0.8061224489795918,
|
| 560 |
+
"grad_norm": 264.5353698730469,
|
| 561 |
+
"learning_rate": 7.959183673469388e-06,
|
| 562 |
+
"loss": 1.7425,
|
| 563 |
+
"step": 79
|
| 564 |
+
},
|
| 565 |
+
{
|
| 566 |
+
"epoch": 0.8163265306122449,
|
| 567 |
+
"grad_norm": 1.5278851985931396,
|
| 568 |
+
"learning_rate": 8.06122448979592e-06,
|
| 569 |
+
"loss": 0.0035,
|
| 570 |
+
"step": 80
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 0.826530612244898,
|
| 574 |
+
"grad_norm": 932.3336181640625,
|
| 575 |
+
"learning_rate": 8.16326530612245e-06,
|
| 576 |
+
"loss": 6.4849,
|
| 577 |
+
"step": 81
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 0.8367346938775511,
|
| 581 |
+
"grad_norm": 635.4749145507812,
|
| 582 |
+
"learning_rate": 8.26530612244898e-06,
|
| 583 |
+
"loss": 4.3767,
|
| 584 |
+
"step": 82
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.8469387755102041,
|
| 588 |
+
"grad_norm": 8.875201225280762,
|
| 589 |
+
"learning_rate": 8.36734693877551e-06,
|
| 590 |
+
"loss": 0.0186,
|
| 591 |
+
"step": 83
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.8571428571428571,
|
| 595 |
+
"grad_norm": 0.15500876307487488,
|
| 596 |
+
"learning_rate": 8.469387755102042e-06,
|
| 597 |
+
"loss": 0.0008,
|
| 598 |
+
"step": 84
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 0.8673469387755102,
|
| 602 |
+
"grad_norm": 269.5357666015625,
|
| 603 |
+
"learning_rate": 8.571428571428571e-06,
|
| 604 |
+
"loss": 0.8354,
|
| 605 |
+
"step": 85
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 0.8775510204081632,
|
| 609 |
+
"grad_norm": 5.054287910461426,
|
| 610 |
+
"learning_rate": 8.673469387755103e-06,
|
| 611 |
+
"loss": 0.0162,
|
| 612 |
+
"step": 86
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 0.8877551020408163,
|
| 616 |
+
"grad_norm": 84.90735626220703,
|
| 617 |
+
"learning_rate": 8.775510204081633e-06,
|
| 618 |
+
"loss": 0.1282,
|
| 619 |
+
"step": 87
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 0.8979591836734694,
|
| 623 |
+
"grad_norm": 81.53719329833984,
|
| 624 |
+
"learning_rate": 8.877551020408163e-06,
|
| 625 |
+
"loss": 0.4514,
|
| 626 |
+
"step": 88
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 0.9081632653061225,
|
| 630 |
+
"grad_norm": 547.4005126953125,
|
| 631 |
+
"learning_rate": 8.979591836734695e-06,
|
| 632 |
+
"loss": 4.9103,
|
| 633 |
+
"step": 89
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 0.9183673469387755,
|
| 637 |
+
"grad_norm": 25.792213439941406,
|
| 638 |
+
"learning_rate": 9.081632653061225e-06,
|
| 639 |
+
"loss": 0.0762,
|
| 640 |
+
"step": 90
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 0.9285714285714286,
|
| 644 |
+
"grad_norm": 10.455421447753906,
|
| 645 |
+
"learning_rate": 9.183673469387756e-06,
|
| 646 |
+
"loss": 0.0444,
|
| 647 |
+
"step": 91
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 0.9387755102040817,
|
| 651 |
+
"grad_norm": 472.54376220703125,
|
| 652 |
+
"learning_rate": 9.285714285714288e-06,
|
| 653 |
+
"loss": 1.8609,
|
| 654 |
+
"step": 92
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 0.9489795918367347,
|
| 658 |
+
"grad_norm": 31.092357635498047,
|
| 659 |
+
"learning_rate": 9.387755102040818e-06,
|
| 660 |
+
"loss": 0.1489,
|
| 661 |
+
"step": 93
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 0.9591836734693877,
|
| 665 |
+
"grad_norm": 231.94151306152344,
|
| 666 |
+
"learning_rate": 9.489795918367348e-06,
|
| 667 |
+
"loss": 0.5926,
|
| 668 |
+
"step": 94
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.9693877551020408,
|
| 672 |
+
"grad_norm": 211.05117797851562,
|
| 673 |
+
"learning_rate": 9.591836734693878e-06,
|
| 674 |
+
"loss": 0.5344,
|
| 675 |
+
"step": 95
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 0.9795918367346939,
|
| 679 |
+
"grad_norm": 217.01339721679688,
|
| 680 |
+
"learning_rate": 9.693877551020408e-06,
|
| 681 |
+
"loss": 0.4693,
|
| 682 |
+
"step": 96
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 0.9897959183673469,
|
| 686 |
+
"grad_norm": 1123.96484375,
|
| 687 |
+
"learning_rate": 9.795918367346939e-06,
|
| 688 |
+
"loss": 9.2282,
|
| 689 |
+
"step": 97
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 1.0,
|
| 693 |
+
"grad_norm": 741.597412109375,
|
| 694 |
+
"learning_rate": 9.89795918367347e-06,
|
| 695 |
+
"loss": 4.6238,
|
| 696 |
+
"step": 98
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 1.0,
|
| 700 |
+
"eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
|
| 701 |
+
"eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
|
| 702 |
+
"eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
|
| 703 |
+
"eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
|
| 704 |
+
"eval_dim_1024_cosine_map@100": 0.45394800707643057,
|
| 705 |
+
"eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
|
| 706 |
+
"eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
|
| 707 |
+
"eval_dim_1024_cosine_precision@1": 0.36235595390524966,
|
| 708 |
+
"eval_dim_1024_cosine_precision@10": 0.3176696542893726,
|
| 709 |
+
"eval_dim_1024_cosine_precision@3": 0.36192915066154496,
|
| 710 |
+
"eval_dim_1024_cosine_precision@5": 0.35172855313700385,
|
| 711 |
+
"eval_dim_1024_cosine_recall@1": 0.04346309464734114,
|
| 712 |
+
"eval_dim_1024_cosine_recall@10": 0.28096984500258326,
|
| 713 |
+
"eval_dim_1024_cosine_recall@3": 0.12757812796185336,
|
| 714 |
+
"eval_dim_1024_cosine_recall@5": 0.19200836801442767,
|
| 715 |
+
"eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
|
| 716 |
+
"eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
|
| 717 |
+
"eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
|
| 718 |
+
"eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
|
| 719 |
+
"eval_dim_128_cosine_map@100": 0.3963095303049961,
|
| 720 |
+
"eval_dim_128_cosine_mrr@10": 0.3199812511432227,
|
| 721 |
+
"eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
|
| 722 |
+
"eval_dim_128_cosine_precision@1": 0.3085787451984635,
|
| 723 |
+
"eval_dim_128_cosine_precision@10": 0.2752880921895006,
|
| 724 |
+
"eval_dim_128_cosine_precision@3": 0.3079385403329065,
|
| 725 |
+
"eval_dim_128_cosine_precision@5": 0.29961587708066584,
|
| 726 |
+
"eval_dim_128_cosine_recall@1": 0.036297623853982414,
|
| 727 |
+
"eval_dim_128_cosine_recall@10": 0.24000960695821508,
|
| 728 |
+
"eval_dim_128_cosine_recall@3": 0.10638786483158841,
|
| 729 |
+
"eval_dim_128_cosine_recall@5": 0.16032639984514846,
|
| 730 |
+
"eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
|
| 731 |
+
"eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
|
| 732 |
+
"eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
|
| 733 |
+
"eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
|
| 734 |
+
"eval_dim_256_cosine_map@100": 0.4298669852983799,
|
| 735 |
+
"eval_dim_256_cosine_mrr@10": 0.3551361197487955,
|
| 736 |
+
"eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
|
| 737 |
+
"eval_dim_256_cosine_precision@1": 0.3437900128040973,
|
| 738 |
+
"eval_dim_256_cosine_precision@10": 0.3040973111395647,
|
| 739 |
+
"eval_dim_256_cosine_precision@3": 0.342936406316688,
|
| 740 |
+
"eval_dim_256_cosine_precision@5": 0.33457106274007686,
|
| 741 |
+
"eval_dim_256_cosine_recall@1": 0.04013102608834382,
|
| 742 |
+
"eval_dim_256_cosine_recall@10": 0.2648598688529433,
|
| 743 |
+
"eval_dim_256_cosine_recall@3": 0.11771735023719074,
|
| 744 |
+
"eval_dim_256_cosine_recall@5": 0.17837935755014916,
|
| 745 |
+
"eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
|
| 746 |
+
"eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
|
| 747 |
+
"eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
|
| 748 |
+
"eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
|
| 749 |
+
"eval_dim_512_cosine_map@100": 0.4476805587612892,
|
| 750 |
+
"eval_dim_512_cosine_mrr@10": 0.37212542934373866,
|
| 751 |
+
"eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
|
| 752 |
+
"eval_dim_512_cosine_precision@1": 0.35979513444302175,
|
| 753 |
+
"eval_dim_512_cosine_precision@10": 0.3173495518565941,
|
| 754 |
+
"eval_dim_512_cosine_precision@3": 0.35936833119931705,
|
| 755 |
+
"eval_dim_512_cosine_precision@5": 0.34967989756722156,
|
| 756 |
+
"eval_dim_512_cosine_recall@1": 0.04265405128130224,
|
| 757 |
+
"eval_dim_512_cosine_recall@10": 0.2781876565001863,
|
| 758 |
+
"eval_dim_512_cosine_recall@3": 0.12523102347193127,
|
| 759 |
+
"eval_dim_512_cosine_recall@5": 0.18912519336740205,
|
| 760 |
+
"eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
|
| 761 |
+
"eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
|
| 762 |
+
"eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
|
| 763 |
+
"eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
|
| 764 |
+
"eval_dim_64_cosine_map@100": 0.3539045084602349,
|
| 765 |
+
"eval_dim_64_cosine_mrr@10": 0.28429414873076814,
|
| 766 |
+
"eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
|
| 767 |
+
"eval_dim_64_cosine_precision@1": 0.2740076824583867,
|
| 768 |
+
"eval_dim_64_cosine_precision@10": 0.24571062740076827,
|
| 769 |
+
"eval_dim_64_cosine_precision@3": 0.27315407597097735,
|
| 770 |
+
"eval_dim_64_cosine_precision@5": 0.2670934699103713,
|
| 771 |
+
"eval_dim_64_cosine_recall@1": 0.03167890172057568,
|
| 772 |
+
"eval_dim_64_cosine_recall@10": 0.21092883720941633,
|
| 773 |
+
"eval_dim_64_cosine_recall@3": 0.09267023360511464,
|
| 774 |
+
"eval_dim_64_cosine_recall@5": 0.14048625468314752,
|
| 775 |
+
"eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
|
| 776 |
+
"eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
|
| 777 |
+
"eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
|
| 778 |
+
"eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
|
| 779 |
+
"eval_dim_768_cosine_map@100": 0.4493001842217619,
|
| 780 |
+
"eval_dim_768_cosine_mrr@10": 0.37149335406377615,
|
| 781 |
+
"eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
|
| 782 |
+
"eval_dim_768_cosine_precision@1": 0.3591549295774648,
|
| 783 |
+
"eval_dim_768_cosine_precision@10": 0.31670934699103714,
|
| 784 |
+
"eval_dim_768_cosine_precision@3": 0.3587281263337601,
|
| 785 |
+
"eval_dim_768_cosine_precision@5": 0.34852752880921894,
|
| 786 |
+
"eval_dim_768_cosine_recall@1": 0.04250079684114586,
|
| 787 |
+
"eval_dim_768_cosine_recall@10": 0.27695909667507057,
|
| 788 |
+
"eval_dim_768_cosine_recall@3": 0.12462187901616553,
|
| 789 |
+
"eval_dim_768_cosine_recall@5": 0.1875478484365334,
|
| 790 |
+
"eval_runtime": 99.0843,
|
| 791 |
+
"eval_samples_per_second": 0.0,
|
| 792 |
+
"eval_sequential_score": 0.29402896525927075,
|
| 793 |
+
"eval_steps_per_second": 0.0,
|
| 794 |
+
"step": 98
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 1.010204081632653,
|
| 798 |
+
"grad_norm": 342.861328125,
|
| 799 |
+
"learning_rate": 1e-05,
|
| 800 |
+
"loss": 1.9644,
|
| 801 |
+
"step": 99
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 1.0204081632653061,
|
| 805 |
+
"grad_norm": 761.8235473632812,
|
| 806 |
+
"learning_rate": 1.0102040816326531e-05,
|
| 807 |
+
"loss": 7.4242,
|
| 808 |
+
"step": 100
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 1.030612244897959,
|
| 812 |
+
"grad_norm": 146.39175415039062,
|
| 813 |
+
"learning_rate": 1.0204081632653063e-05,
|
| 814 |
+
"loss": 0.9592,
|
| 815 |
+
"step": 101
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 1.0408163265306123,
|
| 819 |
+
"grad_norm": 69.37447357177734,
|
| 820 |
+
"learning_rate": 1.0306122448979591e-05,
|
| 821 |
+
"loss": 0.3051,
|
| 822 |
+
"step": 102
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 1.0510204081632653,
|
| 826 |
+
"grad_norm": 241.93687438964844,
|
| 827 |
+
"learning_rate": 1.0408163265306123e-05,
|
| 828 |
+
"loss": 0.926,
|
| 829 |
+
"step": 103
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 1.0612244897959184,
|
| 833 |
+
"grad_norm": 13.75313949584961,
|
| 834 |
+
"learning_rate": 1.0510204081632654e-05,
|
| 835 |
+
"loss": 0.0751,
|
| 836 |
+
"step": 104
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 1.0714285714285714,
|
| 840 |
+
"grad_norm": 1.861573576927185,
|
| 841 |
+
"learning_rate": 1.0612244897959186e-05,
|
| 842 |
+
"loss": 0.0111,
|
| 843 |
+
"step": 105
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 1.0816326530612246,
|
| 847 |
+
"grad_norm": 1.4446377754211426,
|
| 848 |
+
"learning_rate": 1.0714285714285714e-05,
|
| 849 |
+
"loss": 0.0072,
|
| 850 |
+
"step": 106
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 1.0918367346938775,
|
| 854 |
+
"grad_norm": 2.217988967895508,
|
| 855 |
+
"learning_rate": 1.0816326530612246e-05,
|
| 856 |
+
"loss": 0.0107,
|
| 857 |
+
"step": 107
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 1.1020408163265305,
|
| 861 |
+
"grad_norm": 620.331787109375,
|
| 862 |
+
"learning_rate": 1.0918367346938776e-05,
|
| 863 |
+
"loss": 3.4505,
|
| 864 |
+
"step": 108
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 1.1122448979591837,
|
| 868 |
+
"grad_norm": 1.4038218259811401,
|
| 869 |
+
"learning_rate": 1.1020408163265306e-05,
|
| 870 |
+
"loss": 0.005,
|
| 871 |
+
"step": 109
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 1.1224489795918366,
|
| 875 |
+
"grad_norm": 46.48203659057617,
|
| 876 |
+
"learning_rate": 1.1122448979591838e-05,
|
| 877 |
+
"loss": 0.1701,
|
| 878 |
+
"step": 110
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 1.1326530612244898,
|
| 882 |
+
"grad_norm": 6.003911972045898,
|
| 883 |
+
"learning_rate": 1.1224489795918367e-05,
|
| 884 |
+
"loss": 0.027,
|
| 885 |
+
"step": 111
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 1.1428571428571428,
|
| 889 |
+
"grad_norm": 379.09527587890625,
|
| 890 |
+
"learning_rate": 1.1326530612244899e-05,
|
| 891 |
+
"loss": 1.3824,
|
| 892 |
+
"step": 112
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 1.153061224489796,
|
| 896 |
+
"grad_norm": 1103.1077880859375,
|
| 897 |
+
"learning_rate": 1.1428571428571429e-05,
|
| 898 |
+
"loss": 8.1459,
|
| 899 |
+
"step": 113
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 1.163265306122449,
|
| 903 |
+
"grad_norm": 29.499439239501953,
|
| 904 |
+
"learning_rate": 1.1530612244897961e-05,
|
| 905 |
+
"loss": 0.0917,
|
| 906 |
+
"step": 114
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 1.1734693877551021,
|
| 910 |
+
"grad_norm": 0.06352390348911285,
|
| 911 |
+
"learning_rate": 1.1632653061224491e-05,
|
| 912 |
+
"loss": 0.0003,
|
| 913 |
+
"step": 115
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 1.183673469387755,
|
| 917 |
+
"grad_norm": 111.55418395996094,
|
| 918 |
+
"learning_rate": 1.1734693877551021e-05,
|
| 919 |
+
"loss": 0.3716,
|
| 920 |
+
"step": 116
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 1.193877551020408,
|
| 924 |
+
"grad_norm": 32.166500091552734,
|
| 925 |
+
"learning_rate": 1.1836734693877552e-05,
|
| 926 |
+
"loss": 0.1704,
|
| 927 |
+
"step": 117
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 1.2040816326530612,
|
| 931 |
+
"grad_norm": 870.0745239257812,
|
| 932 |
+
"learning_rate": 1.1938775510204084e-05,
|
| 933 |
+
"loss": 9.8059,
|
| 934 |
+
"step": 118
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 1.2142857142857142,
|
| 938 |
+
"grad_norm": 139.17662048339844,
|
| 939 |
+
"learning_rate": 1.2040816326530614e-05,
|
| 940 |
+
"loss": 0.5882,
|
| 941 |
+
"step": 119
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 1.2244897959183674,
|
| 945 |
+
"grad_norm": 28.489713668823242,
|
| 946 |
+
"learning_rate": 1.2142857142857142e-05,
|
| 947 |
+
"loss": 0.0531,
|
| 948 |
+
"step": 120
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 1.2346938775510203,
|
| 952 |
+
"grad_norm": 0.18062859773635864,
|
| 953 |
+
"learning_rate": 1.2244897959183674e-05,
|
| 954 |
+
"loss": 0.0005,
|
| 955 |
+
"step": 121
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 1.2448979591836735,
|
| 959 |
+
"grad_norm": 8.26645565032959,
|
| 960 |
+
"learning_rate": 1.2346938775510204e-05,
|
| 961 |
+
"loss": 0.0314,
|
| 962 |
+
"step": 122
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 1.2551020408163265,
|
| 966 |
+
"grad_norm": 64.67955017089844,
|
| 967 |
+
"learning_rate": 1.2448979591836736e-05,
|
| 968 |
+
"loss": 0.1811,
|
| 969 |
+
"step": 123
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 1.2653061224489797,
|
| 973 |
+
"grad_norm": 420.44439697265625,
|
| 974 |
+
"learning_rate": 1.2551020408163267e-05,
|
| 975 |
+
"loss": 2.6136,
|
| 976 |
+
"step": 124
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 1.2755102040816326,
|
| 980 |
+
"grad_norm": 3.5323660373687744,
|
| 981 |
+
"learning_rate": 1.2653061224489798e-05,
|
| 982 |
+
"loss": 0.0087,
|
| 983 |
+
"step": 125
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 1.2857142857142856,
|
| 987 |
+
"grad_norm": 52.854801177978516,
|
| 988 |
+
"learning_rate": 1.2755102040816327e-05,
|
| 989 |
+
"loss": 0.1269,
|
| 990 |
+
"step": 126
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 1.2959183673469388,
|
| 994 |
+
"grad_norm": 4.583413124084473,
|
| 995 |
+
"learning_rate": 1.2857142857142859e-05,
|
| 996 |
+
"loss": 0.0091,
|
| 997 |
+
"step": 127
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 1.306122448979592,
|
| 1001 |
+
"grad_norm": 17.20958137512207,
|
| 1002 |
+
"learning_rate": 1.2959183673469389e-05,
|
| 1003 |
+
"loss": 0.0467,
|
| 1004 |
+
"step": 128
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 1.316326530612245,
|
| 1008 |
+
"grad_norm": 8.821357727050781,
|
| 1009 |
+
"learning_rate": 1.3061224489795918e-05,
|
| 1010 |
+
"loss": 0.0282,
|
| 1011 |
+
"step": 129
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 1.3265306122448979,
|
| 1015 |
+
"grad_norm": 0.3024923503398895,
|
| 1016 |
+
"learning_rate": 1.316326530612245e-05,
|
| 1017 |
+
"loss": 0.0012,
|
| 1018 |
+
"step": 130
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 1.336734693877551,
|
| 1022 |
+
"grad_norm": 1110.76513671875,
|
| 1023 |
+
"learning_rate": 1.326530612244898e-05,
|
| 1024 |
+
"loss": 3.5135,
|
| 1025 |
+
"step": 131
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 1.346938775510204,
|
| 1029 |
+
"grad_norm": 4.655632495880127,
|
| 1030 |
+
"learning_rate": 1.3367346938775512e-05,
|
| 1031 |
+
"loss": 0.0186,
|
| 1032 |
+
"step": 132
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 1.3571428571428572,
|
| 1036 |
+
"grad_norm": 641.764404296875,
|
| 1037 |
+
"learning_rate": 1.3469387755102042e-05,
|
| 1038 |
+
"loss": 3.2599,
|
| 1039 |
+
"step": 133
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 1.3673469387755102,
|
| 1043 |
+
"grad_norm": 1076.8260498046875,
|
| 1044 |
+
"learning_rate": 1.3571428571428574e-05,
|
| 1045 |
+
"loss": 5.5417,
|
| 1046 |
+
"step": 134
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 1.3775510204081631,
|
| 1050 |
+
"grad_norm": 0.5416738390922546,
|
| 1051 |
+
"learning_rate": 1.3673469387755102e-05,
|
| 1052 |
+
"loss": 0.0019,
|
| 1053 |
+
"step": 135
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 1.3877551020408163,
|
| 1057 |
+
"grad_norm": 200.03311157226562,
|
| 1058 |
+
"learning_rate": 1.3775510204081634e-05,
|
| 1059 |
+
"loss": 0.5649,
|
| 1060 |
+
"step": 136
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 1.3979591836734695,
|
| 1064 |
+
"grad_norm": 35.22038650512695,
|
| 1065 |
+
"learning_rate": 1.3877551020408165e-05,
|
| 1066 |
+
"loss": 0.084,
|
| 1067 |
+
"step": 137
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 1.4081632653061225,
|
| 1071 |
+
"grad_norm": 141.9106903076172,
|
| 1072 |
+
"learning_rate": 1.3979591836734696e-05,
|
| 1073 |
+
"loss": 0.6062,
|
| 1074 |
+
"step": 138
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 1.4183673469387754,
|
| 1078 |
+
"grad_norm": 15.920783996582031,
|
| 1079 |
+
"learning_rate": 1.4081632653061225e-05,
|
| 1080 |
+
"loss": 0.0639,
|
| 1081 |
+
"step": 139
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 1.4285714285714286,
|
| 1085 |
+
"grad_norm": 206.33274841308594,
|
| 1086 |
+
"learning_rate": 1.4183673469387755e-05,
|
| 1087 |
+
"loss": 0.4069,
|
| 1088 |
+
"step": 140
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 1.4387755102040816,
|
| 1092 |
+
"grad_norm": 51.149173736572266,
|
| 1093 |
+
"learning_rate": 1.4285714285714287e-05,
|
| 1094 |
+
"loss": 0.2462,
|
| 1095 |
+
"step": 141
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 1.4489795918367347,
|
| 1099 |
+
"grad_norm": 658.653564453125,
|
| 1100 |
+
"learning_rate": 1.4387755102040817e-05,
|
| 1101 |
+
"loss": 4.9288,
|
| 1102 |
+
"step": 142
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 1.4591836734693877,
|
| 1106 |
+
"grad_norm": 63.49065399169922,
|
| 1107 |
+
"learning_rate": 1.448979591836735e-05,
|
| 1108 |
+
"loss": 0.1852,
|
| 1109 |
+
"step": 143
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 1.469387755102041,
|
| 1113 |
+
"grad_norm": 1453.699462890625,
|
| 1114 |
+
"learning_rate": 1.4591836734693878e-05,
|
| 1115 |
+
"loss": 3.0971,
|
| 1116 |
+
"step": 144
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 1.4795918367346939,
|
| 1120 |
+
"grad_norm": 499.0628662109375,
|
| 1121 |
+
"learning_rate": 1.469387755102041e-05,
|
| 1122 |
+
"loss": 3.787,
|
| 1123 |
+
"step": 145
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 1.489795918367347,
|
| 1127 |
+
"grad_norm": 253.33152770996094,
|
| 1128 |
+
"learning_rate": 1.479591836734694e-05,
|
| 1129 |
+
"loss": 0.8474,
|
| 1130 |
+
"step": 146
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 1.5,
|
| 1134 |
+
"grad_norm": 0.8343175649642944,
|
| 1135 |
+
"learning_rate": 1.4897959183673472e-05,
|
| 1136 |
+
"loss": 0.0028,
|
| 1137 |
+
"step": 147
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 1.510204081632653,
|
| 1141 |
+
"grad_norm": 38.5785026550293,
|
| 1142 |
+
"learning_rate": 1.5000000000000002e-05,
|
| 1143 |
+
"loss": 0.0931,
|
| 1144 |
+
"step": 148
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 1.5204081632653061,
|
| 1148 |
+
"grad_norm": 563.4974365234375,
|
| 1149 |
+
"learning_rate": 1.510204081632653e-05,
|
| 1150 |
+
"loss": 1.8378,
|
| 1151 |
+
"step": 149
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 1.5306122448979593,
|
| 1155 |
+
"grad_norm": 749.0945434570312,
|
| 1156 |
+
"learning_rate": 1.5204081632653063e-05,
|
| 1157 |
+
"loss": 2.6074,
|
| 1158 |
+
"step": 150
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 1.5408163265306123,
|
| 1162 |
+
"grad_norm": 62.52786636352539,
|
| 1163 |
+
"learning_rate": 1.530612244897959e-05,
|
| 1164 |
+
"loss": 0.1441,
|
| 1165 |
+
"step": 151
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 1.5510204081632653,
|
| 1169 |
+
"grad_norm": 281.54400634765625,
|
| 1170 |
+
"learning_rate": 1.5408163265306123e-05,
|
| 1171 |
+
"loss": 0.5622,
|
| 1172 |
+
"step": 152
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 1.5612244897959182,
|
| 1176 |
+
"grad_norm": 1.1233166456222534,
|
| 1177 |
+
"learning_rate": 1.5510204081632655e-05,
|
| 1178 |
+
"loss": 0.0049,
|
| 1179 |
+
"step": 153
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 1.5714285714285714,
|
| 1183 |
+
"grad_norm": 9.458003044128418,
|
| 1184 |
+
"learning_rate": 1.5612244897959187e-05,
|
| 1185 |
+
"loss": 0.0268,
|
| 1186 |
+
"step": 154
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 1.5816326530612246,
|
| 1190 |
+
"grad_norm": 7.9042439460754395,
|
| 1191 |
+
"learning_rate": 1.5714285714285715e-05,
|
| 1192 |
+
"loss": 0.0281,
|
| 1193 |
+
"step": 155
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 1.5918367346938775,
|
| 1197 |
+
"grad_norm": 402.8667907714844,
|
| 1198 |
+
"learning_rate": 1.5816326530612247e-05,
|
| 1199 |
+
"loss": 2.9755,
|
| 1200 |
+
"step": 156
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 1.6020408163265305,
|
| 1204 |
+
"grad_norm": 359.3101806640625,
|
| 1205 |
+
"learning_rate": 1.5918367346938776e-05,
|
| 1206 |
+
"loss": 1.0982,
|
| 1207 |
+
"step": 157
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 1.6122448979591837,
|
| 1211 |
+
"grad_norm": 26.466707229614258,
|
| 1212 |
+
"learning_rate": 1.6020408163265308e-05,
|
| 1213 |
+
"loss": 0.0621,
|
| 1214 |
+
"step": 158
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 1.6224489795918369,
|
| 1218 |
+
"grad_norm": 472.1581726074219,
|
| 1219 |
+
"learning_rate": 1.612244897959184e-05,
|
| 1220 |
+
"loss": 6.9631,
|
| 1221 |
+
"step": 159
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 1.6326530612244898,
|
| 1225 |
+
"grad_norm": 812.54638671875,
|
| 1226 |
+
"learning_rate": 1.6224489795918368e-05,
|
| 1227 |
+
"loss": 4.7216,
|
| 1228 |
+
"step": 160
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 1.6428571428571428,
|
| 1232 |
+
"grad_norm": 252.12796020507812,
|
| 1233 |
+
"learning_rate": 1.63265306122449e-05,
|
| 1234 |
+
"loss": 0.848,
|
| 1235 |
+
"step": 161
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 1.6530612244897958,
|
| 1239 |
+
"grad_norm": 1087.48828125,
|
| 1240 |
+
"learning_rate": 1.642857142857143e-05,
|
| 1241 |
+
"loss": 5.6006,
|
| 1242 |
+
"step": 162
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 1.663265306122449,
|
| 1246 |
+
"grad_norm": 280.405517578125,
|
| 1247 |
+
"learning_rate": 1.653061224489796e-05,
|
| 1248 |
+
"loss": 4.299,
|
| 1249 |
+
"step": 163
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 1.6734693877551021,
|
| 1253 |
+
"grad_norm": 457.81494140625,
|
| 1254 |
+
"learning_rate": 1.6632653061224492e-05,
|
| 1255 |
+
"loss": 2.042,
|
| 1256 |
+
"step": 164
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"epoch": 1.683673469387755,
|
| 1260 |
+
"grad_norm": 511.0380859375,
|
| 1261 |
+
"learning_rate": 1.673469387755102e-05,
|
| 1262 |
+
"loss": 2.4823,
|
| 1263 |
+
"step": 165
|
| 1264 |
+
},
|
| 1265 |
+
{
|
| 1266 |
+
"epoch": 1.693877551020408,
|
| 1267 |
+
"grad_norm": 7.505221366882324,
|
| 1268 |
+
"learning_rate": 1.6836734693877553e-05,
|
| 1269 |
+
"loss": 0.0189,
|
| 1270 |
+
"step": 166
|
| 1271 |
+
},
|
| 1272 |
+
{
|
| 1273 |
+
"epoch": 1.7040816326530612,
|
| 1274 |
+
"grad_norm": 1.01173734664917,
|
| 1275 |
+
"learning_rate": 1.6938775510204085e-05,
|
| 1276 |
+
"loss": 0.0039,
|
| 1277 |
+
"step": 167
|
| 1278 |
+
},
|
| 1279 |
+
{
|
| 1280 |
+
"epoch": 1.7142857142857144,
|
| 1281 |
+
"grad_norm": 0.5971992015838623,
|
| 1282 |
+
"learning_rate": 1.7040816326530613e-05,
|
| 1283 |
+
"loss": 0.0024,
|
| 1284 |
+
"step": 168
|
| 1285 |
+
},
|
| 1286 |
+
{
|
| 1287 |
+
"epoch": 1.7244897959183674,
|
| 1288 |
+
"grad_norm": 505.6401672363281,
|
| 1289 |
+
"learning_rate": 1.7142857142857142e-05,
|
| 1290 |
+
"loss": 2.0453,
|
| 1291 |
+
"step": 169
|
| 1292 |
+
},
|
| 1293 |
+
{
|
| 1294 |
+
"epoch": 1.7346938775510203,
|
| 1295 |
+
"grad_norm": 4.466002464294434,
|
| 1296 |
+
"learning_rate": 1.7244897959183674e-05,
|
| 1297 |
+
"loss": 0.0092,
|
| 1298 |
+
"step": 170
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"epoch": 1.7448979591836735,
|
| 1302 |
+
"grad_norm": 1.1195125579833984,
|
| 1303 |
+
"learning_rate": 1.7346938775510206e-05,
|
| 1304 |
+
"loss": 0.0029,
|
| 1305 |
+
"step": 171
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"epoch": 1.7551020408163265,
|
| 1309 |
+
"grad_norm": 104.82202911376953,
|
| 1310 |
+
"learning_rate": 1.7448979591836738e-05,
|
| 1311 |
+
"loss": 0.3271,
|
| 1312 |
+
"step": 172
|
| 1313 |
+
},
|
| 1314 |
+
{
|
| 1315 |
+
"epoch": 1.7653061224489797,
|
| 1316 |
+
"grad_norm": 1.860406756401062,
|
| 1317 |
+
"learning_rate": 1.7551020408163266e-05,
|
| 1318 |
+
"loss": 0.0054,
|
| 1319 |
+
"step": 173
|
| 1320 |
+
},
|
| 1321 |
+
{
|
| 1322 |
+
"epoch": 1.7755102040816326,
|
| 1323 |
+
"grad_norm": 0.044311508536338806,
|
| 1324 |
+
"learning_rate": 1.7653061224489798e-05,
|
| 1325 |
+
"loss": 0.0002,
|
| 1326 |
+
"step": 174
|
| 1327 |
+
},
|
| 1328 |
+
{
|
| 1329 |
+
"epoch": 1.7857142857142856,
|
| 1330 |
+
"grad_norm": 40.70656204223633,
|
| 1331 |
+
"learning_rate": 1.7755102040816327e-05,
|
| 1332 |
+
"loss": 0.0685,
|
| 1333 |
+
"step": 175
|
| 1334 |
+
},
|
| 1335 |
+
{
|
| 1336 |
+
"epoch": 1.7959183673469388,
|
| 1337 |
+
"grad_norm": 395.348388671875,
|
| 1338 |
+
"learning_rate": 1.785714285714286e-05,
|
| 1339 |
+
"loss": 1.3097,
|
| 1340 |
+
"step": 176
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"epoch": 1.806122448979592,
|
| 1344 |
+
"grad_norm": 326.2778015136719,
|
| 1345 |
+
"learning_rate": 1.795918367346939e-05,
|
| 1346 |
+
"loss": 1.8817,
|
| 1347 |
+
"step": 177
|
| 1348 |
+
},
|
| 1349 |
+
{
|
| 1350 |
+
"epoch": 1.816326530612245,
|
| 1351 |
+
"grad_norm": 41.05072784423828,
|
| 1352 |
+
"learning_rate": 1.806122448979592e-05,
|
| 1353 |
+
"loss": 0.2497,
|
| 1354 |
+
"step": 178
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 1.8265306122448979,
|
| 1358 |
+
"grad_norm": 121.29589080810547,
|
| 1359 |
+
"learning_rate": 1.816326530612245e-05,
|
| 1360 |
+
"loss": 0.5822,
|
| 1361 |
+
"step": 179
|
| 1362 |
+
},
|
| 1363 |
+
{
|
| 1364 |
+
"epoch": 1.836734693877551,
|
| 1365 |
+
"grad_norm": 711.2618408203125,
|
| 1366 |
+
"learning_rate": 1.826530612244898e-05,
|
| 1367 |
+
"loss": 1.8103,
|
| 1368 |
+
"step": 180
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"epoch": 1.8469387755102042,
|
| 1372 |
+
"grad_norm": 500.7347106933594,
|
| 1373 |
+
"learning_rate": 1.836734693877551e-05,
|
| 1374 |
+
"loss": 1.5506,
|
| 1375 |
+
"step": 181
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"epoch": 1.8571428571428572,
|
| 1379 |
+
"grad_norm": 252.05322265625,
|
| 1380 |
+
"learning_rate": 1.8469387755102043e-05,
|
| 1381 |
+
"loss": 1.281,
|
| 1382 |
+
"step": 182
|
| 1383 |
+
},
|
| 1384 |
+
{
|
| 1385 |
+
"epoch": 1.8673469387755102,
|
| 1386 |
+
"grad_norm": 370.9935302734375,
|
| 1387 |
+
"learning_rate": 1.8571428571428575e-05,
|
| 1388 |
+
"loss": 2.8616,
|
| 1389 |
+
"step": 183
|
| 1390 |
+
},
|
| 1391 |
+
{
|
| 1392 |
+
"epoch": 1.8775510204081631,
|
| 1393 |
+
"grad_norm": 4.682647705078125,
|
| 1394 |
+
"learning_rate": 1.8673469387755104e-05,
|
| 1395 |
+
"loss": 0.0118,
|
| 1396 |
+
"step": 184
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"epoch": 1.8877551020408163,
|
| 1400 |
+
"grad_norm": 2.143557548522949,
|
| 1401 |
+
"learning_rate": 1.8775510204081636e-05,
|
| 1402 |
+
"loss": 0.0038,
|
| 1403 |
+
"step": 185
|
| 1404 |
+
},
|
| 1405 |
+
{
|
| 1406 |
+
"epoch": 1.8979591836734695,
|
| 1407 |
+
"grad_norm": 6.499508857727051,
|
| 1408 |
+
"learning_rate": 1.8877551020408164e-05,
|
| 1409 |
+
"loss": 0.0331,
|
| 1410 |
+
"step": 186
|
| 1411 |
+
},
|
| 1412 |
+
{
|
| 1413 |
+
"epoch": 1.9081632653061225,
|
| 1414 |
+
"grad_norm": 7.2162089347839355,
|
| 1415 |
+
"learning_rate": 1.8979591836734696e-05,
|
| 1416 |
+
"loss": 0.0273,
|
| 1417 |
+
"step": 187
|
| 1418 |
+
},
|
| 1419 |
+
{
|
| 1420 |
+
"epoch": 1.9183673469387754,
|
| 1421 |
+
"grad_norm": 23.073841094970703,
|
| 1422 |
+
"learning_rate": 1.9081632653061225e-05,
|
| 1423 |
+
"loss": 0.1026,
|
| 1424 |
+
"step": 188
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"epoch": 1.9285714285714286,
|
| 1428 |
+
"grad_norm": 48.74525833129883,
|
| 1429 |
+
"learning_rate": 1.9183673469387756e-05,
|
| 1430 |
+
"loss": 0.1942,
|
| 1431 |
+
"step": 189
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"epoch": 1.9387755102040818,
|
| 1435 |
+
"grad_norm": 384.64678955078125,
|
| 1436 |
+
"learning_rate": 1.928571428571429e-05,
|
| 1437 |
+
"loss": 3.4886,
|
| 1438 |
+
"step": 190
|
| 1439 |
+
},
|
| 1440 |
+
{
|
| 1441 |
+
"epoch": 1.9489795918367347,
|
| 1442 |
+
"grad_norm": 103.53422546386719,
|
| 1443 |
+
"learning_rate": 1.9387755102040817e-05,
|
| 1444 |
+
"loss": 0.628,
|
| 1445 |
+
"step": 191
|
| 1446 |
+
},
|
| 1447 |
+
{
|
| 1448 |
+
"epoch": 1.9591836734693877,
|
| 1449 |
+
"grad_norm": 42.5008544921875,
|
| 1450 |
+
"learning_rate": 1.948979591836735e-05,
|
| 1451 |
+
"loss": 0.1967,
|
| 1452 |
+
"step": 192
|
| 1453 |
+
},
|
| 1454 |
+
{
|
| 1455 |
+
"epoch": 1.9693877551020407,
|
| 1456 |
+
"grad_norm": 145.1553955078125,
|
| 1457 |
+
"learning_rate": 1.9591836734693877e-05,
|
| 1458 |
+
"loss": 3.9822,
|
| 1459 |
+
"step": 193
|
| 1460 |
+
},
|
| 1461 |
+
{
|
| 1462 |
+
"epoch": 1.9795918367346939,
|
| 1463 |
+
"grad_norm": 0.07428821176290512,
|
| 1464 |
+
"learning_rate": 1.969387755102041e-05,
|
| 1465 |
+
"loss": 0.0003,
|
| 1466 |
+
"step": 194
|
| 1467 |
+
},
|
| 1468 |
+
{
|
| 1469 |
+
"epoch": 1.989795918367347,
|
| 1470 |
+
"grad_norm": 545.6088256835938,
|
| 1471 |
+
"learning_rate": 1.979591836734694e-05,
|
| 1472 |
+
"loss": 3.7309,
|
| 1473 |
+
"step": 195
|
| 1474 |
+
},
|
| 1475 |
+
{
|
| 1476 |
+
"epoch": 2.0,
|
| 1477 |
+
"grad_norm": 0.5490627288818359,
|
| 1478 |
+
"learning_rate": 1.9897959183673473e-05,
|
| 1479 |
+
"loss": 0.0024,
|
| 1480 |
+
"step": 196
|
| 1481 |
+
},
|
| 1482 |
+
{
|
| 1483 |
+
"epoch": 2.0,
|
| 1484 |
+
"eval_dim_1024_cosine_accuracy@1": 0.32522407170294493,
|
| 1485 |
+
"eval_dim_1024_cosine_accuracy@10": 0.3969270166453265,
|
| 1486 |
+
"eval_dim_1024_cosine_accuracy@3": 0.33290653008962867,
|
| 1487 |
+
"eval_dim_1024_cosine_accuracy@5": 0.36043533930857874,
|
| 1488 |
+
"eval_dim_1024_cosine_map@100": 0.4164888021641558,
|
| 1489 |
+
"eval_dim_1024_cosine_mrr@10": 0.33769460195516493,
|
| 1490 |
+
"eval_dim_1024_cosine_ndcg@10": 0.34986350069216465,
|
| 1491 |
+
"eval_dim_1024_cosine_precision@1": 0.32522407170294493,
|
| 1492 |
+
"eval_dim_1024_cosine_precision@10": 0.28361075544174136,
|
| 1493 |
+
"eval_dim_1024_cosine_precision@3": 0.3254374733247973,
|
| 1494 |
+
"eval_dim_1024_cosine_precision@5": 0.31626120358514725,
|
| 1495 |
+
"eval_dim_1024_cosine_recall@1": 0.04113491331982186,
|
| 1496 |
+
"eval_dim_1024_cosine_recall@10": 0.2664549051060991,
|
| 1497 |
+
"eval_dim_1024_cosine_recall@3": 0.12080229545561262,
|
| 1498 |
+
"eval_dim_1024_cosine_recall@5": 0.18183789253196145,
|
| 1499 |
+
"eval_dim_128_cosine_accuracy@1": 0.30217669654289375,
|
| 1500 |
+
"eval_dim_128_cosine_accuracy@10": 0.3546734955185659,
|
| 1501 |
+
"eval_dim_128_cosine_accuracy@3": 0.3072983354673495,
|
| 1502 |
+
"eval_dim_128_cosine_accuracy@5": 0.3265044814340589,
|
| 1503 |
+
"eval_dim_128_cosine_map@100": 0.38014172959059034,
|
| 1504 |
+
"eval_dim_128_cosine_mrr@10": 0.3112729406743488,
|
| 1505 |
+
"eval_dim_128_cosine_ndcg@10": 0.32071443787836906,
|
| 1506 |
+
"eval_dim_128_cosine_precision@1": 0.30217669654289375,
|
| 1507 |
+
"eval_dim_128_cosine_precision@10": 0.26312419974391804,
|
| 1508 |
+
"eval_dim_128_cosine_precision@3": 0.30239009816474605,
|
| 1509 |
+
"eval_dim_128_cosine_precision@5": 0.29359795134443023,
|
| 1510 |
+
"eval_dim_128_cosine_recall@1": 0.03603846894598867,
|
| 1511 |
+
"eval_dim_128_cosine_recall@10": 0.23664446759855584,
|
| 1512 |
+
"eval_dim_128_cosine_recall@3": 0.10607255532328354,
|
| 1513 |
+
"eval_dim_128_cosine_recall@5": 0.15998840334482403,
|
| 1514 |
+
"eval_dim_256_cosine_accuracy@1": 0.31049935979513443,
|
| 1515 |
+
"eval_dim_256_cosine_accuracy@10": 0.3725992317541613,
|
| 1516 |
+
"eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
|
| 1517 |
+
"eval_dim_256_cosine_accuracy@5": 0.34571062740076824,
|
| 1518 |
+
"eval_dim_256_cosine_map@100": 0.3940538127924734,
|
| 1519 |
+
"eval_dim_256_cosine_mrr@10": 0.3219094872263883,
|
| 1520 |
+
"eval_dim_256_cosine_ndcg@10": 0.33365785011470184,
|
| 1521 |
+
"eval_dim_256_cosine_precision@1": 0.31049935979513443,
|
| 1522 |
+
"eval_dim_256_cosine_precision@10": 0.2727272727272727,
|
| 1523 |
+
"eval_dim_256_cosine_precision@3": 0.3109261630388391,
|
| 1524 |
+
"eval_dim_256_cosine_precision@5": 0.3035851472471191,
|
| 1525 |
+
"eval_dim_256_cosine_recall@1": 0.0379038673811849,
|
| 1526 |
+
"eval_dim_256_cosine_recall@10": 0.25061548215235363,
|
| 1527 |
+
"eval_dim_256_cosine_recall@3": 0.11184662439829526,
|
| 1528 |
+
"eval_dim_256_cosine_recall@5": 0.16972372403865282,
|
| 1529 |
+
"eval_dim_512_cosine_accuracy@1": 0.32842509603072984,
|
| 1530 |
+
"eval_dim_512_cosine_accuracy@10": 0.39564660691421255,
|
| 1531 |
+
"eval_dim_512_cosine_accuracy@3": 0.33418693982074266,
|
| 1532 |
+
"eval_dim_512_cosine_accuracy@5": 0.36555697823303457,
|
| 1533 |
+
"eval_dim_512_cosine_map@100": 0.4125328284000196,
|
| 1534 |
+
"eval_dim_512_cosine_mrr@10": 0.34027168058858154,
|
| 1535 |
+
"eval_dim_512_cosine_ndcg@10": 0.3525488928748249,
|
| 1536 |
+
"eval_dim_512_cosine_precision@1": 0.32842509603072984,
|
| 1537 |
+
"eval_dim_512_cosine_precision@10": 0.28693982074263763,
|
| 1538 |
+
"eval_dim_512_cosine_precision@3": 0.3282116944088775,
|
| 1539 |
+
"eval_dim_512_cosine_precision@5": 0.31997439180537773,
|
| 1540 |
+
"eval_dim_512_cosine_recall@1": 0.04071091183465321,
|
| 1541 |
+
"eval_dim_512_cosine_recall@10": 0.2638449444559509,
|
| 1542 |
+
"eval_dim_512_cosine_recall@3": 0.11970757850133786,
|
| 1543 |
+
"eval_dim_512_cosine_recall@5": 0.1806811237454132,
|
| 1544 |
+
"eval_dim_64_cosine_accuracy@1": 0.28040973111395645,
|
| 1545 |
+
"eval_dim_64_cosine_accuracy@10": 0.3348271446862996,
|
| 1546 |
+
"eval_dim_64_cosine_accuracy@3": 0.28297055057618437,
|
| 1547 |
+
"eval_dim_64_cosine_accuracy@5": 0.3072983354673495,
|
| 1548 |
+
"eval_dim_64_cosine_map@100": 0.35085623648833997,
|
| 1549 |
+
"eval_dim_64_cosine_mrr@10": 0.28944678170030247,
|
| 1550 |
+
"eval_dim_64_cosine_ndcg@10": 0.2991224720529457,
|
| 1551 |
+
"eval_dim_64_cosine_precision@1": 0.28040973111395645,
|
| 1552 |
+
"eval_dim_64_cosine_precision@10": 0.24878361075544175,
|
| 1553 |
+
"eval_dim_64_cosine_precision@3": 0.27955612462654716,
|
| 1554 |
+
"eval_dim_64_cosine_precision@5": 0.27247119078105,
|
| 1555 |
+
"eval_dim_64_cosine_recall@1": 0.03187808455878807,
|
| 1556 |
+
"eval_dim_64_cosine_recall@10": 0.2128007008801171,
|
| 1557 |
+
"eval_dim_64_cosine_recall@3": 0.09363361347149868,
|
| 1558 |
+
"eval_dim_64_cosine_recall@5": 0.14192536615474802,
|
| 1559 |
+
"eval_dim_768_cosine_accuracy@1": 0.32970550576184376,
|
| 1560 |
+
"eval_dim_768_cosine_accuracy@10": 0.3994878361075544,
|
| 1561 |
+
"eval_dim_768_cosine_accuracy@3": 0.33418693982074266,
|
| 1562 |
+
"eval_dim_768_cosine_accuracy@5": 0.36427656850192064,
|
| 1563 |
+
"eval_dim_768_cosine_map@100": 0.4160652625925415,
|
| 1564 |
+
"eval_dim_768_cosine_mrr@10": 0.3415124585899229,
|
| 1565 |
+
"eval_dim_768_cosine_ndcg@10": 0.35370573856938964,
|
| 1566 |
+
"eval_dim_768_cosine_precision@1": 0.32970550576184376,
|
| 1567 |
+
"eval_dim_768_cosine_precision@10": 0.2877720870678617,
|
| 1568 |
+
"eval_dim_768_cosine_precision@3": 0.3288518992744345,
|
| 1569 |
+
"eval_dim_768_cosine_precision@5": 0.31997439180537773,
|
| 1570 |
+
"eval_dim_768_cosine_recall@1": 0.040955758827011135,
|
| 1571 |
+
"eval_dim_768_cosine_recall@10": 0.26685683005601735,
|
| 1572 |
+
"eval_dim_768_cosine_recall@3": 0.12009305539695316,
|
| 1573 |
+
"eval_dim_768_cosine_recall@5": 0.18142212378067016,
|
| 1574 |
+
"eval_runtime": 99.167,
|
| 1575 |
+
"eval_samples_per_second": 0.0,
|
| 1576 |
+
"eval_sequential_score": 0.2991224720529457,
|
| 1577 |
+
"eval_steps_per_second": 0.0,
|
| 1578 |
+
"step": 196
|
| 1579 |
+
}
|
| 1580 |
+
],
|
| 1581 |
+
"logging_steps": 1,
|
| 1582 |
+
"max_steps": 1960,
|
| 1583 |
+
"num_input_tokens_seen": 0,
|
| 1584 |
+
"num_train_epochs": 20,
|
| 1585 |
+
"save_steps": 500,
|
| 1586 |
+
"stateful_callbacks": {
|
| 1587 |
+
"EarlyStoppingCallback": {
|
| 1588 |
+
"args": {
|
| 1589 |
+
"early_stopping_patience": 2,
|
| 1590 |
+
"early_stopping_threshold": 0.0
|
| 1591 |
+
},
|
| 1592 |
+
"attributes": {
|
| 1593 |
+
"early_stopping_patience_counter": 1
|
| 1594 |
+
}
|
| 1595 |
+
},
|
| 1596 |
+
"TrainerControl": {
|
| 1597 |
+
"args": {
|
| 1598 |
+
"should_epoch_stop": false,
|
| 1599 |
+
"should_evaluate": false,
|
| 1600 |
+
"should_log": false,
|
| 1601 |
+
"should_save": true,
|
| 1602 |
+
"should_training_stop": false
|
| 1603 |
+
},
|
| 1604 |
+
"attributes": {}
|
| 1605 |
+
}
|
| 1606 |
+
},
|
| 1607 |
+
"total_flos": 0.0,
|
| 1608 |
+
"train_batch_size": 2,
|
| 1609 |
+
"trial_name": null,
|
| 1610 |
+
"trial_params": null
|
| 1611 |
+
}
|
checkpoint-196/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
|
| 3 |
+
size 6097
|
checkpoint-294/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
checkpoint-294/README.md
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-294/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 4096,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "xlm-roberta",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 24,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 1,
|
| 21 |
+
"position_embedding_type": "absolute",
|
| 22 |
+
"torch_dtype": "float32",
|
| 23 |
+
"transformers_version": "4.51.3",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
checkpoint-294/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "SentenceTransformer",
|
| 3 |
+
"__version__": {
|
| 4 |
+
"sentence_transformers": "5.1.0",
|
| 5 |
+
"transformers": "4.51.3",
|
| 6 |
+
"pytorch": "2.8.0+cu126"
|
| 7 |
+
},
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
checkpoint-294/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43c4fc720dc2e3977a3cc3e3497ee115851c08f432984ab97bfaf724a6b3d666
|
| 3 |
+
size 2239607176
|
checkpoint-294/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
checkpoint-294/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0ddd633ecdd2f1e6c3de318b7ceb44fe2f43af2a873c9089a62cd07aaeb5c74
|
| 3 |
+
size 4471067142
|
checkpoint-294/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bdc8f1e2d846953d00ba606f4cf92976f5653cd22fea2aacf347840fdb304ea
|
| 3 |
+
size 14645
|
checkpoint-294/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91b1f870feb8cc60c3e97ad9856efed23cb494b46c65f244929c7c13ceca58cc
|
| 3 |
+
size 1465
|
checkpoint-294/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
checkpoint-294/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
checkpoint-294/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "<unk>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
checkpoint-294/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
|
| 3 |
+
size 17082987
|
checkpoint-294/tokenizer_config.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": true,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"max_length": 512,
|
| 51 |
+
"model_max_length": 512,
|
| 52 |
+
"pad_to_multiple_of": null,
|
| 53 |
+
"pad_token": "<pad>",
|
| 54 |
+
"pad_token_type_id": 0,
|
| 55 |
+
"padding_side": "right",
|
| 56 |
+
"sep_token": "</s>",
|
| 57 |
+
"stride": 0,
|
| 58 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 59 |
+
"truncation_side": "right",
|
| 60 |
+
"truncation_strategy": "longest_first",
|
| 61 |
+
"unk_token": "<unk>"
|
| 62 |
+
}
|
checkpoint-294/trainer_state.json
ADDED
|
@@ -0,0 +1,2395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 98,
|
| 3 |
+
"best_metric": 0.3312285498294292,
|
| 4 |
+
"best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 294,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.01020408163265306,
|
| 14 |
+
"grad_norm": 973.273681640625,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 15.8588,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.02040816326530612,
|
| 21 |
+
"grad_norm": 1016.8517456054688,
|
| 22 |
+
"learning_rate": 1.0204081632653061e-07,
|
| 23 |
+
"loss": 10.7411,
|
| 24 |
+
"step": 2
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.030612244897959183,
|
| 28 |
+
"grad_norm": 166.88465881347656,
|
| 29 |
+
"learning_rate": 2.0408163265306121e-07,
|
| 30 |
+
"loss": 1.3873,
|
| 31 |
+
"step": 3
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.04081632653061224,
|
| 35 |
+
"grad_norm": 108.06741333007812,
|
| 36 |
+
"learning_rate": 3.0612244897959183e-07,
|
| 37 |
+
"loss": 0.9088,
|
| 38 |
+
"step": 4
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.05102040816326531,
|
| 42 |
+
"grad_norm": 1.1959134340286255,
|
| 43 |
+
"learning_rate": 4.0816326530612243e-07,
|
| 44 |
+
"loss": 0.0077,
|
| 45 |
+
"step": 5
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.061224489795918366,
|
| 49 |
+
"grad_norm": 130.83908081054688,
|
| 50 |
+
"learning_rate": 5.102040816326531e-07,
|
| 51 |
+
"loss": 0.6016,
|
| 52 |
+
"step": 6
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.07142857142857142,
|
| 56 |
+
"grad_norm": 318.3863525390625,
|
| 57 |
+
"learning_rate": 6.122448979591837e-07,
|
| 58 |
+
"loss": 1.6714,
|
| 59 |
+
"step": 7
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.08163265306122448,
|
| 63 |
+
"grad_norm": 74.26002502441406,
|
| 64 |
+
"learning_rate": 7.142857142857143e-07,
|
| 65 |
+
"loss": 0.4211,
|
| 66 |
+
"step": 8
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.09183673469387756,
|
| 70 |
+
"grad_norm": 32.4500846862793,
|
| 71 |
+
"learning_rate": 8.163265306122449e-07,
|
| 72 |
+
"loss": 0.1996,
|
| 73 |
+
"step": 9
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.10204081632653061,
|
| 77 |
+
"grad_norm": 41.27345275878906,
|
| 78 |
+
"learning_rate": 9.183673469387756e-07,
|
| 79 |
+
"loss": 0.1895,
|
| 80 |
+
"step": 10
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.11224489795918367,
|
| 84 |
+
"grad_norm": 27.35291862487793,
|
| 85 |
+
"learning_rate": 1.0204081632653063e-06,
|
| 86 |
+
"loss": 0.1358,
|
| 87 |
+
"step": 11
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.12244897959183673,
|
| 91 |
+
"grad_norm": 103.75244903564453,
|
| 92 |
+
"learning_rate": 1.122448979591837e-06,
|
| 93 |
+
"loss": 0.5552,
|
| 94 |
+
"step": 12
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.1326530612244898,
|
| 98 |
+
"grad_norm": 155.97923278808594,
|
| 99 |
+
"learning_rate": 1.2244897959183673e-06,
|
| 100 |
+
"loss": 0.5141,
|
| 101 |
+
"step": 13
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.14285714285714285,
|
| 105 |
+
"grad_norm": 53.757484436035156,
|
| 106 |
+
"learning_rate": 1.3265306122448982e-06,
|
| 107 |
+
"loss": 0.1955,
|
| 108 |
+
"step": 14
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.15306122448979592,
|
| 112 |
+
"grad_norm": 175.17491149902344,
|
| 113 |
+
"learning_rate": 1.4285714285714286e-06,
|
| 114 |
+
"loss": 1.9114,
|
| 115 |
+
"step": 15
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.16326530612244897,
|
| 119 |
+
"grad_norm": 49.02252197265625,
|
| 120 |
+
"learning_rate": 1.5306122448979593e-06,
|
| 121 |
+
"loss": 0.2645,
|
| 122 |
+
"step": 16
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.17346938775510204,
|
| 126 |
+
"grad_norm": 999.3756103515625,
|
| 127 |
+
"learning_rate": 1.6326530612244897e-06,
|
| 128 |
+
"loss": 7.5545,
|
| 129 |
+
"step": 17
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.1836734693877551,
|
| 133 |
+
"grad_norm": 149.2627410888672,
|
| 134 |
+
"learning_rate": 1.7346938775510206e-06,
|
| 135 |
+
"loss": 0.4297,
|
| 136 |
+
"step": 18
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.19387755102040816,
|
| 140 |
+
"grad_norm": 204.95181274414062,
|
| 141 |
+
"learning_rate": 1.8367346938775512e-06,
|
| 142 |
+
"loss": 0.678,
|
| 143 |
+
"step": 19
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.20408163265306123,
|
| 147 |
+
"grad_norm": 103.94851684570312,
|
| 148 |
+
"learning_rate": 1.938775510204082e-06,
|
| 149 |
+
"loss": 0.4634,
|
| 150 |
+
"step": 20
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.21428571428571427,
|
| 154 |
+
"grad_norm": 536.7100219726562,
|
| 155 |
+
"learning_rate": 2.0408163265306125e-06,
|
| 156 |
+
"loss": 4.2252,
|
| 157 |
+
"step": 21
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.22448979591836735,
|
| 161 |
+
"grad_norm": 444.44805908203125,
|
| 162 |
+
"learning_rate": 2.1428571428571427e-06,
|
| 163 |
+
"loss": 3.9985,
|
| 164 |
+
"step": 22
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.23469387755102042,
|
| 168 |
+
"grad_norm": 170.50369262695312,
|
| 169 |
+
"learning_rate": 2.244897959183674e-06,
|
| 170 |
+
"loss": 1.9242,
|
| 171 |
+
"step": 23
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.24489795918367346,
|
| 175 |
+
"grad_norm": 626.5487060546875,
|
| 176 |
+
"learning_rate": 2.3469387755102044e-06,
|
| 177 |
+
"loss": 3.2716,
|
| 178 |
+
"step": 24
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.25510204081632654,
|
| 182 |
+
"grad_norm": 51.353050231933594,
|
| 183 |
+
"learning_rate": 2.4489795918367347e-06,
|
| 184 |
+
"loss": 0.123,
|
| 185 |
+
"step": 25
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.2653061224489796,
|
| 189 |
+
"grad_norm": 108.25341796875,
|
| 190 |
+
"learning_rate": 2.5510204081632657e-06,
|
| 191 |
+
"loss": 1.0011,
|
| 192 |
+
"step": 26
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.2755102040816326,
|
| 196 |
+
"grad_norm": 322.83502197265625,
|
| 197 |
+
"learning_rate": 2.6530612244897964e-06,
|
| 198 |
+
"loss": 3.5846,
|
| 199 |
+
"step": 27
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.2857142857142857,
|
| 203 |
+
"grad_norm": 203.38458251953125,
|
| 204 |
+
"learning_rate": 2.7551020408163266e-06,
|
| 205 |
+
"loss": 1.1365,
|
| 206 |
+
"step": 28
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.29591836734693877,
|
| 210 |
+
"grad_norm": 127.78427124023438,
|
| 211 |
+
"learning_rate": 2.8571428571428573e-06,
|
| 212 |
+
"loss": 0.7149,
|
| 213 |
+
"step": 29
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.30612244897959184,
|
| 217 |
+
"grad_norm": 283.67645263671875,
|
| 218 |
+
"learning_rate": 2.959183673469388e-06,
|
| 219 |
+
"loss": 1.2629,
|
| 220 |
+
"step": 30
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.3163265306122449,
|
| 224 |
+
"grad_norm": 82.65542602539062,
|
| 225 |
+
"learning_rate": 3.0612244897959185e-06,
|
| 226 |
+
"loss": 0.6459,
|
| 227 |
+
"step": 31
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.32653061224489793,
|
| 231 |
+
"grad_norm": 42.66185760498047,
|
| 232 |
+
"learning_rate": 3.1632653061224496e-06,
|
| 233 |
+
"loss": 0.1934,
|
| 234 |
+
"step": 32
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.336734693877551,
|
| 238 |
+
"grad_norm": 212.1294708251953,
|
| 239 |
+
"learning_rate": 3.2653061224489794e-06,
|
| 240 |
+
"loss": 1.4897,
|
| 241 |
+
"step": 33
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.3469387755102041,
|
| 245 |
+
"grad_norm": 188.0417022705078,
|
| 246 |
+
"learning_rate": 3.3673469387755105e-06,
|
| 247 |
+
"loss": 0.8561,
|
| 248 |
+
"step": 34
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.35714285714285715,
|
| 252 |
+
"grad_norm": 2.0467610359191895,
|
| 253 |
+
"learning_rate": 3.469387755102041e-06,
|
| 254 |
+
"loss": 0.0128,
|
| 255 |
+
"step": 35
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.3673469387755102,
|
| 259 |
+
"grad_norm": 283.3966979980469,
|
| 260 |
+
"learning_rate": 3.5714285714285718e-06,
|
| 261 |
+
"loss": 1.4952,
|
| 262 |
+
"step": 36
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.37755102040816324,
|
| 266 |
+
"grad_norm": 60.74869155883789,
|
| 267 |
+
"learning_rate": 3.6734693877551024e-06,
|
| 268 |
+
"loss": 0.3181,
|
| 269 |
+
"step": 37
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.3877551020408163,
|
| 273 |
+
"grad_norm": 824.6165771484375,
|
| 274 |
+
"learning_rate": 3.7755102040816327e-06,
|
| 275 |
+
"loss": 6.3681,
|
| 276 |
+
"step": 38
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 0.3979591836734694,
|
| 280 |
+
"grad_norm": 231.1636962890625,
|
| 281 |
+
"learning_rate": 3.877551020408164e-06,
|
| 282 |
+
"loss": 1.4487,
|
| 283 |
+
"step": 39
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 0.40816326530612246,
|
| 287 |
+
"grad_norm": 26.46611785888672,
|
| 288 |
+
"learning_rate": 3.979591836734694e-06,
|
| 289 |
+
"loss": 0.1702,
|
| 290 |
+
"step": 40
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 0.41836734693877553,
|
| 294 |
+
"grad_norm": 75.88525390625,
|
| 295 |
+
"learning_rate": 4.081632653061225e-06,
|
| 296 |
+
"loss": 0.2513,
|
| 297 |
+
"step": 41
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.42857142857142855,
|
| 301 |
+
"grad_norm": 465.83392333984375,
|
| 302 |
+
"learning_rate": 4.183673469387755e-06,
|
| 303 |
+
"loss": 4.1595,
|
| 304 |
+
"step": 42
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 0.4387755102040816,
|
| 308 |
+
"grad_norm": 306.2772521972656,
|
| 309 |
+
"learning_rate": 4.2857142857142855e-06,
|
| 310 |
+
"loss": 2.7347,
|
| 311 |
+
"step": 43
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 0.4489795918367347,
|
| 315 |
+
"grad_norm": 488.9759521484375,
|
| 316 |
+
"learning_rate": 4.3877551020408165e-06,
|
| 317 |
+
"loss": 2.3182,
|
| 318 |
+
"step": 44
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 0.45918367346938777,
|
| 322 |
+
"grad_norm": 355.1698913574219,
|
| 323 |
+
"learning_rate": 4.489795918367348e-06,
|
| 324 |
+
"loss": 1.3285,
|
| 325 |
+
"step": 45
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 0.46938775510204084,
|
| 329 |
+
"grad_norm": 263.558349609375,
|
| 330 |
+
"learning_rate": 4.591836734693878e-06,
|
| 331 |
+
"loss": 2.1155,
|
| 332 |
+
"step": 46
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 0.47959183673469385,
|
| 336 |
+
"grad_norm": 9.667963981628418,
|
| 337 |
+
"learning_rate": 4.693877551020409e-06,
|
| 338 |
+
"loss": 0.0645,
|
| 339 |
+
"step": 47
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"epoch": 0.4897959183673469,
|
| 343 |
+
"grad_norm": 957.79345703125,
|
| 344 |
+
"learning_rate": 4.795918367346939e-06,
|
| 345 |
+
"loss": 7.1283,
|
| 346 |
+
"step": 48
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 0.5,
|
| 350 |
+
"grad_norm": 160.0965118408203,
|
| 351 |
+
"learning_rate": 4.897959183673469e-06,
|
| 352 |
+
"loss": 0.711,
|
| 353 |
+
"step": 49
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 0.5102040816326531,
|
| 357 |
+
"grad_norm": 93.697265625,
|
| 358 |
+
"learning_rate": 5e-06,
|
| 359 |
+
"loss": 0.4716,
|
| 360 |
+
"step": 50
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 0.5204081632653061,
|
| 364 |
+
"grad_norm": 292.9518737792969,
|
| 365 |
+
"learning_rate": 5.1020408163265315e-06,
|
| 366 |
+
"loss": 2.2895,
|
| 367 |
+
"step": 51
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 0.5306122448979592,
|
| 371 |
+
"grad_norm": 335.4564514160156,
|
| 372 |
+
"learning_rate": 5.204081632653062e-06,
|
| 373 |
+
"loss": 1.9235,
|
| 374 |
+
"step": 52
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 0.5408163265306123,
|
| 378 |
+
"grad_norm": 138.63575744628906,
|
| 379 |
+
"learning_rate": 5.306122448979593e-06,
|
| 380 |
+
"loss": 0.8777,
|
| 381 |
+
"step": 53
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 0.5510204081632653,
|
| 385 |
+
"grad_norm": 1.011594533920288,
|
| 386 |
+
"learning_rate": 5.408163265306123e-06,
|
| 387 |
+
"loss": 0.0038,
|
| 388 |
+
"step": 54
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 0.5612244897959183,
|
| 392 |
+
"grad_norm": 506.25152587890625,
|
| 393 |
+
"learning_rate": 5.510204081632653e-06,
|
| 394 |
+
"loss": 1.5598,
|
| 395 |
+
"step": 55
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 0.5714285714285714,
|
| 399 |
+
"grad_norm": 2.2550530433654785,
|
| 400 |
+
"learning_rate": 5.6122448979591834e-06,
|
| 401 |
+
"loss": 0.0177,
|
| 402 |
+
"step": 56
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 0.5816326530612245,
|
| 406 |
+
"grad_norm": 13.93323802947998,
|
| 407 |
+
"learning_rate": 5.7142857142857145e-06,
|
| 408 |
+
"loss": 0.0837,
|
| 409 |
+
"step": 57
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 0.5918367346938775,
|
| 413 |
+
"grad_norm": 7.279649257659912,
|
| 414 |
+
"learning_rate": 5.816326530612246e-06,
|
| 415 |
+
"loss": 0.0429,
|
| 416 |
+
"step": 58
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 0.6020408163265306,
|
| 420 |
+
"grad_norm": 0.9923371076583862,
|
| 421 |
+
"learning_rate": 5.918367346938776e-06,
|
| 422 |
+
"loss": 0.0071,
|
| 423 |
+
"step": 59
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 0.6122448979591837,
|
| 427 |
+
"grad_norm": 743.8301391601562,
|
| 428 |
+
"learning_rate": 6.020408163265307e-06,
|
| 429 |
+
"loss": 2.7217,
|
| 430 |
+
"step": 60
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 0.6224489795918368,
|
| 434 |
+
"grad_norm": 227.04403686523438,
|
| 435 |
+
"learning_rate": 6.122448979591837e-06,
|
| 436 |
+
"loss": 3.9013,
|
| 437 |
+
"step": 61
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 0.6326530612244898,
|
| 441 |
+
"grad_norm": 193.12701416015625,
|
| 442 |
+
"learning_rate": 6.224489795918368e-06,
|
| 443 |
+
"loss": 1.417,
|
| 444 |
+
"step": 62
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 0.6428571428571429,
|
| 448 |
+
"grad_norm": 642.7814331054688,
|
| 449 |
+
"learning_rate": 6.326530612244899e-06,
|
| 450 |
+
"loss": 3.5854,
|
| 451 |
+
"step": 63
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 0.6530612244897959,
|
| 455 |
+
"grad_norm": 1007.544189453125,
|
| 456 |
+
"learning_rate": 6.4285714285714295e-06,
|
| 457 |
+
"loss": 12.918,
|
| 458 |
+
"step": 64
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 0.6632653061224489,
|
| 462 |
+
"grad_norm": 1310.942138671875,
|
| 463 |
+
"learning_rate": 6.530612244897959e-06,
|
| 464 |
+
"loss": 7.1566,
|
| 465 |
+
"step": 65
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 0.673469387755102,
|
| 469 |
+
"grad_norm": 810.1301879882812,
|
| 470 |
+
"learning_rate": 6.63265306122449e-06,
|
| 471 |
+
"loss": 3.9897,
|
| 472 |
+
"step": 66
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 0.6836734693877551,
|
| 476 |
+
"grad_norm": 513.1759643554688,
|
| 477 |
+
"learning_rate": 6.734693877551021e-06,
|
| 478 |
+
"loss": 8.1139,
|
| 479 |
+
"step": 67
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 0.6938775510204082,
|
| 483 |
+
"grad_norm": 1414.8878173828125,
|
| 484 |
+
"learning_rate": 6.836734693877551e-06,
|
| 485 |
+
"loss": 5.7005,
|
| 486 |
+
"step": 68
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 0.7040816326530612,
|
| 490 |
+
"grad_norm": 31.607126235961914,
|
| 491 |
+
"learning_rate": 6.938775510204082e-06,
|
| 492 |
+
"loss": 0.1219,
|
| 493 |
+
"step": 69
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.7142857142857143,
|
| 497 |
+
"grad_norm": 799.9751586914062,
|
| 498 |
+
"learning_rate": 7.0408163265306125e-06,
|
| 499 |
+
"loss": 5.7849,
|
| 500 |
+
"step": 70
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 0.7244897959183674,
|
| 504 |
+
"grad_norm": 132.71778869628906,
|
| 505 |
+
"learning_rate": 7.1428571428571436e-06,
|
| 506 |
+
"loss": 1.0726,
|
| 507 |
+
"step": 71
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 0.7346938775510204,
|
| 511 |
+
"grad_norm": 256.61041259765625,
|
| 512 |
+
"learning_rate": 7.244897959183675e-06,
|
| 513 |
+
"loss": 1.2599,
|
| 514 |
+
"step": 72
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 0.7448979591836735,
|
| 518 |
+
"grad_norm": 192.0435333251953,
|
| 519 |
+
"learning_rate": 7.346938775510205e-06,
|
| 520 |
+
"loss": 0.6473,
|
| 521 |
+
"step": 73
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 0.7551020408163265,
|
| 525 |
+
"grad_norm": 293.7915954589844,
|
| 526 |
+
"learning_rate": 7.448979591836736e-06,
|
| 527 |
+
"loss": 1.0397,
|
| 528 |
+
"step": 74
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 0.7653061224489796,
|
| 532 |
+
"grad_norm": 312.2645263671875,
|
| 533 |
+
"learning_rate": 7.551020408163265e-06,
|
| 534 |
+
"loss": 1.5555,
|
| 535 |
+
"step": 75
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 0.7755102040816326,
|
| 539 |
+
"grad_norm": 1.417815923690796,
|
| 540 |
+
"learning_rate": 7.653061224489796e-06,
|
| 541 |
+
"loss": 0.0078,
|
| 542 |
+
"step": 76
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 0.7857142857142857,
|
| 546 |
+
"grad_norm": 1.4391653537750244,
|
| 547 |
+
"learning_rate": 7.755102040816327e-06,
|
| 548 |
+
"loss": 0.0048,
|
| 549 |
+
"step": 77
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 0.7959183673469388,
|
| 553 |
+
"grad_norm": 5.628185749053955,
|
| 554 |
+
"learning_rate": 7.857142857142858e-06,
|
| 555 |
+
"loss": 0.0323,
|
| 556 |
+
"step": 78
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 0.8061224489795918,
|
| 560 |
+
"grad_norm": 264.5353698730469,
|
| 561 |
+
"learning_rate": 7.959183673469388e-06,
|
| 562 |
+
"loss": 1.7425,
|
| 563 |
+
"step": 79
|
| 564 |
+
},
|
| 565 |
+
{
|
| 566 |
+
"epoch": 0.8163265306122449,
|
| 567 |
+
"grad_norm": 1.5278851985931396,
|
| 568 |
+
"learning_rate": 8.06122448979592e-06,
|
| 569 |
+
"loss": 0.0035,
|
| 570 |
+
"step": 80
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 0.826530612244898,
|
| 574 |
+
"grad_norm": 932.3336181640625,
|
| 575 |
+
"learning_rate": 8.16326530612245e-06,
|
| 576 |
+
"loss": 6.4849,
|
| 577 |
+
"step": 81
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 0.8367346938775511,
|
| 581 |
+
"grad_norm": 635.4749145507812,
|
| 582 |
+
"learning_rate": 8.26530612244898e-06,
|
| 583 |
+
"loss": 4.3767,
|
| 584 |
+
"step": 82
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.8469387755102041,
|
| 588 |
+
"grad_norm": 8.875201225280762,
|
| 589 |
+
"learning_rate": 8.36734693877551e-06,
|
| 590 |
+
"loss": 0.0186,
|
| 591 |
+
"step": 83
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.8571428571428571,
|
| 595 |
+
"grad_norm": 0.15500876307487488,
|
| 596 |
+
"learning_rate": 8.469387755102042e-06,
|
| 597 |
+
"loss": 0.0008,
|
| 598 |
+
"step": 84
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 0.8673469387755102,
|
| 602 |
+
"grad_norm": 269.5357666015625,
|
| 603 |
+
"learning_rate": 8.571428571428571e-06,
|
| 604 |
+
"loss": 0.8354,
|
| 605 |
+
"step": 85
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 0.8775510204081632,
|
| 609 |
+
"grad_norm": 5.054287910461426,
|
| 610 |
+
"learning_rate": 8.673469387755103e-06,
|
| 611 |
+
"loss": 0.0162,
|
| 612 |
+
"step": 86
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 0.8877551020408163,
|
| 616 |
+
"grad_norm": 84.90735626220703,
|
| 617 |
+
"learning_rate": 8.775510204081633e-06,
|
| 618 |
+
"loss": 0.1282,
|
| 619 |
+
"step": 87
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 0.8979591836734694,
|
| 623 |
+
"grad_norm": 81.53719329833984,
|
| 624 |
+
"learning_rate": 8.877551020408163e-06,
|
| 625 |
+
"loss": 0.4514,
|
| 626 |
+
"step": 88
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 0.9081632653061225,
|
| 630 |
+
"grad_norm": 547.4005126953125,
|
| 631 |
+
"learning_rate": 8.979591836734695e-06,
|
| 632 |
+
"loss": 4.9103,
|
| 633 |
+
"step": 89
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 0.9183673469387755,
|
| 637 |
+
"grad_norm": 25.792213439941406,
|
| 638 |
+
"learning_rate": 9.081632653061225e-06,
|
| 639 |
+
"loss": 0.0762,
|
| 640 |
+
"step": 90
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 0.9285714285714286,
|
| 644 |
+
"grad_norm": 10.455421447753906,
|
| 645 |
+
"learning_rate": 9.183673469387756e-06,
|
| 646 |
+
"loss": 0.0444,
|
| 647 |
+
"step": 91
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 0.9387755102040817,
|
| 651 |
+
"grad_norm": 472.54376220703125,
|
| 652 |
+
"learning_rate": 9.285714285714288e-06,
|
| 653 |
+
"loss": 1.8609,
|
| 654 |
+
"step": 92
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 0.9489795918367347,
|
| 658 |
+
"grad_norm": 31.092357635498047,
|
| 659 |
+
"learning_rate": 9.387755102040818e-06,
|
| 660 |
+
"loss": 0.1489,
|
| 661 |
+
"step": 93
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 0.9591836734693877,
|
| 665 |
+
"grad_norm": 231.94151306152344,
|
| 666 |
+
"learning_rate": 9.489795918367348e-06,
|
| 667 |
+
"loss": 0.5926,
|
| 668 |
+
"step": 94
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.9693877551020408,
|
| 672 |
+
"grad_norm": 211.05117797851562,
|
| 673 |
+
"learning_rate": 9.591836734693878e-06,
|
| 674 |
+
"loss": 0.5344,
|
| 675 |
+
"step": 95
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 0.9795918367346939,
|
| 679 |
+
"grad_norm": 217.01339721679688,
|
| 680 |
+
"learning_rate": 9.693877551020408e-06,
|
| 681 |
+
"loss": 0.4693,
|
| 682 |
+
"step": 96
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 0.9897959183673469,
|
| 686 |
+
"grad_norm": 1123.96484375,
|
| 687 |
+
"learning_rate": 9.795918367346939e-06,
|
| 688 |
+
"loss": 9.2282,
|
| 689 |
+
"step": 97
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 1.0,
|
| 693 |
+
"grad_norm": 741.597412109375,
|
| 694 |
+
"learning_rate": 9.89795918367347e-06,
|
| 695 |
+
"loss": 4.6238,
|
| 696 |
+
"step": 98
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 1.0,
|
| 700 |
+
"eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
|
| 701 |
+
"eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
|
| 702 |
+
"eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
|
| 703 |
+
"eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
|
| 704 |
+
"eval_dim_1024_cosine_map@100": 0.45394800707643057,
|
| 705 |
+
"eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
|
| 706 |
+
"eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
|
| 707 |
+
"eval_dim_1024_cosine_precision@1": 0.36235595390524966,
|
| 708 |
+
"eval_dim_1024_cosine_precision@10": 0.3176696542893726,
|
| 709 |
+
"eval_dim_1024_cosine_precision@3": 0.36192915066154496,
|
| 710 |
+
"eval_dim_1024_cosine_precision@5": 0.35172855313700385,
|
| 711 |
+
"eval_dim_1024_cosine_recall@1": 0.04346309464734114,
|
| 712 |
+
"eval_dim_1024_cosine_recall@10": 0.28096984500258326,
|
| 713 |
+
"eval_dim_1024_cosine_recall@3": 0.12757812796185336,
|
| 714 |
+
"eval_dim_1024_cosine_recall@5": 0.19200836801442767,
|
| 715 |
+
"eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
|
| 716 |
+
"eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
|
| 717 |
+
"eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
|
| 718 |
+
"eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
|
| 719 |
+
"eval_dim_128_cosine_map@100": 0.3963095303049961,
|
| 720 |
+
"eval_dim_128_cosine_mrr@10": 0.3199812511432227,
|
| 721 |
+
"eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
|
| 722 |
+
"eval_dim_128_cosine_precision@1": 0.3085787451984635,
|
| 723 |
+
"eval_dim_128_cosine_precision@10": 0.2752880921895006,
|
| 724 |
+
"eval_dim_128_cosine_precision@3": 0.3079385403329065,
|
| 725 |
+
"eval_dim_128_cosine_precision@5": 0.29961587708066584,
|
| 726 |
+
"eval_dim_128_cosine_recall@1": 0.036297623853982414,
|
| 727 |
+
"eval_dim_128_cosine_recall@10": 0.24000960695821508,
|
| 728 |
+
"eval_dim_128_cosine_recall@3": 0.10638786483158841,
|
| 729 |
+
"eval_dim_128_cosine_recall@5": 0.16032639984514846,
|
| 730 |
+
"eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
|
| 731 |
+
"eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
|
| 732 |
+
"eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
|
| 733 |
+
"eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
|
| 734 |
+
"eval_dim_256_cosine_map@100": 0.4298669852983799,
|
| 735 |
+
"eval_dim_256_cosine_mrr@10": 0.3551361197487955,
|
| 736 |
+
"eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
|
| 737 |
+
"eval_dim_256_cosine_precision@1": 0.3437900128040973,
|
| 738 |
+
"eval_dim_256_cosine_precision@10": 0.3040973111395647,
|
| 739 |
+
"eval_dim_256_cosine_precision@3": 0.342936406316688,
|
| 740 |
+
"eval_dim_256_cosine_precision@5": 0.33457106274007686,
|
| 741 |
+
"eval_dim_256_cosine_recall@1": 0.04013102608834382,
|
| 742 |
+
"eval_dim_256_cosine_recall@10": 0.2648598688529433,
|
| 743 |
+
"eval_dim_256_cosine_recall@3": 0.11771735023719074,
|
| 744 |
+
"eval_dim_256_cosine_recall@5": 0.17837935755014916,
|
| 745 |
+
"eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
|
| 746 |
+
"eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
|
| 747 |
+
"eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
|
| 748 |
+
"eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
|
| 749 |
+
"eval_dim_512_cosine_map@100": 0.4476805587612892,
|
| 750 |
+
"eval_dim_512_cosine_mrr@10": 0.37212542934373866,
|
| 751 |
+
"eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
|
| 752 |
+
"eval_dim_512_cosine_precision@1": 0.35979513444302175,
|
| 753 |
+
"eval_dim_512_cosine_precision@10": 0.3173495518565941,
|
| 754 |
+
"eval_dim_512_cosine_precision@3": 0.35936833119931705,
|
| 755 |
+
"eval_dim_512_cosine_precision@5": 0.34967989756722156,
|
| 756 |
+
"eval_dim_512_cosine_recall@1": 0.04265405128130224,
|
| 757 |
+
"eval_dim_512_cosine_recall@10": 0.2781876565001863,
|
| 758 |
+
"eval_dim_512_cosine_recall@3": 0.12523102347193127,
|
| 759 |
+
"eval_dim_512_cosine_recall@5": 0.18912519336740205,
|
| 760 |
+
"eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
|
| 761 |
+
"eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
|
| 762 |
+
"eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
|
| 763 |
+
"eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
|
| 764 |
+
"eval_dim_64_cosine_map@100": 0.3539045084602349,
|
| 765 |
+
"eval_dim_64_cosine_mrr@10": 0.28429414873076814,
|
| 766 |
+
"eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
|
| 767 |
+
"eval_dim_64_cosine_precision@1": 0.2740076824583867,
|
| 768 |
+
"eval_dim_64_cosine_precision@10": 0.24571062740076827,
|
| 769 |
+
"eval_dim_64_cosine_precision@3": 0.27315407597097735,
|
| 770 |
+
"eval_dim_64_cosine_precision@5": 0.2670934699103713,
|
| 771 |
+
"eval_dim_64_cosine_recall@1": 0.03167890172057568,
|
| 772 |
+
"eval_dim_64_cosine_recall@10": 0.21092883720941633,
|
| 773 |
+
"eval_dim_64_cosine_recall@3": 0.09267023360511464,
|
| 774 |
+
"eval_dim_64_cosine_recall@5": 0.14048625468314752,
|
| 775 |
+
"eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
|
| 776 |
+
"eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
|
| 777 |
+
"eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
|
| 778 |
+
"eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
|
| 779 |
+
"eval_dim_768_cosine_map@100": 0.4493001842217619,
|
| 780 |
+
"eval_dim_768_cosine_mrr@10": 0.37149335406377615,
|
| 781 |
+
"eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
|
| 782 |
+
"eval_dim_768_cosine_precision@1": 0.3591549295774648,
|
| 783 |
+
"eval_dim_768_cosine_precision@10": 0.31670934699103714,
|
| 784 |
+
"eval_dim_768_cosine_precision@3": 0.3587281263337601,
|
| 785 |
+
"eval_dim_768_cosine_precision@5": 0.34852752880921894,
|
| 786 |
+
"eval_dim_768_cosine_recall@1": 0.04250079684114586,
|
| 787 |
+
"eval_dim_768_cosine_recall@10": 0.27695909667507057,
|
| 788 |
+
"eval_dim_768_cosine_recall@3": 0.12462187901616553,
|
| 789 |
+
"eval_dim_768_cosine_recall@5": 0.1875478484365334,
|
| 790 |
+
"eval_runtime": 99.0843,
|
| 791 |
+
"eval_samples_per_second": 0.0,
|
| 792 |
+
"eval_sequential_score": 0.29402896525927075,
|
| 793 |
+
"eval_steps_per_second": 0.0,
|
| 794 |
+
"step": 98
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 1.010204081632653,
|
| 798 |
+
"grad_norm": 342.861328125,
|
| 799 |
+
"learning_rate": 1e-05,
|
| 800 |
+
"loss": 1.9644,
|
| 801 |
+
"step": 99
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 1.0204081632653061,
|
| 805 |
+
"grad_norm": 761.8235473632812,
|
| 806 |
+
"learning_rate": 1.0102040816326531e-05,
|
| 807 |
+
"loss": 7.4242,
|
| 808 |
+
"step": 100
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 1.030612244897959,
|
| 812 |
+
"grad_norm": 146.39175415039062,
|
| 813 |
+
"learning_rate": 1.0204081632653063e-05,
|
| 814 |
+
"loss": 0.9592,
|
| 815 |
+
"step": 101
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 1.0408163265306123,
|
| 819 |
+
"grad_norm": 69.37447357177734,
|
| 820 |
+
"learning_rate": 1.0306122448979591e-05,
|
| 821 |
+
"loss": 0.3051,
|
| 822 |
+
"step": 102
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 1.0510204081632653,
|
| 826 |
+
"grad_norm": 241.93687438964844,
|
| 827 |
+
"learning_rate": 1.0408163265306123e-05,
|
| 828 |
+
"loss": 0.926,
|
| 829 |
+
"step": 103
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 1.0612244897959184,
|
| 833 |
+
"grad_norm": 13.75313949584961,
|
| 834 |
+
"learning_rate": 1.0510204081632654e-05,
|
| 835 |
+
"loss": 0.0751,
|
| 836 |
+
"step": 104
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 1.0714285714285714,
|
| 840 |
+
"grad_norm": 1.861573576927185,
|
| 841 |
+
"learning_rate": 1.0612244897959186e-05,
|
| 842 |
+
"loss": 0.0111,
|
| 843 |
+
"step": 105
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 1.0816326530612246,
|
| 847 |
+
"grad_norm": 1.4446377754211426,
|
| 848 |
+
"learning_rate": 1.0714285714285714e-05,
|
| 849 |
+
"loss": 0.0072,
|
| 850 |
+
"step": 106
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 1.0918367346938775,
|
| 854 |
+
"grad_norm": 2.217988967895508,
|
| 855 |
+
"learning_rate": 1.0816326530612246e-05,
|
| 856 |
+
"loss": 0.0107,
|
| 857 |
+
"step": 107
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 1.1020408163265305,
|
| 861 |
+
"grad_norm": 620.331787109375,
|
| 862 |
+
"learning_rate": 1.0918367346938776e-05,
|
| 863 |
+
"loss": 3.4505,
|
| 864 |
+
"step": 108
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 1.1122448979591837,
|
| 868 |
+
"grad_norm": 1.4038218259811401,
|
| 869 |
+
"learning_rate": 1.1020408163265306e-05,
|
| 870 |
+
"loss": 0.005,
|
| 871 |
+
"step": 109
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 1.1224489795918366,
|
| 875 |
+
"grad_norm": 46.48203659057617,
|
| 876 |
+
"learning_rate": 1.1122448979591838e-05,
|
| 877 |
+
"loss": 0.1701,
|
| 878 |
+
"step": 110
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 1.1326530612244898,
|
| 882 |
+
"grad_norm": 6.003911972045898,
|
| 883 |
+
"learning_rate": 1.1224489795918367e-05,
|
| 884 |
+
"loss": 0.027,
|
| 885 |
+
"step": 111
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 1.1428571428571428,
|
| 889 |
+
"grad_norm": 379.09527587890625,
|
| 890 |
+
"learning_rate": 1.1326530612244899e-05,
|
| 891 |
+
"loss": 1.3824,
|
| 892 |
+
"step": 112
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 1.153061224489796,
|
| 896 |
+
"grad_norm": 1103.1077880859375,
|
| 897 |
+
"learning_rate": 1.1428571428571429e-05,
|
| 898 |
+
"loss": 8.1459,
|
| 899 |
+
"step": 113
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 1.163265306122449,
|
| 903 |
+
"grad_norm": 29.499439239501953,
|
| 904 |
+
"learning_rate": 1.1530612244897961e-05,
|
| 905 |
+
"loss": 0.0917,
|
| 906 |
+
"step": 114
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 1.1734693877551021,
|
| 910 |
+
"grad_norm": 0.06352390348911285,
|
| 911 |
+
"learning_rate": 1.1632653061224491e-05,
|
| 912 |
+
"loss": 0.0003,
|
| 913 |
+
"step": 115
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 1.183673469387755,
|
| 917 |
+
"grad_norm": 111.55418395996094,
|
| 918 |
+
"learning_rate": 1.1734693877551021e-05,
|
| 919 |
+
"loss": 0.3716,
|
| 920 |
+
"step": 116
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 1.193877551020408,
|
| 924 |
+
"grad_norm": 32.166500091552734,
|
| 925 |
+
"learning_rate": 1.1836734693877552e-05,
|
| 926 |
+
"loss": 0.1704,
|
| 927 |
+
"step": 117
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 1.2040816326530612,
|
| 931 |
+
"grad_norm": 870.0745239257812,
|
| 932 |
+
"learning_rate": 1.1938775510204084e-05,
|
| 933 |
+
"loss": 9.8059,
|
| 934 |
+
"step": 118
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 1.2142857142857142,
|
| 938 |
+
"grad_norm": 139.17662048339844,
|
| 939 |
+
"learning_rate": 1.2040816326530614e-05,
|
| 940 |
+
"loss": 0.5882,
|
| 941 |
+
"step": 119
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 1.2244897959183674,
|
| 945 |
+
"grad_norm": 28.489713668823242,
|
| 946 |
+
"learning_rate": 1.2142857142857142e-05,
|
| 947 |
+
"loss": 0.0531,
|
| 948 |
+
"step": 120
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 1.2346938775510203,
|
| 952 |
+
"grad_norm": 0.18062859773635864,
|
| 953 |
+
"learning_rate": 1.2244897959183674e-05,
|
| 954 |
+
"loss": 0.0005,
|
| 955 |
+
"step": 121
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 1.2448979591836735,
|
| 959 |
+
"grad_norm": 8.26645565032959,
|
| 960 |
+
"learning_rate": 1.2346938775510204e-05,
|
| 961 |
+
"loss": 0.0314,
|
| 962 |
+
"step": 122
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 1.2551020408163265,
|
| 966 |
+
"grad_norm": 64.67955017089844,
|
| 967 |
+
"learning_rate": 1.2448979591836736e-05,
|
| 968 |
+
"loss": 0.1811,
|
| 969 |
+
"step": 123
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 1.2653061224489797,
|
| 973 |
+
"grad_norm": 420.44439697265625,
|
| 974 |
+
"learning_rate": 1.2551020408163267e-05,
|
| 975 |
+
"loss": 2.6136,
|
| 976 |
+
"step": 124
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 1.2755102040816326,
|
| 980 |
+
"grad_norm": 3.5323660373687744,
|
| 981 |
+
"learning_rate": 1.2653061224489798e-05,
|
| 982 |
+
"loss": 0.0087,
|
| 983 |
+
"step": 125
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 1.2857142857142856,
|
| 987 |
+
"grad_norm": 52.854801177978516,
|
| 988 |
+
"learning_rate": 1.2755102040816327e-05,
|
| 989 |
+
"loss": 0.1269,
|
| 990 |
+
"step": 126
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 1.2959183673469388,
|
| 994 |
+
"grad_norm": 4.583413124084473,
|
| 995 |
+
"learning_rate": 1.2857142857142859e-05,
|
| 996 |
+
"loss": 0.0091,
|
| 997 |
+
"step": 127
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 1.306122448979592,
|
| 1001 |
+
"grad_norm": 17.20958137512207,
|
| 1002 |
+
"learning_rate": 1.2959183673469389e-05,
|
| 1003 |
+
"loss": 0.0467,
|
| 1004 |
+
"step": 128
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 1.316326530612245,
|
| 1008 |
+
"grad_norm": 8.821357727050781,
|
| 1009 |
+
"learning_rate": 1.3061224489795918e-05,
|
| 1010 |
+
"loss": 0.0282,
|
| 1011 |
+
"step": 129
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 1.3265306122448979,
|
| 1015 |
+
"grad_norm": 0.3024923503398895,
|
| 1016 |
+
"learning_rate": 1.316326530612245e-05,
|
| 1017 |
+
"loss": 0.0012,
|
| 1018 |
+
"step": 130
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 1.336734693877551,
|
| 1022 |
+
"grad_norm": 1110.76513671875,
|
| 1023 |
+
"learning_rate": 1.326530612244898e-05,
|
| 1024 |
+
"loss": 3.5135,
|
| 1025 |
+
"step": 131
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 1.346938775510204,
|
| 1029 |
+
"grad_norm": 4.655632495880127,
|
| 1030 |
+
"learning_rate": 1.3367346938775512e-05,
|
| 1031 |
+
"loss": 0.0186,
|
| 1032 |
+
"step": 132
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 1.3571428571428572,
|
| 1036 |
+
"grad_norm": 641.764404296875,
|
| 1037 |
+
"learning_rate": 1.3469387755102042e-05,
|
| 1038 |
+
"loss": 3.2599,
|
| 1039 |
+
"step": 133
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 1.3673469387755102,
|
| 1043 |
+
"grad_norm": 1076.8260498046875,
|
| 1044 |
+
"learning_rate": 1.3571428571428574e-05,
|
| 1045 |
+
"loss": 5.5417,
|
| 1046 |
+
"step": 134
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 1.3775510204081631,
|
| 1050 |
+
"grad_norm": 0.5416738390922546,
|
| 1051 |
+
"learning_rate": 1.3673469387755102e-05,
|
| 1052 |
+
"loss": 0.0019,
|
| 1053 |
+
"step": 135
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 1.3877551020408163,
|
| 1057 |
+
"grad_norm": 200.03311157226562,
|
| 1058 |
+
"learning_rate": 1.3775510204081634e-05,
|
| 1059 |
+
"loss": 0.5649,
|
| 1060 |
+
"step": 136
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 1.3979591836734695,
|
| 1064 |
+
"grad_norm": 35.22038650512695,
|
| 1065 |
+
"learning_rate": 1.3877551020408165e-05,
|
| 1066 |
+
"loss": 0.084,
|
| 1067 |
+
"step": 137
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 1.4081632653061225,
|
| 1071 |
+
"grad_norm": 141.9106903076172,
|
| 1072 |
+
"learning_rate": 1.3979591836734696e-05,
|
| 1073 |
+
"loss": 0.6062,
|
| 1074 |
+
"step": 138
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 1.4183673469387754,
|
| 1078 |
+
"grad_norm": 15.920783996582031,
|
| 1079 |
+
"learning_rate": 1.4081632653061225e-05,
|
| 1080 |
+
"loss": 0.0639,
|
| 1081 |
+
"step": 139
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 1.4285714285714286,
|
| 1085 |
+
"grad_norm": 206.33274841308594,
|
| 1086 |
+
"learning_rate": 1.4183673469387755e-05,
|
| 1087 |
+
"loss": 0.4069,
|
| 1088 |
+
"step": 140
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 1.4387755102040816,
|
| 1092 |
+
"grad_norm": 51.149173736572266,
|
| 1093 |
+
"learning_rate": 1.4285714285714287e-05,
|
| 1094 |
+
"loss": 0.2462,
|
| 1095 |
+
"step": 141
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 1.4489795918367347,
|
| 1099 |
+
"grad_norm": 658.653564453125,
|
| 1100 |
+
"learning_rate": 1.4387755102040817e-05,
|
| 1101 |
+
"loss": 4.9288,
|
| 1102 |
+
"step": 142
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 1.4591836734693877,
|
| 1106 |
+
"grad_norm": 63.49065399169922,
|
| 1107 |
+
"learning_rate": 1.448979591836735e-05,
|
| 1108 |
+
"loss": 0.1852,
|
| 1109 |
+
"step": 143
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 1.469387755102041,
|
| 1113 |
+
"grad_norm": 1453.699462890625,
|
| 1114 |
+
"learning_rate": 1.4591836734693878e-05,
|
| 1115 |
+
"loss": 3.0971,
|
| 1116 |
+
"step": 144
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 1.4795918367346939,
|
| 1120 |
+
"grad_norm": 499.0628662109375,
|
| 1121 |
+
"learning_rate": 1.469387755102041e-05,
|
| 1122 |
+
"loss": 3.787,
|
| 1123 |
+
"step": 145
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 1.489795918367347,
|
| 1127 |
+
"grad_norm": 253.33152770996094,
|
| 1128 |
+
"learning_rate": 1.479591836734694e-05,
|
| 1129 |
+
"loss": 0.8474,
|
| 1130 |
+
"step": 146
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 1.5,
|
| 1134 |
+
"grad_norm": 0.8343175649642944,
|
| 1135 |
+
"learning_rate": 1.4897959183673472e-05,
|
| 1136 |
+
"loss": 0.0028,
|
| 1137 |
+
"step": 147
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 1.510204081632653,
|
| 1141 |
+
"grad_norm": 38.5785026550293,
|
| 1142 |
+
"learning_rate": 1.5000000000000002e-05,
|
| 1143 |
+
"loss": 0.0931,
|
| 1144 |
+
"step": 148
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 1.5204081632653061,
|
| 1148 |
+
"grad_norm": 563.4974365234375,
|
| 1149 |
+
"learning_rate": 1.510204081632653e-05,
|
| 1150 |
+
"loss": 1.8378,
|
| 1151 |
+
"step": 149
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 1.5306122448979593,
|
| 1155 |
+
"grad_norm": 749.0945434570312,
|
| 1156 |
+
"learning_rate": 1.5204081632653063e-05,
|
| 1157 |
+
"loss": 2.6074,
|
| 1158 |
+
"step": 150
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 1.5408163265306123,
|
| 1162 |
+
"grad_norm": 62.52786636352539,
|
| 1163 |
+
"learning_rate": 1.530612244897959e-05,
|
| 1164 |
+
"loss": 0.1441,
|
| 1165 |
+
"step": 151
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 1.5510204081632653,
|
| 1169 |
+
"grad_norm": 281.54400634765625,
|
| 1170 |
+
"learning_rate": 1.5408163265306123e-05,
|
| 1171 |
+
"loss": 0.5622,
|
| 1172 |
+
"step": 152
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 1.5612244897959182,
|
| 1176 |
+
"grad_norm": 1.1233166456222534,
|
| 1177 |
+
"learning_rate": 1.5510204081632655e-05,
|
| 1178 |
+
"loss": 0.0049,
|
| 1179 |
+
"step": 153
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 1.5714285714285714,
|
| 1183 |
+
"grad_norm": 9.458003044128418,
|
| 1184 |
+
"learning_rate": 1.5612244897959187e-05,
|
| 1185 |
+
"loss": 0.0268,
|
| 1186 |
+
"step": 154
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 1.5816326530612246,
|
| 1190 |
+
"grad_norm": 7.9042439460754395,
|
| 1191 |
+
"learning_rate": 1.5714285714285715e-05,
|
| 1192 |
+
"loss": 0.0281,
|
| 1193 |
+
"step": 155
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 1.5918367346938775,
|
| 1197 |
+
"grad_norm": 402.8667907714844,
|
| 1198 |
+
"learning_rate": 1.5816326530612247e-05,
|
| 1199 |
+
"loss": 2.9755,
|
| 1200 |
+
"step": 156
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 1.6020408163265305,
|
| 1204 |
+
"grad_norm": 359.3101806640625,
|
| 1205 |
+
"learning_rate": 1.5918367346938776e-05,
|
| 1206 |
+
"loss": 1.0982,
|
| 1207 |
+
"step": 157
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 1.6122448979591837,
|
| 1211 |
+
"grad_norm": 26.466707229614258,
|
| 1212 |
+
"learning_rate": 1.6020408163265308e-05,
|
| 1213 |
+
"loss": 0.0621,
|
| 1214 |
+
"step": 158
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 1.6224489795918369,
|
| 1218 |
+
"grad_norm": 472.1581726074219,
|
| 1219 |
+
"learning_rate": 1.612244897959184e-05,
|
| 1220 |
+
"loss": 6.9631,
|
| 1221 |
+
"step": 159
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 1.6326530612244898,
|
| 1225 |
+
"grad_norm": 812.54638671875,
|
| 1226 |
+
"learning_rate": 1.6224489795918368e-05,
|
| 1227 |
+
"loss": 4.7216,
|
| 1228 |
+
"step": 160
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 1.6428571428571428,
|
| 1232 |
+
"grad_norm": 252.12796020507812,
|
| 1233 |
+
"learning_rate": 1.63265306122449e-05,
|
| 1234 |
+
"loss": 0.848,
|
| 1235 |
+
"step": 161
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 1.6530612244897958,
|
| 1239 |
+
"grad_norm": 1087.48828125,
|
| 1240 |
+
"learning_rate": 1.642857142857143e-05,
|
| 1241 |
+
"loss": 5.6006,
|
| 1242 |
+
"step": 162
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 1.663265306122449,
|
| 1246 |
+
"grad_norm": 280.405517578125,
|
| 1247 |
+
"learning_rate": 1.653061224489796e-05,
|
| 1248 |
+
"loss": 4.299,
|
| 1249 |
+
"step": 163
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 1.6734693877551021,
|
| 1253 |
+
"grad_norm": 457.81494140625,
|
| 1254 |
+
"learning_rate": 1.6632653061224492e-05,
|
| 1255 |
+
"loss": 2.042,
|
| 1256 |
+
"step": 164
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"epoch": 1.683673469387755,
|
| 1260 |
+
"grad_norm": 511.0380859375,
|
| 1261 |
+
"learning_rate": 1.673469387755102e-05,
|
| 1262 |
+
"loss": 2.4823,
|
| 1263 |
+
"step": 165
|
| 1264 |
+
},
|
| 1265 |
+
{
|
| 1266 |
+
"epoch": 1.693877551020408,
|
| 1267 |
+
"grad_norm": 7.505221366882324,
|
| 1268 |
+
"learning_rate": 1.6836734693877553e-05,
|
| 1269 |
+
"loss": 0.0189,
|
| 1270 |
+
"step": 166
|
| 1271 |
+
},
|
| 1272 |
+
{
|
| 1273 |
+
"epoch": 1.7040816326530612,
|
| 1274 |
+
"grad_norm": 1.01173734664917,
|
| 1275 |
+
"learning_rate": 1.6938775510204085e-05,
|
| 1276 |
+
"loss": 0.0039,
|
| 1277 |
+
"step": 167
|
| 1278 |
+
},
|
| 1279 |
+
{
|
| 1280 |
+
"epoch": 1.7142857142857144,
|
| 1281 |
+
"grad_norm": 0.5971992015838623,
|
| 1282 |
+
"learning_rate": 1.7040816326530613e-05,
|
| 1283 |
+
"loss": 0.0024,
|
| 1284 |
+
"step": 168
|
| 1285 |
+
},
|
| 1286 |
+
{
|
| 1287 |
+
"epoch": 1.7244897959183674,
|
| 1288 |
+
"grad_norm": 505.6401672363281,
|
| 1289 |
+
"learning_rate": 1.7142857142857142e-05,
|
| 1290 |
+
"loss": 2.0453,
|
| 1291 |
+
"step": 169
|
| 1292 |
+
},
|
| 1293 |
+
{
|
| 1294 |
+
"epoch": 1.7346938775510203,
|
| 1295 |
+
"grad_norm": 4.466002464294434,
|
| 1296 |
+
"learning_rate": 1.7244897959183674e-05,
|
| 1297 |
+
"loss": 0.0092,
|
| 1298 |
+
"step": 170
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"epoch": 1.7448979591836735,
|
| 1302 |
+
"grad_norm": 1.1195125579833984,
|
| 1303 |
+
"learning_rate": 1.7346938775510206e-05,
|
| 1304 |
+
"loss": 0.0029,
|
| 1305 |
+
"step": 171
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"epoch": 1.7551020408163265,
|
| 1309 |
+
"grad_norm": 104.82202911376953,
|
| 1310 |
+
"learning_rate": 1.7448979591836738e-05,
|
| 1311 |
+
"loss": 0.3271,
|
| 1312 |
+
"step": 172
|
| 1313 |
+
},
|
| 1314 |
+
{
|
| 1315 |
+
"epoch": 1.7653061224489797,
|
| 1316 |
+
"grad_norm": 1.860406756401062,
|
| 1317 |
+
"learning_rate": 1.7551020408163266e-05,
|
| 1318 |
+
"loss": 0.0054,
|
| 1319 |
+
"step": 173
|
| 1320 |
+
},
|
| 1321 |
+
{
|
| 1322 |
+
"epoch": 1.7755102040816326,
|
| 1323 |
+
"grad_norm": 0.044311508536338806,
|
| 1324 |
+
"learning_rate": 1.7653061224489798e-05,
|
| 1325 |
+
"loss": 0.0002,
|
| 1326 |
+
"step": 174
|
| 1327 |
+
},
|
| 1328 |
+
{
|
| 1329 |
+
"epoch": 1.7857142857142856,
|
| 1330 |
+
"grad_norm": 40.70656204223633,
|
| 1331 |
+
"learning_rate": 1.7755102040816327e-05,
|
| 1332 |
+
"loss": 0.0685,
|
| 1333 |
+
"step": 175
|
| 1334 |
+
},
|
| 1335 |
+
{
|
| 1336 |
+
"epoch": 1.7959183673469388,
|
| 1337 |
+
"grad_norm": 395.348388671875,
|
| 1338 |
+
"learning_rate": 1.785714285714286e-05,
|
| 1339 |
+
"loss": 1.3097,
|
| 1340 |
+
"step": 176
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"epoch": 1.806122448979592,
|
| 1344 |
+
"grad_norm": 326.2778015136719,
|
| 1345 |
+
"learning_rate": 1.795918367346939e-05,
|
| 1346 |
+
"loss": 1.8817,
|
| 1347 |
+
"step": 177
|
| 1348 |
+
},
|
| 1349 |
+
{
|
| 1350 |
+
"epoch": 1.816326530612245,
|
| 1351 |
+
"grad_norm": 41.05072784423828,
|
| 1352 |
+
"learning_rate": 1.806122448979592e-05,
|
| 1353 |
+
"loss": 0.2497,
|
| 1354 |
+
"step": 178
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 1.8265306122448979,
|
| 1358 |
+
"grad_norm": 121.29589080810547,
|
| 1359 |
+
"learning_rate": 1.816326530612245e-05,
|
| 1360 |
+
"loss": 0.5822,
|
| 1361 |
+
"step": 179
|
| 1362 |
+
},
|
| 1363 |
+
{
|
| 1364 |
+
"epoch": 1.836734693877551,
|
| 1365 |
+
"grad_norm": 711.2618408203125,
|
| 1366 |
+
"learning_rate": 1.826530612244898e-05,
|
| 1367 |
+
"loss": 1.8103,
|
| 1368 |
+
"step": 180
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"epoch": 1.8469387755102042,
|
| 1372 |
+
"grad_norm": 500.7347106933594,
|
| 1373 |
+
"learning_rate": 1.836734693877551e-05,
|
| 1374 |
+
"loss": 1.5506,
|
| 1375 |
+
"step": 181
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"epoch": 1.8571428571428572,
|
| 1379 |
+
"grad_norm": 252.05322265625,
|
| 1380 |
+
"learning_rate": 1.8469387755102043e-05,
|
| 1381 |
+
"loss": 1.281,
|
| 1382 |
+
"step": 182
|
| 1383 |
+
},
|
| 1384 |
+
{
|
| 1385 |
+
"epoch": 1.8673469387755102,
|
| 1386 |
+
"grad_norm": 370.9935302734375,
|
| 1387 |
+
"learning_rate": 1.8571428571428575e-05,
|
| 1388 |
+
"loss": 2.8616,
|
| 1389 |
+
"step": 183
|
| 1390 |
+
},
|
| 1391 |
+
{
|
| 1392 |
+
"epoch": 1.8775510204081631,
|
| 1393 |
+
"grad_norm": 4.682647705078125,
|
| 1394 |
+
"learning_rate": 1.8673469387755104e-05,
|
| 1395 |
+
"loss": 0.0118,
|
| 1396 |
+
"step": 184
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"epoch": 1.8877551020408163,
|
| 1400 |
+
"grad_norm": 2.143557548522949,
|
| 1401 |
+
"learning_rate": 1.8775510204081636e-05,
|
| 1402 |
+
"loss": 0.0038,
|
| 1403 |
+
"step": 185
|
| 1404 |
+
},
|
| 1405 |
+
{
|
| 1406 |
+
"epoch": 1.8979591836734695,
|
| 1407 |
+
"grad_norm": 6.499508857727051,
|
| 1408 |
+
"learning_rate": 1.8877551020408164e-05,
|
| 1409 |
+
"loss": 0.0331,
|
| 1410 |
+
"step": 186
|
| 1411 |
+
},
|
| 1412 |
+
{
|
| 1413 |
+
"epoch": 1.9081632653061225,
|
| 1414 |
+
"grad_norm": 7.2162089347839355,
|
| 1415 |
+
"learning_rate": 1.8979591836734696e-05,
|
| 1416 |
+
"loss": 0.0273,
|
| 1417 |
+
"step": 187
|
| 1418 |
+
},
|
| 1419 |
+
{
|
| 1420 |
+
"epoch": 1.9183673469387754,
|
| 1421 |
+
"grad_norm": 23.073841094970703,
|
| 1422 |
+
"learning_rate": 1.9081632653061225e-05,
|
| 1423 |
+
"loss": 0.1026,
|
| 1424 |
+
"step": 188
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"epoch": 1.9285714285714286,
|
| 1428 |
+
"grad_norm": 48.74525833129883,
|
| 1429 |
+
"learning_rate": 1.9183673469387756e-05,
|
| 1430 |
+
"loss": 0.1942,
|
| 1431 |
+
"step": 189
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"epoch": 1.9387755102040818,
|
| 1435 |
+
"grad_norm": 384.64678955078125,
|
| 1436 |
+
"learning_rate": 1.928571428571429e-05,
|
| 1437 |
+
"loss": 3.4886,
|
| 1438 |
+
"step": 190
|
| 1439 |
+
},
|
| 1440 |
+
{
|
| 1441 |
+
"epoch": 1.9489795918367347,
|
| 1442 |
+
"grad_norm": 103.53422546386719,
|
| 1443 |
+
"learning_rate": 1.9387755102040817e-05,
|
| 1444 |
+
"loss": 0.628,
|
| 1445 |
+
"step": 191
|
| 1446 |
+
},
|
| 1447 |
+
{
|
| 1448 |
+
"epoch": 1.9591836734693877,
|
| 1449 |
+
"grad_norm": 42.5008544921875,
|
| 1450 |
+
"learning_rate": 1.948979591836735e-05,
|
| 1451 |
+
"loss": 0.1967,
|
| 1452 |
+
"step": 192
|
| 1453 |
+
},
|
| 1454 |
+
{
|
| 1455 |
+
"epoch": 1.9693877551020407,
|
| 1456 |
+
"grad_norm": 145.1553955078125,
|
| 1457 |
+
"learning_rate": 1.9591836734693877e-05,
|
| 1458 |
+
"loss": 3.9822,
|
| 1459 |
+
"step": 193
|
| 1460 |
+
},
|
| 1461 |
+
{
|
| 1462 |
+
"epoch": 1.9795918367346939,
|
| 1463 |
+
"grad_norm": 0.07428821176290512,
|
| 1464 |
+
"learning_rate": 1.969387755102041e-05,
|
| 1465 |
+
"loss": 0.0003,
|
| 1466 |
+
"step": 194
|
| 1467 |
+
},
|
| 1468 |
+
{
|
| 1469 |
+
"epoch": 1.989795918367347,
|
| 1470 |
+
"grad_norm": 545.6088256835938,
|
| 1471 |
+
"learning_rate": 1.979591836734694e-05,
|
| 1472 |
+
"loss": 3.7309,
|
| 1473 |
+
"step": 195
|
| 1474 |
+
},
|
| 1475 |
+
{
|
| 1476 |
+
"epoch": 2.0,
|
| 1477 |
+
"grad_norm": 0.5490627288818359,
|
| 1478 |
+
"learning_rate": 1.9897959183673473e-05,
|
| 1479 |
+
"loss": 0.0024,
|
| 1480 |
+
"step": 196
|
| 1481 |
+
},
|
| 1482 |
+
{
|
| 1483 |
+
"epoch": 2.0,
|
| 1484 |
+
"eval_dim_1024_cosine_accuracy@1": 0.32522407170294493,
|
| 1485 |
+
"eval_dim_1024_cosine_accuracy@10": 0.3969270166453265,
|
| 1486 |
+
"eval_dim_1024_cosine_accuracy@3": 0.33290653008962867,
|
| 1487 |
+
"eval_dim_1024_cosine_accuracy@5": 0.36043533930857874,
|
| 1488 |
+
"eval_dim_1024_cosine_map@100": 0.4164888021641558,
|
| 1489 |
+
"eval_dim_1024_cosine_mrr@10": 0.33769460195516493,
|
| 1490 |
+
"eval_dim_1024_cosine_ndcg@10": 0.34986350069216465,
|
| 1491 |
+
"eval_dim_1024_cosine_precision@1": 0.32522407170294493,
|
| 1492 |
+
"eval_dim_1024_cosine_precision@10": 0.28361075544174136,
|
| 1493 |
+
"eval_dim_1024_cosine_precision@3": 0.3254374733247973,
|
| 1494 |
+
"eval_dim_1024_cosine_precision@5": 0.31626120358514725,
|
| 1495 |
+
"eval_dim_1024_cosine_recall@1": 0.04113491331982186,
|
| 1496 |
+
"eval_dim_1024_cosine_recall@10": 0.2664549051060991,
|
| 1497 |
+
"eval_dim_1024_cosine_recall@3": 0.12080229545561262,
|
| 1498 |
+
"eval_dim_1024_cosine_recall@5": 0.18183789253196145,
|
| 1499 |
+
"eval_dim_128_cosine_accuracy@1": 0.30217669654289375,
|
| 1500 |
+
"eval_dim_128_cosine_accuracy@10": 0.3546734955185659,
|
| 1501 |
+
"eval_dim_128_cosine_accuracy@3": 0.3072983354673495,
|
| 1502 |
+
"eval_dim_128_cosine_accuracy@5": 0.3265044814340589,
|
| 1503 |
+
"eval_dim_128_cosine_map@100": 0.38014172959059034,
|
| 1504 |
+
"eval_dim_128_cosine_mrr@10": 0.3112729406743488,
|
| 1505 |
+
"eval_dim_128_cosine_ndcg@10": 0.32071443787836906,
|
| 1506 |
+
"eval_dim_128_cosine_precision@1": 0.30217669654289375,
|
| 1507 |
+
"eval_dim_128_cosine_precision@10": 0.26312419974391804,
|
| 1508 |
+
"eval_dim_128_cosine_precision@3": 0.30239009816474605,
|
| 1509 |
+
"eval_dim_128_cosine_precision@5": 0.29359795134443023,
|
| 1510 |
+
"eval_dim_128_cosine_recall@1": 0.03603846894598867,
|
| 1511 |
+
"eval_dim_128_cosine_recall@10": 0.23664446759855584,
|
| 1512 |
+
"eval_dim_128_cosine_recall@3": 0.10607255532328354,
|
| 1513 |
+
"eval_dim_128_cosine_recall@5": 0.15998840334482403,
|
| 1514 |
+
"eval_dim_256_cosine_accuracy@1": 0.31049935979513443,
|
| 1515 |
+
"eval_dim_256_cosine_accuracy@10": 0.3725992317541613,
|
| 1516 |
+
"eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
|
| 1517 |
+
"eval_dim_256_cosine_accuracy@5": 0.34571062740076824,
|
| 1518 |
+
"eval_dim_256_cosine_map@100": 0.3940538127924734,
|
| 1519 |
+
"eval_dim_256_cosine_mrr@10": 0.3219094872263883,
|
| 1520 |
+
"eval_dim_256_cosine_ndcg@10": 0.33365785011470184,
|
| 1521 |
+
"eval_dim_256_cosine_precision@1": 0.31049935979513443,
|
| 1522 |
+
"eval_dim_256_cosine_precision@10": 0.2727272727272727,
|
| 1523 |
+
"eval_dim_256_cosine_precision@3": 0.3109261630388391,
|
| 1524 |
+
"eval_dim_256_cosine_precision@5": 0.3035851472471191,
|
| 1525 |
+
"eval_dim_256_cosine_recall@1": 0.0379038673811849,
|
| 1526 |
+
"eval_dim_256_cosine_recall@10": 0.25061548215235363,
|
| 1527 |
+
"eval_dim_256_cosine_recall@3": 0.11184662439829526,
|
| 1528 |
+
"eval_dim_256_cosine_recall@5": 0.16972372403865282,
|
| 1529 |
+
"eval_dim_512_cosine_accuracy@1": 0.32842509603072984,
|
| 1530 |
+
"eval_dim_512_cosine_accuracy@10": 0.39564660691421255,
|
| 1531 |
+
"eval_dim_512_cosine_accuracy@3": 0.33418693982074266,
|
| 1532 |
+
"eval_dim_512_cosine_accuracy@5": 0.36555697823303457,
|
| 1533 |
+
"eval_dim_512_cosine_map@100": 0.4125328284000196,
|
| 1534 |
+
"eval_dim_512_cosine_mrr@10": 0.34027168058858154,
|
| 1535 |
+
"eval_dim_512_cosine_ndcg@10": 0.3525488928748249,
|
| 1536 |
+
"eval_dim_512_cosine_precision@1": 0.32842509603072984,
|
| 1537 |
+
"eval_dim_512_cosine_precision@10": 0.28693982074263763,
|
| 1538 |
+
"eval_dim_512_cosine_precision@3": 0.3282116944088775,
|
| 1539 |
+
"eval_dim_512_cosine_precision@5": 0.31997439180537773,
|
| 1540 |
+
"eval_dim_512_cosine_recall@1": 0.04071091183465321,
|
| 1541 |
+
"eval_dim_512_cosine_recall@10": 0.2638449444559509,
|
| 1542 |
+
"eval_dim_512_cosine_recall@3": 0.11970757850133786,
|
| 1543 |
+
"eval_dim_512_cosine_recall@5": 0.1806811237454132,
|
| 1544 |
+
"eval_dim_64_cosine_accuracy@1": 0.28040973111395645,
|
| 1545 |
+
"eval_dim_64_cosine_accuracy@10": 0.3348271446862996,
|
| 1546 |
+
"eval_dim_64_cosine_accuracy@3": 0.28297055057618437,
|
| 1547 |
+
"eval_dim_64_cosine_accuracy@5": 0.3072983354673495,
|
| 1548 |
+
"eval_dim_64_cosine_map@100": 0.35085623648833997,
|
| 1549 |
+
"eval_dim_64_cosine_mrr@10": 0.28944678170030247,
|
| 1550 |
+
"eval_dim_64_cosine_ndcg@10": 0.2991224720529457,
|
| 1551 |
+
"eval_dim_64_cosine_precision@1": 0.28040973111395645,
|
| 1552 |
+
"eval_dim_64_cosine_precision@10": 0.24878361075544175,
|
| 1553 |
+
"eval_dim_64_cosine_precision@3": 0.27955612462654716,
|
| 1554 |
+
"eval_dim_64_cosine_precision@5": 0.27247119078105,
|
| 1555 |
+
"eval_dim_64_cosine_recall@1": 0.03187808455878807,
|
| 1556 |
+
"eval_dim_64_cosine_recall@10": 0.2128007008801171,
|
| 1557 |
+
"eval_dim_64_cosine_recall@3": 0.09363361347149868,
|
| 1558 |
+
"eval_dim_64_cosine_recall@5": 0.14192536615474802,
|
| 1559 |
+
"eval_dim_768_cosine_accuracy@1": 0.32970550576184376,
|
| 1560 |
+
"eval_dim_768_cosine_accuracy@10": 0.3994878361075544,
|
| 1561 |
+
"eval_dim_768_cosine_accuracy@3": 0.33418693982074266,
|
| 1562 |
+
"eval_dim_768_cosine_accuracy@5": 0.36427656850192064,
|
| 1563 |
+
"eval_dim_768_cosine_map@100": 0.4160652625925415,
|
| 1564 |
+
"eval_dim_768_cosine_mrr@10": 0.3415124585899229,
|
| 1565 |
+
"eval_dim_768_cosine_ndcg@10": 0.35370573856938964,
|
| 1566 |
+
"eval_dim_768_cosine_precision@1": 0.32970550576184376,
|
| 1567 |
+
"eval_dim_768_cosine_precision@10": 0.2877720870678617,
|
| 1568 |
+
"eval_dim_768_cosine_precision@3": 0.3288518992744345,
|
| 1569 |
+
"eval_dim_768_cosine_precision@5": 0.31997439180537773,
|
| 1570 |
+
"eval_dim_768_cosine_recall@1": 0.040955758827011135,
|
| 1571 |
+
"eval_dim_768_cosine_recall@10": 0.26685683005601735,
|
| 1572 |
+
"eval_dim_768_cosine_recall@3": 0.12009305539695316,
|
| 1573 |
+
"eval_dim_768_cosine_recall@5": 0.18142212378067016,
|
| 1574 |
+
"eval_runtime": 99.167,
|
| 1575 |
+
"eval_samples_per_second": 0.0,
|
| 1576 |
+
"eval_sequential_score": 0.2991224720529457,
|
| 1577 |
+
"eval_steps_per_second": 0.0,
|
| 1578 |
+
"step": 196
|
| 1579 |
+
},
|
| 1580 |
+
{
|
| 1581 |
+
"epoch": 2.010204081632653,
|
| 1582 |
+
"grad_norm": 231.35763549804688,
|
| 1583 |
+
"learning_rate": 2e-05,
|
| 1584 |
+
"loss": 4.7353,
|
| 1585 |
+
"step": 197
|
| 1586 |
+
},
|
| 1587 |
+
{
|
| 1588 |
+
"epoch": 2.020408163265306,
|
| 1589 |
+
"grad_norm": 26.110666275024414,
|
| 1590 |
+
"learning_rate": 1.9999984141121447e-05,
|
| 1591 |
+
"loss": 0.0998,
|
| 1592 |
+
"step": 198
|
| 1593 |
+
},
|
| 1594 |
+
{
|
| 1595 |
+
"epoch": 2.0306122448979593,
|
| 1596 |
+
"grad_norm": 17.7508544921875,
|
| 1597 |
+
"learning_rate": 1.9999936564536085e-05,
|
| 1598 |
+
"loss": 0.0474,
|
| 1599 |
+
"step": 199
|
| 1600 |
+
},
|
| 1601 |
+
{
|
| 1602 |
+
"epoch": 2.0408163265306123,
|
| 1603 |
+
"grad_norm": 0.015349287539720535,
|
| 1604 |
+
"learning_rate": 1.9999857270394818e-05,
|
| 1605 |
+
"loss": 0.0,
|
| 1606 |
+
"step": 200
|
| 1607 |
+
},
|
| 1608 |
+
{
|
| 1609 |
+
"epoch": 2.0510204081632653,
|
| 1610 |
+
"grad_norm": 11.307738304138184,
|
| 1611 |
+
"learning_rate": 1.9999746258949146e-05,
|
| 1612 |
+
"loss": 0.0592,
|
| 1613 |
+
"step": 201
|
| 1614 |
+
},
|
| 1615 |
+
{
|
| 1616 |
+
"epoch": 2.061224489795918,
|
| 1617 |
+
"grad_norm": 0.015763908624649048,
|
| 1618 |
+
"learning_rate": 1.9999603530551178e-05,
|
| 1619 |
+
"loss": 0.0001,
|
| 1620 |
+
"step": 202
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"epoch": 2.0714285714285716,
|
| 1624 |
+
"grad_norm": 104.52203369140625,
|
| 1625 |
+
"learning_rate": 1.999942908565361e-05,
|
| 1626 |
+
"loss": 0.5587,
|
| 1627 |
+
"step": 203
|
| 1628 |
+
},
|
| 1629 |
+
{
|
| 1630 |
+
"epoch": 2.0816326530612246,
|
| 1631 |
+
"grad_norm": 149.8668212890625,
|
| 1632 |
+
"learning_rate": 1.999922292480975e-05,
|
| 1633 |
+
"loss": 1.9037,
|
| 1634 |
+
"step": 204
|
| 1635 |
+
},
|
| 1636 |
+
{
|
| 1637 |
+
"epoch": 2.0918367346938775,
|
| 1638 |
+
"grad_norm": 28.32903480529785,
|
| 1639 |
+
"learning_rate": 1.9998985048673486e-05,
|
| 1640 |
+
"loss": 0.1247,
|
| 1641 |
+
"step": 205
|
| 1642 |
+
},
|
| 1643 |
+
{
|
| 1644 |
+
"epoch": 2.1020408163265305,
|
| 1645 |
+
"grad_norm": 361.1968688964844,
|
| 1646 |
+
"learning_rate": 1.9998715457999313e-05,
|
| 1647 |
+
"loss": 2.3233,
|
| 1648 |
+
"step": 206
|
| 1649 |
+
},
|
| 1650 |
+
{
|
| 1651 |
+
"epoch": 2.1122448979591835,
|
| 1652 |
+
"grad_norm": 96.5677719116211,
|
| 1653 |
+
"learning_rate": 1.999841415364231e-05,
|
| 1654 |
+
"loss": 0.255,
|
| 1655 |
+
"step": 207
|
| 1656 |
+
},
|
| 1657 |
+
{
|
| 1658 |
+
"epoch": 2.122448979591837,
|
| 1659 |
+
"grad_norm": 80.80358123779297,
|
| 1660 |
+
"learning_rate": 1.999808113655815e-05,
|
| 1661 |
+
"loss": 0.3498,
|
| 1662 |
+
"step": 208
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"epoch": 2.13265306122449,
|
| 1666 |
+
"grad_norm": 0.5918006300926208,
|
| 1667 |
+
"learning_rate": 1.999771640780308e-05,
|
| 1668 |
+
"loss": 0.003,
|
| 1669 |
+
"step": 209
|
| 1670 |
+
},
|
| 1671 |
+
{
|
| 1672 |
+
"epoch": 2.142857142857143,
|
| 1673 |
+
"grad_norm": 1223.066650390625,
|
| 1674 |
+
"learning_rate": 1.999731996853395e-05,
|
| 1675 |
+
"loss": 9.2851,
|
| 1676 |
+
"step": 210
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 2.1530612244897958,
|
| 1680 |
+
"grad_norm": 310.9404602050781,
|
| 1681 |
+
"learning_rate": 1.9996891820008165e-05,
|
| 1682 |
+
"loss": 1.0812,
|
| 1683 |
+
"step": 211
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"epoch": 2.163265306122449,
|
| 1687 |
+
"grad_norm": 72.62071990966797,
|
| 1688 |
+
"learning_rate": 1.9996431963583724e-05,
|
| 1689 |
+
"loss": 0.3192,
|
| 1690 |
+
"step": 212
|
| 1691 |
+
},
|
| 1692 |
+
{
|
| 1693 |
+
"epoch": 2.173469387755102,
|
| 1694 |
+
"grad_norm": 3.9059784412384033,
|
| 1695 |
+
"learning_rate": 1.9995940400719184e-05,
|
| 1696 |
+
"loss": 0.0121,
|
| 1697 |
+
"step": 213
|
| 1698 |
+
},
|
| 1699 |
+
{
|
| 1700 |
+
"epoch": 2.183673469387755,
|
| 1701 |
+
"grad_norm": 753.7849731445312,
|
| 1702 |
+
"learning_rate": 1.9995417132973674e-05,
|
| 1703 |
+
"loss": 5.7421,
|
| 1704 |
+
"step": 214
|
| 1705 |
+
},
|
| 1706 |
+
{
|
| 1707 |
+
"epoch": 2.193877551020408,
|
| 1708 |
+
"grad_norm": 63.62609100341797,
|
| 1709 |
+
"learning_rate": 1.999486216200688e-05,
|
| 1710 |
+
"loss": 0.2867,
|
| 1711 |
+
"step": 215
|
| 1712 |
+
},
|
| 1713 |
+
{
|
| 1714 |
+
"epoch": 2.204081632653061,
|
| 1715 |
+
"grad_norm": 47.21674346923828,
|
| 1716 |
+
"learning_rate": 1.999427548957905e-05,
|
| 1717 |
+
"loss": 0.2971,
|
| 1718 |
+
"step": 216
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"epoch": 2.2142857142857144,
|
| 1722 |
+
"grad_norm": 274.9565734863281,
|
| 1723 |
+
"learning_rate": 1.9993657117550972e-05,
|
| 1724 |
+
"loss": 1.616,
|
| 1725 |
+
"step": 217
|
| 1726 |
+
},
|
| 1727 |
+
{
|
| 1728 |
+
"epoch": 2.2244897959183674,
|
| 1729 |
+
"grad_norm": 151.56639099121094,
|
| 1730 |
+
"learning_rate": 1.9993007047883988e-05,
|
| 1731 |
+
"loss": 0.8724,
|
| 1732 |
+
"step": 218
|
| 1733 |
+
},
|
| 1734 |
+
{
|
| 1735 |
+
"epoch": 2.2346938775510203,
|
| 1736 |
+
"grad_norm": 2.2050163745880127,
|
| 1737 |
+
"learning_rate": 1.999232528263997e-05,
|
| 1738 |
+
"loss": 0.0049,
|
| 1739 |
+
"step": 219
|
| 1740 |
+
},
|
| 1741 |
+
{
|
| 1742 |
+
"epoch": 2.2448979591836733,
|
| 1743 |
+
"grad_norm": 93.08734130859375,
|
| 1744 |
+
"learning_rate": 1.9991611823981322e-05,
|
| 1745 |
+
"loss": 0.285,
|
| 1746 |
+
"step": 220
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"epoch": 2.2551020408163267,
|
| 1750 |
+
"grad_norm": 1049.092529296875,
|
| 1751 |
+
"learning_rate": 1.9990866674170984e-05,
|
| 1752 |
+
"loss": 4.5212,
|
| 1753 |
+
"step": 221
|
| 1754 |
+
},
|
| 1755 |
+
{
|
| 1756 |
+
"epoch": 2.2653061224489797,
|
| 1757 |
+
"grad_norm": 532.4985961914062,
|
| 1758 |
+
"learning_rate": 1.99900898355724e-05,
|
| 1759 |
+
"loss": 1.0961,
|
| 1760 |
+
"step": 222
|
| 1761 |
+
},
|
| 1762 |
+
{
|
| 1763 |
+
"epoch": 2.2755102040816326,
|
| 1764 |
+
"grad_norm": 13.340104103088379,
|
| 1765 |
+
"learning_rate": 1.9989281310649516e-05,
|
| 1766 |
+
"loss": 0.0426,
|
| 1767 |
+
"step": 223
|
| 1768 |
+
},
|
| 1769 |
+
{
|
| 1770 |
+
"epoch": 2.2857142857142856,
|
| 1771 |
+
"grad_norm": 894.0902099609375,
|
| 1772 |
+
"learning_rate": 1.9988441101966807e-05,
|
| 1773 |
+
"loss": 6.8518,
|
| 1774 |
+
"step": 224
|
| 1775 |
+
},
|
| 1776 |
+
{
|
| 1777 |
+
"epoch": 2.295918367346939,
|
| 1778 |
+
"grad_norm": 1.1868412494659424,
|
| 1779 |
+
"learning_rate": 1.9987569212189224e-05,
|
| 1780 |
+
"loss": 0.0034,
|
| 1781 |
+
"step": 225
|
| 1782 |
+
},
|
| 1783 |
+
{
|
| 1784 |
+
"epoch": 2.306122448979592,
|
| 1785 |
+
"grad_norm": 13.72503662109375,
|
| 1786 |
+
"learning_rate": 1.9986665644082204e-05,
|
| 1787 |
+
"loss": 0.0195,
|
| 1788 |
+
"step": 226
|
| 1789 |
+
},
|
| 1790 |
+
{
|
| 1791 |
+
"epoch": 2.316326530612245,
|
| 1792 |
+
"grad_norm": 14.843038558959961,
|
| 1793 |
+
"learning_rate": 1.9985730400511658e-05,
|
| 1794 |
+
"loss": 0.0502,
|
| 1795 |
+
"step": 227
|
| 1796 |
+
},
|
| 1797 |
+
{
|
| 1798 |
+
"epoch": 2.326530612244898,
|
| 1799 |
+
"grad_norm": 226.0292205810547,
|
| 1800 |
+
"learning_rate": 1.998476348444397e-05,
|
| 1801 |
+
"loss": 0.4465,
|
| 1802 |
+
"step": 228
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 2.336734693877551,
|
| 1806 |
+
"grad_norm": 0.6770716309547424,
|
| 1807 |
+
"learning_rate": 1.998376489894599e-05,
|
| 1808 |
+
"loss": 0.0024,
|
| 1809 |
+
"step": 229
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"epoch": 2.3469387755102042,
|
| 1813 |
+
"grad_norm": 101.99034881591797,
|
| 1814 |
+
"learning_rate": 1.9982734647184997e-05,
|
| 1815 |
+
"loss": 0.4306,
|
| 1816 |
+
"step": 230
|
| 1817 |
+
},
|
| 1818 |
+
{
|
| 1819 |
+
"epoch": 2.357142857142857,
|
| 1820 |
+
"grad_norm": 402.2799377441406,
|
| 1821 |
+
"learning_rate": 1.998167273242872e-05,
|
| 1822 |
+
"loss": 1.4035,
|
| 1823 |
+
"step": 231
|
| 1824 |
+
},
|
| 1825 |
+
{
|
| 1826 |
+
"epoch": 2.36734693877551,
|
| 1827 |
+
"grad_norm": 1080.5897216796875,
|
| 1828 |
+
"learning_rate": 1.9980579158045322e-05,
|
| 1829 |
+
"loss": 10.1881,
|
| 1830 |
+
"step": 232
|
| 1831 |
+
},
|
| 1832 |
+
{
|
| 1833 |
+
"epoch": 2.377551020408163,
|
| 1834 |
+
"grad_norm": 701.9442138671875,
|
| 1835 |
+
"learning_rate": 1.9979453927503366e-05,
|
| 1836 |
+
"loss": 3.6306,
|
| 1837 |
+
"step": 233
|
| 1838 |
+
},
|
| 1839 |
+
{
|
| 1840 |
+
"epoch": 2.387755102040816,
|
| 1841 |
+
"grad_norm": 526.434326171875,
|
| 1842 |
+
"learning_rate": 1.9978297044371834e-05,
|
| 1843 |
+
"loss": 1.3337,
|
| 1844 |
+
"step": 234
|
| 1845 |
+
},
|
| 1846 |
+
{
|
| 1847 |
+
"epoch": 2.3979591836734695,
|
| 1848 |
+
"grad_norm": 111.15966033935547,
|
| 1849 |
+
"learning_rate": 1.9977108512320103e-05,
|
| 1850 |
+
"loss": 0.6753,
|
| 1851 |
+
"step": 235
|
| 1852 |
+
},
|
| 1853 |
+
{
|
| 1854 |
+
"epoch": 2.4081632653061225,
|
| 1855 |
+
"grad_norm": 138.61196899414062,
|
| 1856 |
+
"learning_rate": 1.9975888335117927e-05,
|
| 1857 |
+
"loss": 0.6526,
|
| 1858 |
+
"step": 236
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 2.4183673469387754,
|
| 1862 |
+
"grad_norm": 421.8229675292969,
|
| 1863 |
+
"learning_rate": 1.9974636516635436e-05,
|
| 1864 |
+
"loss": 2.3458,
|
| 1865 |
+
"step": 237
|
| 1866 |
+
},
|
| 1867 |
+
{
|
| 1868 |
+
"epoch": 2.4285714285714284,
|
| 1869 |
+
"grad_norm": 70.16128540039062,
|
| 1870 |
+
"learning_rate": 1.9973353060843118e-05,
|
| 1871 |
+
"loss": 0.2163,
|
| 1872 |
+
"step": 238
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"epoch": 2.438775510204082,
|
| 1876 |
+
"grad_norm": 750.59619140625,
|
| 1877 |
+
"learning_rate": 1.9972037971811802e-05,
|
| 1878 |
+
"loss": 10.2189,
|
| 1879 |
+
"step": 239
|
| 1880 |
+
},
|
| 1881 |
+
{
|
| 1882 |
+
"epoch": 2.4489795918367347,
|
| 1883 |
+
"grad_norm": 73.94668579101562,
|
| 1884 |
+
"learning_rate": 1.9970691253712663e-05,
|
| 1885 |
+
"loss": 0.3347,
|
| 1886 |
+
"step": 240
|
| 1887 |
+
},
|
| 1888 |
+
{
|
| 1889 |
+
"epoch": 2.4591836734693877,
|
| 1890 |
+
"grad_norm": 431.5457763671875,
|
| 1891 |
+
"learning_rate": 1.9969312910817183e-05,
|
| 1892 |
+
"loss": 2.5343,
|
| 1893 |
+
"step": 241
|
| 1894 |
+
},
|
| 1895 |
+
{
|
| 1896 |
+
"epoch": 2.4693877551020407,
|
| 1897 |
+
"grad_norm": 2.1232171058654785,
|
| 1898 |
+
"learning_rate": 1.9967902947497158e-05,
|
| 1899 |
+
"loss": 0.0063,
|
| 1900 |
+
"step": 242
|
| 1901 |
+
},
|
| 1902 |
+
{
|
| 1903 |
+
"epoch": 2.479591836734694,
|
| 1904 |
+
"grad_norm": 0.7545721530914307,
|
| 1905 |
+
"learning_rate": 1.9966461368224676e-05,
|
| 1906 |
+
"loss": 0.0025,
|
| 1907 |
+
"step": 243
|
| 1908 |
+
},
|
| 1909 |
+
{
|
| 1910 |
+
"epoch": 2.489795918367347,
|
| 1911 |
+
"grad_norm": 26.93790626525879,
|
| 1912 |
+
"learning_rate": 1.9964988177572106e-05,
|
| 1913 |
+
"loss": 0.1384,
|
| 1914 |
+
"step": 244
|
| 1915 |
+
},
|
| 1916 |
+
{
|
| 1917 |
+
"epoch": 2.5,
|
| 1918 |
+
"grad_norm": 2.8282601833343506,
|
| 1919 |
+
"learning_rate": 1.996348338021207e-05,
|
| 1920 |
+
"loss": 0.0052,
|
| 1921 |
+
"step": 245
|
| 1922 |
+
},
|
| 1923 |
+
{
|
| 1924 |
+
"epoch": 2.510204081632653,
|
| 1925 |
+
"grad_norm": 584.0491333007812,
|
| 1926 |
+
"learning_rate": 1.9961946980917457e-05,
|
| 1927 |
+
"loss": 12.8801,
|
| 1928 |
+
"step": 246
|
| 1929 |
+
},
|
| 1930 |
+
{
|
| 1931 |
+
"epoch": 2.520408163265306,
|
| 1932 |
+
"grad_norm": 720.6881103515625,
|
| 1933 |
+
"learning_rate": 1.9960378984561377e-05,
|
| 1934 |
+
"loss": 8.5862,
|
| 1935 |
+
"step": 247
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"epoch": 2.5306122448979593,
|
| 1939 |
+
"grad_norm": 834.9110717773438,
|
| 1940 |
+
"learning_rate": 1.9958779396117162e-05,
|
| 1941 |
+
"loss": 7.4971,
|
| 1942 |
+
"step": 248
|
| 1943 |
+
},
|
| 1944 |
+
{
|
| 1945 |
+
"epoch": 2.5408163265306123,
|
| 1946 |
+
"grad_norm": 217.15977478027344,
|
| 1947 |
+
"learning_rate": 1.9957148220658348e-05,
|
| 1948 |
+
"loss": 0.9741,
|
| 1949 |
+
"step": 249
|
| 1950 |
+
},
|
| 1951 |
+
{
|
| 1952 |
+
"epoch": 2.5510204081632653,
|
| 1953 |
+
"grad_norm": 486.4735107421875,
|
| 1954 |
+
"learning_rate": 1.9955485463358655e-05,
|
| 1955 |
+
"loss": 4.6348,
|
| 1956 |
+
"step": 250
|
| 1957 |
+
},
|
| 1958 |
+
{
|
| 1959 |
+
"epoch": 2.561224489795918,
|
| 1960 |
+
"grad_norm": 8.122939109802246,
|
| 1961 |
+
"learning_rate": 1.9953791129491985e-05,
|
| 1962 |
+
"loss": 0.0336,
|
| 1963 |
+
"step": 251
|
| 1964 |
+
},
|
| 1965 |
+
{
|
| 1966 |
+
"epoch": 2.571428571428571,
|
| 1967 |
+
"grad_norm": 110.34471893310547,
|
| 1968 |
+
"learning_rate": 1.9952065224432376e-05,
|
| 1969 |
+
"loss": 0.5127,
|
| 1970 |
+
"step": 252
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 2.5816326530612246,
|
| 1974 |
+
"grad_norm": 465.07745361328125,
|
| 1975 |
+
"learning_rate": 1.9950307753654016e-05,
|
| 1976 |
+
"loss": 4.2685,
|
| 1977 |
+
"step": 253
|
| 1978 |
+
},
|
| 1979 |
+
{
|
| 1980 |
+
"epoch": 2.5918367346938775,
|
| 1981 |
+
"grad_norm": 177.638671875,
|
| 1982 |
+
"learning_rate": 1.9948518722731208e-05,
|
| 1983 |
+
"loss": 1.1622,
|
| 1984 |
+
"step": 254
|
| 1985 |
+
},
|
| 1986 |
+
{
|
| 1987 |
+
"epoch": 2.6020408163265305,
|
| 1988 |
+
"grad_norm": 1.3182055950164795,
|
| 1989 |
+
"learning_rate": 1.9946698137338357e-05,
|
| 1990 |
+
"loss": 0.0067,
|
| 1991 |
+
"step": 255
|
| 1992 |
+
},
|
| 1993 |
+
{
|
| 1994 |
+
"epoch": 2.612244897959184,
|
| 1995 |
+
"grad_norm": 158.78639221191406,
|
| 1996 |
+
"learning_rate": 1.994484600324995e-05,
|
| 1997 |
+
"loss": 0.443,
|
| 1998 |
+
"step": 256
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"epoch": 2.622448979591837,
|
| 2002 |
+
"grad_norm": 1005.9036865234375,
|
| 2003 |
+
"learning_rate": 1.994296232634054e-05,
|
| 2004 |
+
"loss": 15.6073,
|
| 2005 |
+
"step": 257
|
| 2006 |
+
},
|
| 2007 |
+
{
|
| 2008 |
+
"epoch": 2.63265306122449,
|
| 2009 |
+
"grad_norm": 3.7752788066864014,
|
| 2010 |
+
"learning_rate": 1.994104711258473e-05,
|
| 2011 |
+
"loss": 0.0127,
|
| 2012 |
+
"step": 258
|
| 2013 |
+
},
|
| 2014 |
+
{
|
| 2015 |
+
"epoch": 2.642857142857143,
|
| 2016 |
+
"grad_norm": 48.576602935791016,
|
| 2017 |
+
"learning_rate": 1.9939100368057144e-05,
|
| 2018 |
+
"loss": 0.1056,
|
| 2019 |
+
"step": 259
|
| 2020 |
+
},
|
| 2021 |
+
{
|
| 2022 |
+
"epoch": 2.6530612244897958,
|
| 2023 |
+
"grad_norm": 340.2049560546875,
|
| 2024 |
+
"learning_rate": 1.9937122098932428e-05,
|
| 2025 |
+
"loss": 1.0591,
|
| 2026 |
+
"step": 260
|
| 2027 |
+
},
|
| 2028 |
+
{
|
| 2029 |
+
"epoch": 2.663265306122449,
|
| 2030 |
+
"grad_norm": 472.6616516113281,
|
| 2031 |
+
"learning_rate": 1.99351123114852e-05,
|
| 2032 |
+
"loss": 2.0244,
|
| 2033 |
+
"step": 261
|
| 2034 |
+
},
|
| 2035 |
+
{
|
| 2036 |
+
"epoch": 2.673469387755102,
|
| 2037 |
+
"grad_norm": 1.43545663356781,
|
| 2038 |
+
"learning_rate": 1.993307101209006e-05,
|
| 2039 |
+
"loss": 0.0047,
|
| 2040 |
+
"step": 262
|
| 2041 |
+
},
|
| 2042 |
+
{
|
| 2043 |
+
"epoch": 2.683673469387755,
|
| 2044 |
+
"grad_norm": 23.00522804260254,
|
| 2045 |
+
"learning_rate": 1.993099820722155e-05,
|
| 2046 |
+
"loss": 0.0402,
|
| 2047 |
+
"step": 263
|
| 2048 |
+
},
|
| 2049 |
+
{
|
| 2050 |
+
"epoch": 2.693877551020408,
|
| 2051 |
+
"grad_norm": 144.068115234375,
|
| 2052 |
+
"learning_rate": 1.992889390345414e-05,
|
| 2053 |
+
"loss": 2.0309,
|
| 2054 |
+
"step": 264
|
| 2055 |
+
},
|
| 2056 |
+
{
|
| 2057 |
+
"epoch": 2.704081632653061,
|
| 2058 |
+
"grad_norm": 26.34888458251953,
|
| 2059 |
+
"learning_rate": 1.9926758107462208e-05,
|
| 2060 |
+
"loss": 0.0599,
|
| 2061 |
+
"step": 265
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"epoch": 2.7142857142857144,
|
| 2065 |
+
"grad_norm": 1333.631591796875,
|
| 2066 |
+
"learning_rate": 1.9924590826020027e-05,
|
| 2067 |
+
"loss": 6.106,
|
| 2068 |
+
"step": 266
|
| 2069 |
+
},
|
| 2070 |
+
{
|
| 2071 |
+
"epoch": 2.7244897959183674,
|
| 2072 |
+
"grad_norm": 4.13116455078125,
|
| 2073 |
+
"learning_rate": 1.9922392066001724e-05,
|
| 2074 |
+
"loss": 0.007,
|
| 2075 |
+
"step": 267
|
| 2076 |
+
},
|
| 2077 |
+
{
|
| 2078 |
+
"epoch": 2.7346938775510203,
|
| 2079 |
+
"grad_norm": 569.2820434570312,
|
| 2080 |
+
"learning_rate": 1.992016183438127e-05,
|
| 2081 |
+
"loss": 4.5277,
|
| 2082 |
+
"step": 268
|
| 2083 |
+
},
|
| 2084 |
+
{
|
| 2085 |
+
"epoch": 2.7448979591836737,
|
| 2086 |
+
"grad_norm": 9.351508140563965,
|
| 2087 |
+
"learning_rate": 1.991790013823246e-05,
|
| 2088 |
+
"loss": 0.0202,
|
| 2089 |
+
"step": 269
|
| 2090 |
+
},
|
| 2091 |
+
{
|
| 2092 |
+
"epoch": 2.7551020408163263,
|
| 2093 |
+
"grad_norm": 0.10505271703004837,
|
| 2094 |
+
"learning_rate": 1.9915606984728896e-05,
|
| 2095 |
+
"loss": 0.0004,
|
| 2096 |
+
"step": 270
|
| 2097 |
+
},
|
| 2098 |
+
{
|
| 2099 |
+
"epoch": 2.7653061224489797,
|
| 2100 |
+
"grad_norm": 16.56380271911621,
|
| 2101 |
+
"learning_rate": 1.9913282381143934e-05,
|
| 2102 |
+
"loss": 0.052,
|
| 2103 |
+
"step": 271
|
| 2104 |
+
},
|
| 2105 |
+
{
|
| 2106 |
+
"epoch": 2.7755102040816326,
|
| 2107 |
+
"grad_norm": 15.175297737121582,
|
| 2108 |
+
"learning_rate": 1.99109263348507e-05,
|
| 2109 |
+
"loss": 0.0429,
|
| 2110 |
+
"step": 272
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 2.7857142857142856,
|
| 2114 |
+
"grad_norm": 27.31505012512207,
|
| 2115 |
+
"learning_rate": 1.9908538853322046e-05,
|
| 2116 |
+
"loss": 0.0423,
|
| 2117 |
+
"step": 273
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 2.795918367346939,
|
| 2121 |
+
"grad_norm": 87.07357788085938,
|
| 2122 |
+
"learning_rate": 1.9906119944130527e-05,
|
| 2123 |
+
"loss": 0.2729,
|
| 2124 |
+
"step": 274
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"epoch": 2.806122448979592,
|
| 2128 |
+
"grad_norm": 1.1642284393310547,
|
| 2129 |
+
"learning_rate": 1.9903669614948382e-05,
|
| 2130 |
+
"loss": 0.0025,
|
| 2131 |
+
"step": 275
|
| 2132 |
+
},
|
| 2133 |
+
{
|
| 2134 |
+
"epoch": 2.816326530612245,
|
| 2135 |
+
"grad_norm": 12.991185188293457,
|
| 2136 |
+
"learning_rate": 1.9901187873547504e-05,
|
| 2137 |
+
"loss": 0.0278,
|
| 2138 |
+
"step": 276
|
| 2139 |
+
},
|
| 2140 |
+
{
|
| 2141 |
+
"epoch": 2.826530612244898,
|
| 2142 |
+
"grad_norm": 86.49649810791016,
|
| 2143 |
+
"learning_rate": 1.9898674727799418e-05,
|
| 2144 |
+
"loss": 0.4171,
|
| 2145 |
+
"step": 277
|
| 2146 |
+
},
|
| 2147 |
+
{
|
| 2148 |
+
"epoch": 2.836734693877551,
|
| 2149 |
+
"grad_norm": 14.625425338745117,
|
| 2150 |
+
"learning_rate": 1.9896130185675263e-05,
|
| 2151 |
+
"loss": 0.0553,
|
| 2152 |
+
"step": 278
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 2.8469387755102042,
|
| 2156 |
+
"grad_norm": 339.1553649902344,
|
| 2157 |
+
"learning_rate": 1.9893554255245748e-05,
|
| 2158 |
+
"loss": 3.4933,
|
| 2159 |
+
"step": 279
|
| 2160 |
+
},
|
| 2161 |
+
{
|
| 2162 |
+
"epoch": 2.857142857142857,
|
| 2163 |
+
"grad_norm": 9.26375961303711,
|
| 2164 |
+
"learning_rate": 1.9890946944681157e-05,
|
| 2165 |
+
"loss": 0.0454,
|
| 2166 |
+
"step": 280
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 2.86734693877551,
|
| 2170 |
+
"grad_norm": 66.49364471435547,
|
| 2171 |
+
"learning_rate": 1.9888308262251286e-05,
|
| 2172 |
+
"loss": 0.2936,
|
| 2173 |
+
"step": 281
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 2.877551020408163,
|
| 2177 |
+
"grad_norm": 0.08851417154073715,
|
| 2178 |
+
"learning_rate": 1.988563821632545e-05,
|
| 2179 |
+
"loss": 0.0003,
|
| 2180 |
+
"step": 282
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 2.887755102040816,
|
| 2184 |
+
"grad_norm": 694.4219360351562,
|
| 2185 |
+
"learning_rate": 1.9882936815372432e-05,
|
| 2186 |
+
"loss": 5.218,
|
| 2187 |
+
"step": 283
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 2.8979591836734695,
|
| 2191 |
+
"grad_norm": 932.3369750976562,
|
| 2192 |
+
"learning_rate": 1.9880204067960473e-05,
|
| 2193 |
+
"loss": 12.6645,
|
| 2194 |
+
"step": 284
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 2.9081632653061225,
|
| 2198 |
+
"grad_norm": 99.3906021118164,
|
| 2199 |
+
"learning_rate": 1.9877439982757228e-05,
|
| 2200 |
+
"loss": 0.6918,
|
| 2201 |
+
"step": 285
|
| 2202 |
+
},
|
| 2203 |
+
{
|
| 2204 |
+
"epoch": 2.9183673469387754,
|
| 2205 |
+
"grad_norm": 354.6759338378906,
|
| 2206 |
+
"learning_rate": 1.9874644568529763e-05,
|
| 2207 |
+
"loss": 1.913,
|
| 2208 |
+
"step": 286
|
| 2209 |
+
},
|
| 2210 |
+
{
|
| 2211 |
+
"epoch": 2.928571428571429,
|
| 2212 |
+
"grad_norm": 54.5423698425293,
|
| 2213 |
+
"learning_rate": 1.9871817834144506e-05,
|
| 2214 |
+
"loss": 0.2328,
|
| 2215 |
+
"step": 287
|
| 2216 |
+
},
|
| 2217 |
+
{
|
| 2218 |
+
"epoch": 2.938775510204082,
|
| 2219 |
+
"grad_norm": 13.632817268371582,
|
| 2220 |
+
"learning_rate": 1.9868959788567213e-05,
|
| 2221 |
+
"loss": 0.0461,
|
| 2222 |
+
"step": 288
|
| 2223 |
+
},
|
| 2224 |
+
{
|
| 2225 |
+
"epoch": 2.9489795918367347,
|
| 2226 |
+
"grad_norm": 0.13646447658538818,
|
| 2227 |
+
"learning_rate": 1.9866070440862977e-05,
|
| 2228 |
+
"loss": 0.0004,
|
| 2229 |
+
"step": 289
|
| 2230 |
+
},
|
| 2231 |
+
{
|
| 2232 |
+
"epoch": 2.9591836734693877,
|
| 2233 |
+
"grad_norm": 0.027559075504541397,
|
| 2234 |
+
"learning_rate": 1.9863149800196152e-05,
|
| 2235 |
+
"loss": 0.0001,
|
| 2236 |
+
"step": 290
|
| 2237 |
+
},
|
| 2238 |
+
{
|
| 2239 |
+
"epoch": 2.9693877551020407,
|
| 2240 |
+
"grad_norm": 3.3276469707489014,
|
| 2241 |
+
"learning_rate": 1.9860197875830355e-05,
|
| 2242 |
+
"loss": 0.0137,
|
| 2243 |
+
"step": 291
|
| 2244 |
+
},
|
| 2245 |
+
{
|
| 2246 |
+
"epoch": 2.979591836734694,
|
| 2247 |
+
"grad_norm": 29.392108917236328,
|
| 2248 |
+
"learning_rate": 1.9857214677128436e-05,
|
| 2249 |
+
"loss": 0.078,
|
| 2250 |
+
"step": 292
|
| 2251 |
+
},
|
| 2252 |
+
{
|
| 2253 |
+
"epoch": 2.989795918367347,
|
| 2254 |
+
"grad_norm": 156.80345153808594,
|
| 2255 |
+
"learning_rate": 1.9854200213552426e-05,
|
| 2256 |
+
"loss": 0.3899,
|
| 2257 |
+
"step": 293
|
| 2258 |
+
},
|
| 2259 |
+
{
|
| 2260 |
+
"epoch": 3.0,
|
| 2261 |
+
"grad_norm": 545.2831420898438,
|
| 2262 |
+
"learning_rate": 1.985115449466353e-05,
|
| 2263 |
+
"loss": 2.8353,
|
| 2264 |
+
"step": 294
|
| 2265 |
+
},
|
| 2266 |
+
{
|
| 2267 |
+
"epoch": 3.0,
|
| 2268 |
+
"eval_dim_1024_cosine_accuracy@1": 0.30985915492957744,
|
| 2269 |
+
"eval_dim_1024_cosine_accuracy@10": 0.37836107554417414,
|
| 2270 |
+
"eval_dim_1024_cosine_accuracy@3": 0.31498079385403327,
|
| 2271 |
+
"eval_dim_1024_cosine_accuracy@5": 0.34699103713188223,
|
| 2272 |
+
"eval_dim_1024_cosine_map@100": 0.3968421394024028,
|
| 2273 |
+
"eval_dim_1024_cosine_mrr@10": 0.32174181452350414,
|
| 2274 |
+
"eval_dim_1024_cosine_ndcg@10": 0.33364818903542787,
|
| 2275 |
+
"eval_dim_1024_cosine_precision@1": 0.30985915492957744,
|
| 2276 |
+
"eval_dim_1024_cosine_precision@10": 0.2694622279129321,
|
| 2277 |
+
"eval_dim_1024_cosine_precision@3": 0.30900554844216815,
|
| 2278 |
+
"eval_dim_1024_cosine_precision@5": 0.30115236875800255,
|
| 2279 |
+
"eval_dim_1024_cosine_recall@1": 0.03936027574360421,
|
| 2280 |
+
"eval_dim_1024_cosine_recall@10": 0.2548212686119806,
|
| 2281 |
+
"eval_dim_1024_cosine_recall@3": 0.11544349976954149,
|
| 2282 |
+
"eval_dim_1024_cosine_recall@5": 0.17456487753074904,
|
| 2283 |
+
"eval_dim_128_cosine_accuracy@1": 0.2765685019206146,
|
| 2284 |
+
"eval_dim_128_cosine_accuracy@10": 0.3501920614596671,
|
| 2285 |
+
"eval_dim_128_cosine_accuracy@3": 0.2887323943661972,
|
| 2286 |
+
"eval_dim_128_cosine_accuracy@5": 0.31882202304737517,
|
| 2287 |
+
"eval_dim_128_cosine_map@100": 0.36208318391000843,
|
| 2288 |
+
"eval_dim_128_cosine_mrr@10": 0.29051937889966023,
|
| 2289 |
+
"eval_dim_128_cosine_ndcg@10": 0.3054184027921396,
|
| 2290 |
+
"eval_dim_128_cosine_precision@1": 0.2765685019206146,
|
| 2291 |
+
"eval_dim_128_cosine_precision@10": 0.24916773367477596,
|
| 2292 |
+
"eval_dim_128_cosine_precision@3": 0.2787025181391378,
|
| 2293 |
+
"eval_dim_128_cosine_precision@5": 0.27413572343149806,
|
| 2294 |
+
"eval_dim_128_cosine_recall@1": 0.03462627857091171,
|
| 2295 |
+
"eval_dim_128_cosine_recall@10": 0.23219859983003413,
|
| 2296 |
+
"eval_dim_128_cosine_recall@3": 0.10222485929387912,
|
| 2297 |
+
"eval_dim_128_cosine_recall@5": 0.15567435868523452,
|
| 2298 |
+
"eval_dim_256_cosine_accuracy@1": 0.29577464788732394,
|
| 2299 |
+
"eval_dim_256_cosine_accuracy@10": 0.3585147247119078,
|
| 2300 |
+
"eval_dim_256_cosine_accuracy@3": 0.3047375160051216,
|
| 2301 |
+
"eval_dim_256_cosine_accuracy@5": 0.33098591549295775,
|
| 2302 |
+
"eval_dim_256_cosine_map@100": 0.377358622211706,
|
| 2303 |
+
"eval_dim_256_cosine_mrr@10": 0.3073963985935813,
|
| 2304 |
+
"eval_dim_256_cosine_ndcg@10": 0.31951819898251643,
|
| 2305 |
+
"eval_dim_256_cosine_precision@1": 0.29577464788732394,
|
| 2306 |
+
"eval_dim_256_cosine_precision@10": 0.25845070422535216,
|
| 2307 |
+
"eval_dim_256_cosine_precision@3": 0.2968416559965856,
|
| 2308 |
+
"eval_dim_256_cosine_precision@5": 0.2898847631241997,
|
| 2309 |
+
"eval_dim_256_cosine_recall@1": 0.03692836080135826,
|
| 2310 |
+
"eval_dim_256_cosine_recall@10": 0.24162273030445708,
|
| 2311 |
+
"eval_dim_256_cosine_recall@3": 0.1089192018057998,
|
| 2312 |
+
"eval_dim_256_cosine_recall@5": 0.16530160845995479,
|
| 2313 |
+
"eval_dim_512_cosine_accuracy@1": 0.3047375160051216,
|
| 2314 |
+
"eval_dim_512_cosine_accuracy@10": 0.3719590268886043,
|
| 2315 |
+
"eval_dim_512_cosine_accuracy@3": 0.31049935979513443,
|
| 2316 |
+
"eval_dim_512_cosine_accuracy@5": 0.34507042253521125,
|
| 2317 |
+
"eval_dim_512_cosine_map@100": 0.39281877553256617,
|
| 2318 |
+
"eval_dim_512_cosine_mrr@10": 0.31690623539215046,
|
| 2319 |
+
"eval_dim_512_cosine_ndcg@10": 0.3300149893720946,
|
| 2320 |
+
"eval_dim_512_cosine_precision@1": 0.3047375160051216,
|
| 2321 |
+
"eval_dim_512_cosine_precision@10": 0.2661331626120359,
|
| 2322 |
+
"eval_dim_512_cosine_precision@3": 0.30431071276141697,
|
| 2323 |
+
"eval_dim_512_cosine_precision@5": 0.29756722151088344,
|
| 2324 |
+
"eval_dim_512_cosine_recall@1": 0.03902184942619328,
|
| 2325 |
+
"eval_dim_512_cosine_recall@10": 0.2526764166009778,
|
| 2326 |
+
"eval_dim_512_cosine_recall@3": 0.11440062517351587,
|
| 2327 |
+
"eval_dim_512_cosine_recall@5": 0.17317031567103489,
|
| 2328 |
+
"eval_dim_64_cosine_accuracy@1": 0.25480153649167736,
|
| 2329 |
+
"eval_dim_64_cosine_accuracy@10": 0.323303457106274,
|
| 2330 |
+
"eval_dim_64_cosine_accuracy@3": 0.2605633802816901,
|
| 2331 |
+
"eval_dim_64_cosine_accuracy@5": 0.2906530089628681,
|
| 2332 |
+
"eval_dim_64_cosine_map@100": 0.3332689262079475,
|
| 2333 |
+
"eval_dim_64_cosine_mrr@10": 0.266841960856045,
|
| 2334 |
+
"eval_dim_64_cosine_ndcg@10": 0.28022682237950125,
|
| 2335 |
+
"eval_dim_64_cosine_precision@1": 0.25480153649167736,
|
| 2336 |
+
"eval_dim_64_cosine_precision@10": 0.23079385403329064,
|
| 2337 |
+
"eval_dim_64_cosine_precision@3": 0.25480153649167736,
|
| 2338 |
+
"eval_dim_64_cosine_precision@5": 0.25006402048655574,
|
| 2339 |
+
"eval_dim_64_cosine_recall@1": 0.031011767980561305,
|
| 2340 |
+
"eval_dim_64_cosine_recall@10": 0.21011380307216662,
|
| 2341 |
+
"eval_dim_64_cosine_recall@3": 0.09100224310580617,
|
| 2342 |
+
"eval_dim_64_cosine_recall@5": 0.13823759538062028,
|
| 2343 |
+
"eval_dim_768_cosine_accuracy@1": 0.3072983354673495,
|
| 2344 |
+
"eval_dim_768_cosine_accuracy@10": 0.37451984635083224,
|
| 2345 |
+
"eval_dim_768_cosine_accuracy@3": 0.31049935979513443,
|
| 2346 |
+
"eval_dim_768_cosine_accuracy@5": 0.3444302176696543,
|
| 2347 |
+
"eval_dim_768_cosine_map@100": 0.3944113472988561,
|
| 2348 |
+
"eval_dim_768_cosine_mrr@10": 0.3188075422230347,
|
| 2349 |
+
"eval_dim_768_cosine_ndcg@10": 0.3310954692046881,
|
| 2350 |
+
"eval_dim_768_cosine_precision@1": 0.3072983354673495,
|
| 2351 |
+
"eval_dim_768_cosine_precision@10": 0.26677336747759284,
|
| 2352 |
+
"eval_dim_768_cosine_precision@3": 0.3060179257362356,
|
| 2353 |
+
"eval_dim_768_cosine_precision@5": 0.29795134443021765,
|
| 2354 |
+
"eval_dim_768_cosine_recall@1": 0.03940235994624546,
|
| 2355 |
+
"eval_dim_768_cosine_recall@10": 0.2544826642178083,
|
| 2356 |
+
"eval_dim_768_cosine_recall@3": 0.11527075559959522,
|
| 2357 |
+
"eval_dim_768_cosine_recall@5": 0.17393586357387436,
|
| 2358 |
+
"eval_runtime": 99.0959,
|
| 2359 |
+
"eval_samples_per_second": 0.0,
|
| 2360 |
+
"eval_sequential_score": 0.28022682237950125,
|
| 2361 |
+
"eval_steps_per_second": 0.0,
|
| 2362 |
+
"step": 294
|
| 2363 |
+
}
|
| 2364 |
+
],
|
| 2365 |
+
"logging_steps": 1,
|
| 2366 |
+
"max_steps": 1960,
|
| 2367 |
+
"num_input_tokens_seen": 0,
|
| 2368 |
+
"num_train_epochs": 20,
|
| 2369 |
+
"save_steps": 500,
|
| 2370 |
+
"stateful_callbacks": {
|
| 2371 |
+
"EarlyStoppingCallback": {
|
| 2372 |
+
"args": {
|
| 2373 |
+
"early_stopping_patience": 2,
|
| 2374 |
+
"early_stopping_threshold": 0.0
|
| 2375 |
+
},
|
| 2376 |
+
"attributes": {
|
| 2377 |
+
"early_stopping_patience_counter": 2
|
| 2378 |
+
}
|
| 2379 |
+
},
|
| 2380 |
+
"TrainerControl": {
|
| 2381 |
+
"args": {
|
| 2382 |
+
"should_epoch_stop": false,
|
| 2383 |
+
"should_evaluate": false,
|
| 2384 |
+
"should_log": false,
|
| 2385 |
+
"should_save": true,
|
| 2386 |
+
"should_training_stop": true
|
| 2387 |
+
},
|
| 2388 |
+
"attributes": {}
|
| 2389 |
+
}
|
| 2390 |
+
},
|
| 2391 |
+
"total_flos": 0.0,
|
| 2392 |
+
"train_batch_size": 2,
|
| 2393 |
+
"trial_name": null,
|
| 2394 |
+
"trial_params": null
|
| 2395 |
+
}
|
checkpoint-294/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
|
| 3 |
+
size 6097
|
checkpoint-98/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
checkpoint-98/README.md
ADDED
|
@@ -0,0 +1,1621 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: apache-2.0
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- sentence-similarity
|
| 8 |
+
- feature-extraction
|
| 9 |
+
- dense
|
| 10 |
+
- generated_from_trainer
|
| 11 |
+
- dataset_size:391
|
| 12 |
+
- loss:MatryoshkaLoss
|
| 13 |
+
- loss:MultipleNegativesRankingLoss
|
| 14 |
+
base_model: intfloat/multilingual-e5-large
|
| 15 |
+
widget:
|
| 16 |
+
- source_sentence: What does 'personal data breach' entail?
|
| 17 |
+
sentences:
|
| 18 |
+
- '1.Processing of personal data revealing racial or ethnic origin, political opinions,
|
| 19 |
+
religious or philosophical beliefs, or trade union membership, and the processing
|
| 20 |
+
of genetic data, biometric data for the purpose of uniquely identifying a natural
|
| 21 |
+
person, data concerning health or data concerning a natural person''s sex life
|
| 22 |
+
or sexual orientation shall be prohibited.
|
| 23 |
+
|
| 24 |
+
2.Paragraph 1 shall not apply if one of the following applies: (a) the data subject
|
| 25 |
+
has given explicit consent to the processing of those personal data for one or
|
| 26 |
+
more specified purposes, except where Union or Member State law provide that the
|
| 27 |
+
prohibition referred to in paragraph 1 may not be lifted by the data subject;
|
| 28 |
+
(b) processing is necessary for the purposes of carrying out the obligations
|
| 29 |
+
and exercising specific rights of the controller or of the data subject in the
|
| 30 |
+
field of employment and social security and social protection law in so far as
|
| 31 |
+
it is authorised by Union or Member State law or a collective agreement pursuant
|
| 32 |
+
to Member State law providing for appropriate safeguards for the fundamental rights
|
| 33 |
+
and the interests of the data subject; (c) processing is necessary to protect
|
| 34 |
+
the vital interests of the data subject or of another natural person where the
|
| 35 |
+
data subject is physically or legally incapable of giving consent; (d) processing
|
| 36 |
+
is carried out in the course of its legitimate activities with appropriate safeguards
|
| 37 |
+
by a foundation, association or any other not-for-profit body with a political,
|
| 38 |
+
philosophical, religious or trade union aim and on condition that the processing
|
| 39 |
+
relates solely to the members or to former members of the body or to persons who
|
| 40 |
+
have regular contact with it in connection with its purposes and that the personal
|
| 41 |
+
data are not disclosed outside that body without the consent of the data subjects;
|
| 42 |
+
(e) processing relates to personal data which are manifestly made public by the
|
| 43 |
+
data subject; (f) processing is necessary for the establishment, exercise or
|
| 44 |
+
defence of legal claims or whenever courts are acting in their judicial capacity;
|
| 45 |
+
(g) processing is necessary for reasons of substantial public interest, on the
|
| 46 |
+
basis of Union or Member State law which shall be proportionate to the aim pursued,
|
| 47 |
+
respect the essence of the right to data protection and provide for suitable and
|
| 48 |
+
specific measures to safeguard the fundamental rights and the interests of the
|
| 49 |
+
data subject; (h) processing is necessary for the purposes of preventive or occupational
|
| 50 |
+
medicine, for the assessment of the working capacity of the employee, medical
|
| 51 |
+
diagnosis, the provision of health or social care or treatment or the management
|
| 52 |
+
of health or social care systems and services on the basis of Union or Member
|
| 53 |
+
State law or pursuant to contract with a health professional and subject to the
|
| 54 |
+
conditions and safeguards referred to in paragraph 3; (i) processing is necessary
|
| 55 |
+
for reasons of public interest in the area of public health, such as protecting
|
| 56 |
+
against serious cross-border threats to health or ensuring high standards of quality
|
| 57 |
+
and safety of health care and of medicinal products or medical devices, on the
|
| 58 |
+
basis of Union or Member State law which provides for suitable and specific measures
|
| 59 |
+
to safeguard the rights and freedoms of the data subject, in particular professional
|
| 60 |
+
secrecy; 4.5.2016 L 119/38 (j) processing is necessary for archiving purposes
|
| 61 |
+
in the public interest, scientific or historical research purposes or statistical
|
| 62 |
+
purposes in accordance with Article 89(1) based on Union or Member State law which
|
| 63 |
+
shall be proportionate to the aim pursued, respect the essence of the right to
|
| 64 |
+
data protection and provide for suitable and specific measures to safeguard the
|
| 65 |
+
fundamental rights and the interests of the data subject.
|
| 66 |
+
|
| 67 |
+
3.Personal data referred to in paragraph 1 may be processed for the purposes referred
|
| 68 |
+
to in point (h) of paragraph 2 when those data are processed by or under the responsibility
|
| 69 |
+
of a professional subject to the obligation of professional secrecy under Union
|
| 70 |
+
or Member State law or rules established by national competent bodies or by another
|
| 71 |
+
person also subject to an obligation of secrecy under Union or Member State law
|
| 72 |
+
or rules established by national competent bodies.
|
| 73 |
+
|
| 74 |
+
4.Member States may maintain or introduce further conditions, including limitations,
|
| 75 |
+
with regard to the processing of genetic data, biometric data or data concerning
|
| 76 |
+
health.'
|
| 77 |
+
- '1) ''personal data'' means any information relating to an identified or identifiable
|
| 78 |
+
natural person (''data subject''); an identifiable natural person is one who can
|
| 79 |
+
be identified, directly or indirectly, in particular by reference to an identifier
|
| 80 |
+
such as a name, an identification number, location data, an online identifier
|
| 81 |
+
or to one or more factors specific to the physical, physiological, genetic, mental,
|
| 82 |
+
economic, cultural or social identity of that natural person;
|
| 83 |
+
|
| 84 |
+
(2) ‘processing’ means any operation or set of operations which is performed on
|
| 85 |
+
personal data or on sets of personal data, whether or not by automated means,
|
| 86 |
+
such as collection, recording, organisation, structuring, storage, adaptation
|
| 87 |
+
or alteration, retrieval, consultation, use, disclosure by transmission, dissemination
|
| 88 |
+
or otherwise making available, alignment or combination, restriction, erasure
|
| 89 |
+
or destruction;
|
| 90 |
+
|
| 91 |
+
(3) ‘restriction of processing’ means the marking of stored personal data with
|
| 92 |
+
the aim of limiting their processing in the future;
|
| 93 |
+
|
| 94 |
+
(4) ‘profiling’ means any form of automated processing of personal data consisting
|
| 95 |
+
of the use of personal data to evaluate certain personal aspects relating to a
|
| 96 |
+
natural person, in particular to analyse or predict aspects concerning that natural
|
| 97 |
+
person''s performance at work, economic situation, health, personal preferences,
|
| 98 |
+
interests, reliability, behaviour, location or movements;
|
| 99 |
+
|
| 100 |
+
(5) ‘pseudonymisation’ means the processing of personal data in such a manner
|
| 101 |
+
that the personal data can no longer be attributed to a specific data subject
|
| 102 |
+
without the use of additional information, provided that such additional information
|
| 103 |
+
is kept separately and is subject to technical and organisational measures to
|
| 104 |
+
ensure that the personal data are not attributed to an identified or identifiable
|
| 105 |
+
natural person;
|
| 106 |
+
|
| 107 |
+
(6) ‘filing system’ means any structured set of personal data which are accessible
|
| 108 |
+
according to specific criteria, whether centralised, decentralised or dispersed
|
| 109 |
+
on a functional or geographical basis;
|
| 110 |
+
|
| 111 |
+
(7) ‘controller’ means the natural or legal person, public authority, agency or
|
| 112 |
+
other body which, alone or jointly with others, determines the purposes and means
|
| 113 |
+
of the processing of personal data; where the purposes and means of such processing
|
| 114 |
+
are determined by Union or Member State law, the controller or the specific criteria
|
| 115 |
+
for its nomination may be provided for by Union or Member State law;
|
| 116 |
+
|
| 117 |
+
(8) ‘processor’ means a natural or legal person, public authority, agency or other
|
| 118 |
+
body which processes personal data on behalf of the controller;
|
| 119 |
+
|
| 120 |
+
(9) ‘recipient’ means a natural or legal person, public authority, agency or another
|
| 121 |
+
body, to which the personal data are disclosed, whether a third party or not.
|
| 122 |
+
However, public authorities which may receive personal data in the framework of
|
| 123 |
+
a particular inquiry in accordance with Union or Member State law shall not be
|
| 124 |
+
regarded as recipients; the processing of those data by those public authorities
|
| 125 |
+
shall be in compliance with the applicable data protection rules according to
|
| 126 |
+
the purposes of the processing;
|
| 127 |
+
|
| 128 |
+
(10) ‘third party’ means a natural or legal person, public authority, agency or
|
| 129 |
+
body other than the data subject, controller, processor and persons who, under
|
| 130 |
+
the direct authority of the controller or processor, are authorised to process
|
| 131 |
+
personal data;
|
| 132 |
+
|
| 133 |
+
(11) ‘consent’ of the data subject means any freely given, specific, informed
|
| 134 |
+
and unambiguous indication of the data subject''s wishes by which he or she, by
|
| 135 |
+
a statement or by a clear affirmative action, signifies agreement to the processing
|
| 136 |
+
of personal data relating to him or her;
|
| 137 |
+
|
| 138 |
+
(12) ‘personal data breach’ means a breach of security leading to the accidental
|
| 139 |
+
or unlawful destruction, loss, alteration, unauthorised disclosure of, or access
|
| 140 |
+
to, personal data transmitted, stored or otherwise processed;
|
| 141 |
+
|
| 142 |
+
(13) ‘genetic data’ means personal data relating to the inherited or acquired
|
| 143 |
+
genetic characteristics of a natural person which give unique information about
|
| 144 |
+
the physiology or the health of that natural person and which result, in particular,
|
| 145 |
+
from an analysis of a biological sample from the natural person in question;
|
| 146 |
+
|
| 147 |
+
(14) ‘biometric data’ means personal data resulting from specific technical processing
|
| 148 |
+
relating to the physical, physiological or behavioural characteristics of a natural
|
| 149 |
+
person, which allow or confirm the unique identification of that natural person,
|
| 150 |
+
such as facial images or dactyloscopic data;
|
| 151 |
+
|
| 152 |
+
(15) ‘data concerning health’ means personal data related to the physical or mental
|
| 153 |
+
health of a natural person, including the provision of health care services, which
|
| 154 |
+
reveal information about his or her health status;
|
| 155 |
+
|
| 156 |
+
(16) ‘main establishment’ means: (a) as regards a controller with establishments
|
| 157 |
+
in more than one Member State, the place of its central administration in the
|
| 158 |
+
Union, unless the decisions on the purposes and means of the processing of personal
|
| 159 |
+
data are taken in another establishment of the controller in the Union and the
|
| 160 |
+
latter establishment has the power to have such decisions implemented, in which
|
| 161 |
+
case the establishment having taken such decisions is to be considered to be the
|
| 162 |
+
main establishment; (b) as regards a processor with establishments in more than
|
| 163 |
+
one Member State, the place of its central administration in the Union, or, if
|
| 164 |
+
the processor has no central administration in the Union, the establishment of
|
| 165 |
+
the processor in the Union where the main processing activities in the context
|
| 166 |
+
of the activities of an establishment of the processor take place to the extent
|
| 167 |
+
that the processor is subject to specific obligations under this Regulation;
|
| 168 |
+
|
| 169 |
+
(17) ‘representative’ means a natural or legal person established in the Union
|
| 170 |
+
who, designated by the controller or processor in writing pursuant to Article
|
| 171 |
+
27, represents the controller or processor with regard to their respective obligations
|
| 172 |
+
under this Regulation;
|
| 173 |
+
|
| 174 |
+
(18) ‘enterprise’ means a natural or legal person engaged in an economic activity,
|
| 175 |
+
irrespective of its legal form, including partnerships or associations regularly
|
| 176 |
+
engaged in an economic activity;
|
| 177 |
+
|
| 178 |
+
(19) ‘group of undertakings’ means a controlling undertaking and its controlled
|
| 179 |
+
undertakings;
|
| 180 |
+
|
| 181 |
+
(20) ‘binding corporate rules’ means personal data protection policies which are
|
| 182 |
+
adhered to by a controller or processor established on the territory of a Member
|
| 183 |
+
State for transfers or a set of transfers of personal data to a controller or
|
| 184 |
+
processor in one or more third countries within a group of undertakings, or group
|
| 185 |
+
of enterprises engaged in a joint economic activity;
|
| 186 |
+
|
| 187 |
+
(21) ‘supervisory authority’ means an independent public authority which is established
|
| 188 |
+
by a Member State pursuant to Article 51;
|
| 189 |
+
|
| 190 |
+
(22) ‘supervisory authority concerned’ means a supervisory authority which is
|
| 191 |
+
concerned by the processing of personal data because: (a) the controller or processor
|
| 192 |
+
is established on the territory of the Member State of that supervisory authority;
|
| 193 |
+
(b) data subjects residing in the Member State of that supervisory authority are
|
| 194 |
+
substantially affected or likely to be substantially affected by the processing;
|
| 195 |
+
or (c) a complaint has been lodged with that supervisory authority;
|
| 196 |
+
|
| 197 |
+
(23) ‘cross-border processing’ means either: (a) processing of personal data which
|
| 198 |
+
takes place in the context of the activities of establishments in more than one
|
| 199 |
+
Member State of a controller or processor in the Union where the controller or
|
| 200 |
+
processor is established in more than one Member State; or (b) processing of personal
|
| 201 |
+
data which takes place in the context of the activities of a single establishment
|
| 202 |
+
of a controller or processor in the Union but which substantially affects or is
|
| 203 |
+
likely to substantially affect data subjects in more than one Member State.
|
| 204 |
+
|
| 205 |
+
(24) ‘relevant and reasoned objection’ means an objection to a draft decision
|
| 206 |
+
as to whether there is an infringement of this Regulation, or whether envisaged
|
| 207 |
+
action in relation to the controller or processor complies with this Regulation,
|
| 208 |
+
which clearly demonstrates the significance of the risks posed by the draft decision
|
| 209 |
+
as regards the fundamental rights and freedoms of data subjects and, where applicable,
|
| 210 |
+
the free flow of personal data within the Union;
|
| 211 |
+
|
| 212 |
+
(25) ‘information society service’ means a service as defined in point (b) of
|
| 213 |
+
Article 1(1) of Directive (EU) 2015/1535 of the European Parliament and of the
|
| 214 |
+
Council (1);
|
| 215 |
+
|
| 216 |
+
(26) ‘international organisation’ means an organisation and its subordinate bodies
|
| 217 |
+
governed by public international law, or any other body which is set up by, or
|
| 218 |
+
on the basis of, an agreement between two or more countries.'
|
| 219 |
+
- Any processing of personal data should be lawful and fair. It should be transparent
|
| 220 |
+
to natural persons that personal data concerning them are collected, used, consulted
|
| 221 |
+
or otherwise processed and to what extent the personal data are or will be processed.
|
| 222 |
+
The principle of transparency requires that any information and communication
|
| 223 |
+
relating to the processing of those personal data be easily accessible and easy
|
| 224 |
+
to understand, and that clear and plain language be used. That principle concerns,
|
| 225 |
+
in particular, information to the data subjects on the identity of the controller
|
| 226 |
+
and the purposes of the processing and further information to ensure fair and
|
| 227 |
+
transparent processing in respect of the natural persons concerned and their right
|
| 228 |
+
to obtain confirmation and communication of personal data concerning them which
|
| 229 |
+
are being processed. Natural persons should be made aware of risks, rules, safeguards
|
| 230 |
+
and rights in relation to the processing of personal data and how to exercise
|
| 231 |
+
their rights in relation to such processing. In particular, the specific purposes
|
| 232 |
+
for which personal data are processed should be explicit and legitimate and determined
|
| 233 |
+
at the time of the collection of the personal data. The personal data should be
|
| 234 |
+
adequate, relevant and limited to what is necessary for the purposes for which
|
| 235 |
+
they are processed. This requires, in particular, ensuring that the period for
|
| 236 |
+
which the personal data are stored is limited to a strict minimum. Personal data
|
| 237 |
+
should be processed only if the purpose of the processing could not reasonably
|
| 238 |
+
be fulfilled by other means. In order to ensure that the personal data are not
|
| 239 |
+
kept longer than necessary, time limits should be established by the controller
|
| 240 |
+
for erasure or for a periodic review. Every reasonable step should be taken to
|
| 241 |
+
ensure that personal data which are inaccurate are rectified or deleted. Personal
|
| 242 |
+
data should be processed in a manner that ensures appropriate security and confidentiality
|
| 243 |
+
of the personal data, including for preventing unauthorised access to or use of
|
| 244 |
+
personal data and the equipment used for the processing.
|
| 245 |
+
- source_sentence: In what situations could providing information to the data subject
|
| 246 |
+
be considered impossible or involve a disproportionate effort?
|
| 247 |
+
sentences:
|
| 248 |
+
- '1.The controller shall consult the supervisory authority prior to processing
|
| 249 |
+
where a data protection impact assessment under Article 35 indicates that the
|
| 250 |
+
processing would result in a high risk in the absence of measures taken by the
|
| 251 |
+
controller to mitigate the risk.
|
| 252 |
+
|
| 253 |
+
2.Where the supervisory authority is of the opinion that the intended processing
|
| 254 |
+
referred to in paragraph 1 would infringe this Regulation, in particular where
|
| 255 |
+
the controller has insufficiently identified or mitigated the risk, the supervisory
|
| 256 |
+
authority shall, within period of up to eight weeks of receipt of the request
|
| 257 |
+
for consultation, provide written advice to the controller and, where applicable
|
| 258 |
+
to the processor, and may use any of its powers referred to in Article 58. That
|
| 259 |
+
period may be extended by six weeks, taking into account the complexity of the
|
| 260 |
+
intended processing. The supervisory authority shall inform the controller and,
|
| 261 |
+
where applicable, the processor, of any such extension within one month of receipt
|
| 262 |
+
of the request for consultation together with the reasons for the delay. Those
|
| 263 |
+
periods may be suspended until the supervisory authority has obtained information
|
| 264 |
+
it has requested for the purposes of the consultation.
|
| 265 |
+
|
| 266 |
+
3.When consulting the supervisory authority pursuant to paragraph 1, the controller
|
| 267 |
+
shall provide the supervisory authority with: (a) where applicable, the respective
|
| 268 |
+
responsibilities of the controller, joint controllers and processors involved
|
| 269 |
+
in the processing, in particular for processing within a group of undertakings;
|
| 270 |
+
(b) the purposes and means of the intended processing; (c) the measures and
|
| 271 |
+
safeguards provided to protect the rights and freedoms of data subjects pursuant
|
| 272 |
+
to this Regulation; (d) where applicable, the contact details of the data protection
|
| 273 |
+
officer; 4.5.2016 L 119/54 (e) the data protection impact assessment provided
|
| 274 |
+
for in Article 35; and (f) any other information requested by the supervisory
|
| 275 |
+
authority.
|
| 276 |
+
|
| 277 |
+
4.Member States shall consult the supervisory authority during the preparation
|
| 278 |
+
of a proposal for a legislative measure to be adopted by a national parliament,
|
| 279 |
+
or of a regulatory measure based on such a legislative measure, which relates
|
| 280 |
+
to processing.
|
| 281 |
+
|
| 282 |
+
5.Notwithstanding paragraph 1, Member State law may require controllers to consult
|
| 283 |
+
with, and obtain prior authorisation from, the supervisory authority in relation
|
| 284 |
+
to processing by a controller for the performance of a task carried out by the
|
| 285 |
+
controller in the public interest, including processing in relation to social
|
| 286 |
+
protection and public health'
|
| 287 |
+
- "1.The Member States, the supervisory authorities, the Board and the Commission\
|
| 288 |
+
\ shall encourage, in particular at Union level, the establishment of data protection\
|
| 289 |
+
\ certification mechanisms and of data protection seals and marks, for the purpose\
|
| 290 |
+
\ of demonstrating compliance with this Regulation of processing operations by\
|
| 291 |
+
\ controllers and processors. The specific needs of micro, small and medium-sized\
|
| 292 |
+
\ enterprises shall be taken into account. 4.5.2016 L 119/58 \n2.In addition\
|
| 293 |
+
\ to adherence by controllers or processors subject to this Regulation, data protection\
|
| 294 |
+
\ certification mechanisms, seals or marks approved pursuant to paragraph 5 of\
|
| 295 |
+
\ this Article may be established for the purpose of demonstrating the existence\
|
| 296 |
+
\ of appropriate safeguards provided by controllers or processors that are not\
|
| 297 |
+
\ subject to this Regulation pursuant to Article 3 within the framework of personal\
|
| 298 |
+
\ data transfers to third countries or international organisations under the terms\
|
| 299 |
+
\ referred to in point (f) of Article 46(2). Such controllers or processors shall\
|
| 300 |
+
\ make binding and enforceable commitments, via contractual or other legally binding\
|
| 301 |
+
\ instruments, to apply those appropriate safeguards, including with regard to\
|
| 302 |
+
\ the rights of data subjects.\n3.The certification shall be voluntary and available\
|
| 303 |
+
\ via a process that is transparent.\n4.A certification pursuant to this Article\
|
| 304 |
+
\ does not reduce the responsibility of the controller or the processor for compliance\
|
| 305 |
+
\ with this Regulation and is without prejudice to the tasks and powers of the\
|
| 306 |
+
\ supervisory authorities which are competent pursuant to Article 55 or 56\n5.A\
|
| 307 |
+
\ certification pursuant to this Article shall be issued by the certification\
|
| 308 |
+
\ bodies referred to in Article 43 or by the competent supervisory authority,\
|
| 309 |
+
\ on the basis of criteria approved by that competent supervisory authority pursuant\
|
| 310 |
+
\ to Article 58(3) or by the Board pursuant to Article 63. Where the criteria\
|
| 311 |
+
\ are approved by the Board, this may result in a common certification, the European\
|
| 312 |
+
\ Data Protection Seal.\n6.The controller or processor which submits its processing\
|
| 313 |
+
\ to the certification mechanism shall provide the certification body referred\
|
| 314 |
+
\ to in Article 43, or where applicable, the competent supervisory authority,\
|
| 315 |
+
\ with all information and access to its processing activities which are necessary\
|
| 316 |
+
\ to conduct the certification procedure.\n7.Certification shall be issued to\
|
| 317 |
+
\ a controller or processor for a maximum period of three years and may be renewed,\
|
| 318 |
+
\ under the same conditions, provided that the relevant requirements continue\
|
| 319 |
+
\ to be met. Certification shall be withdrawn, as applicable, by the certification\
|
| 320 |
+
\ bodies referred to in Article 43 or by the competent supervisory authority where\
|
| 321 |
+
\ the requirements for the certification are not or are no longer met.\n8.The\
|
| 322 |
+
\ Board shall collate all certification mechanisms and data protection seals and\
|
| 323 |
+
\ marks in a register and shall make them publicly available by any appropriate\
|
| 324 |
+
\ means."
|
| 325 |
+
- However, it is not necessary to impose the obligation to provide information where
|
| 326 |
+
the data subject already possesses the information, where the recording or disclosure
|
| 327 |
+
of the personal data is expressly laid down by law or where the provision of information
|
| 328 |
+
to the data subject proves to be impossible or would involve a disproportionate
|
| 329 |
+
effort. The latter could in particular be the case where processing is carried
|
| 330 |
+
out for archiving purposes in the public interest, scientific or historical research
|
| 331 |
+
purposes or statistical purposes. In that regard, the number of data subjects,
|
| 332 |
+
the age of the data and any appropriate safeguards adopted should be taken into
|
| 333 |
+
consideration.
|
| 334 |
+
- source_sentence: What is the data subject provided with prior to further processing
|
| 335 |
+
of personal data?
|
| 336 |
+
sentences:
|
| 337 |
+
- '1.Where personal data relating to a data subject are collected from the data
|
| 338 |
+
subject, the controller shall, at the time when personal data are obtained, provide
|
| 339 |
+
the data subject with all of the following information: (a) the identity and
|
| 340 |
+
the contact details of the controller and, where applicable, of the controller''s
|
| 341 |
+
representative; (b) the contact details of the data protection officer, where
|
| 342 |
+
applicable; (c) the purposes of the processing for which the personal data are
|
| 343 |
+
intended as well as the legal basis for the processing; 4.5.2016 L 119/40 (d) where
|
| 344 |
+
the processing is based on point (f) of Article 6(1), the legitimate interests
|
| 345 |
+
pursued by the controller or by a third party; (e) the recipients or categories
|
| 346 |
+
of recipients of the personal data, if any; (f) where applicable, the fact that
|
| 347 |
+
the controller intends to transfer personal data to a third country or international
|
| 348 |
+
organisation and the existence or absence of an adequacy decision by the Commission,
|
| 349 |
+
or in the case of transfers referred to in Article 46 or 47, or the second subparagraph
|
| 350 |
+
of Article 49(1), reference to the appropriate or suitable safeguards and the
|
| 351 |
+
means by which to obtain a copy of them or where they have been made available.
|
| 352 |
+
|
| 353 |
+
2.In addition to the information referred to in paragraph 1, the controller shall,
|
| 354 |
+
at the time when personal data are obtained, provide the data subject with the
|
| 355 |
+
following further information necessary to ensure fair and transparent processing:
|
| 356 |
+
(a) the period for which the personal data will be stored, or if that is not
|
| 357 |
+
possible, the criteria used to determine that period; (b) the existence of the
|
| 358 |
+
right to request from the controller access to and rectification or erasure of
|
| 359 |
+
personal data or restriction of processing concerning the data subject or to object
|
| 360 |
+
to processing as well as the right to data portability; (c) where the processing
|
| 361 |
+
is based on point (a) of Article 6(1) or point (a) of Article 9(2), the existence
|
| 362 |
+
of the right to withdraw consent at any time, without affecting the lawfulness
|
| 363 |
+
of processing based on consent before its withdrawal; (d) the right to lodge
|
| 364 |
+
a complaint with a supervisory authority; (e) whether the provision of personal
|
| 365 |
+
data is a statutory or contractual requirement, or a requirement necessary to
|
| 366 |
+
enter into a contract, as well as whether the data subject is obliged to provide
|
| 367 |
+
the personal data and of the possible consequences of failure to provide such
|
| 368 |
+
data; (f) the existence of automated decision-making, including profiling, referred
|
| 369 |
+
to in Article 22(1) and (4) and, at least in those cases, meaningful information
|
| 370 |
+
about the logic involved, as well as the significance and the envisaged consequences
|
| 371 |
+
of such processing for the data subject.
|
| 372 |
+
|
| 373 |
+
3.Where the controller intends to further process the personal data for a purpose
|
| 374 |
+
other than that for which the personal data were collected, the controller shall
|
| 375 |
+
provide the data subject prior to that further processing with information on
|
| 376 |
+
that other purpose and with any relevant further information as referred to in
|
| 377 |
+
paragraph 2
|
| 378 |
+
|
| 379 |
+
4.Paragraphs 1, 2 and 3 shall not apply where and insofar as the data subject
|
| 380 |
+
already has the information.'
|
| 381 |
+
- This Regulation respects and does not prejudice the status under existing constitutional
|
| 382 |
+
law of churches and religious associations or communities in the Member States,
|
| 383 |
+
as recognised in Article 17 TFEU.
|
| 384 |
+
- '1) ''personal data'' means any information relating to an identified or identifiable
|
| 385 |
+
natural person (''data subject''); an identifiable natural person is one who can
|
| 386 |
+
be identified, directly or indirectly, in particular by reference to an identifier
|
| 387 |
+
such as a name, an identification number, location data, an online identifier
|
| 388 |
+
or to one or more factors specific to the physical, physiological, genetic, mental,
|
| 389 |
+
economic, cultural or social identity of that natural person;
|
| 390 |
+
|
| 391 |
+
(2) ‘processing’ means any operation or set of operations which is performed on
|
| 392 |
+
personal data or on sets of personal data, whether or not by automated means,
|
| 393 |
+
such as collection, recording, organisation, structuring, storage, adaptation
|
| 394 |
+
or alteration, retrieval, consultation, use, disclosure by transmission, dissemination
|
| 395 |
+
or otherwise making available, alignment or combination, restriction, erasure
|
| 396 |
+
or destruction;
|
| 397 |
+
|
| 398 |
+
(3) ‘restriction of processing’ means the marking of stored personal data with
|
| 399 |
+
the aim of limiting their processing in the future;
|
| 400 |
+
|
| 401 |
+
(4) ‘profiling’ means any form of automated processing of personal data consisting
|
| 402 |
+
of the use of personal data to evaluate certain personal aspects relating to a
|
| 403 |
+
natural person, in particular to analyse or predict aspects concerning that natural
|
| 404 |
+
person''s performance at work, economic situation, health, personal preferences,
|
| 405 |
+
interests, reliability, behaviour, location or movements;
|
| 406 |
+
|
| 407 |
+
(5) ‘pseudonymisation’ means the processing of personal data in such a manner
|
| 408 |
+
that the personal data can no longer be attributed to a specific data subject
|
| 409 |
+
without the use of additional information, provided that such additional information
|
| 410 |
+
is kept separately and is subject to technical and organisational measures to
|
| 411 |
+
ensure that the personal data are not attributed to an identified or identifiable
|
| 412 |
+
natural person;
|
| 413 |
+
|
| 414 |
+
(6) ‘filing system’ means any structured set of personal data which are accessible
|
| 415 |
+
according to specific criteria, whether centralised, decentralised or dispersed
|
| 416 |
+
on a functional or geographical basis;
|
| 417 |
+
|
| 418 |
+
(7) ‘controller’ means the natural or legal person, public authority, agency or
|
| 419 |
+
other body which, alone or jointly with others, determines the purposes and means
|
| 420 |
+
of the processing of personal data; where the purposes and means of such processing
|
| 421 |
+
are determined by Union or Member State law, the controller or the specific criteria
|
| 422 |
+
for its nomination may be provided for by Union or Member State law;
|
| 423 |
+
|
| 424 |
+
(8) ‘processor’ means a natural or legal person, public authority, agency or other
|
| 425 |
+
body which processes personal data on behalf of the controller;
|
| 426 |
+
|
| 427 |
+
(9) ‘recipient’ means a natural or legal person, public authority, agency or another
|
| 428 |
+
body, to which the personal data are disclosed, whether a third party or not.
|
| 429 |
+
However, public authorities which may receive personal data in the framework of
|
| 430 |
+
a particular inquiry in accordance with Union or Member State law shall not be
|
| 431 |
+
regarded as recipients; the processing of those data by those public authorities
|
| 432 |
+
shall be in compliance with the applicable data protection rules according to
|
| 433 |
+
the purposes of the processing;
|
| 434 |
+
|
| 435 |
+
(10) ‘third party’ means a natural or legal person, public authority, agency or
|
| 436 |
+
body other than the data subject, controller, processor and persons who, under
|
| 437 |
+
the direct authority of the controller or processor, are authorised to process
|
| 438 |
+
personal data;
|
| 439 |
+
|
| 440 |
+
(11) ‘consent’ of the data subject means any freely given, specific, informed
|
| 441 |
+
and unambiguous indication of the data subject''s wishes by which he or she, by
|
| 442 |
+
a statement or by a clear affirmative action, signifies agreement to the processing
|
| 443 |
+
of personal data relating to him or her;
|
| 444 |
+
|
| 445 |
+
(12) ‘personal data breach’ means a breach of security leading to the accidental
|
| 446 |
+
or unlawful destruction, loss, alteration, unauthorised disclosure of, or access
|
| 447 |
+
to, personal data transmitted, stored or otherwise processed;
|
| 448 |
+
|
| 449 |
+
(13) ‘genetic data’ means personal data relating to the inherited or acquired
|
| 450 |
+
genetic characteristics of a natural person which give unique information about
|
| 451 |
+
the physiology or the health of that natural person and which result, in particular,
|
| 452 |
+
from an analysis of a biological sample from the natural person in question;
|
| 453 |
+
|
| 454 |
+
(14) ‘biometric data’ means personal data resulting from specific technical processing
|
| 455 |
+
relating to the physical, physiological or behavioural characteristics of a natural
|
| 456 |
+
person, which allow or confirm the unique identification of that natural person,
|
| 457 |
+
such as facial images or dactyloscopic data;
|
| 458 |
+
|
| 459 |
+
(15) ‘data concerning health’ means personal data related to the physical or mental
|
| 460 |
+
health of a natural person, including the provision of health care services, which
|
| 461 |
+
reveal information about his or her health status;
|
| 462 |
+
|
| 463 |
+
(16) ‘main establishment’ means: (a) as regards a controller with establishments
|
| 464 |
+
in more than one Member State, the place of its central administration in the
|
| 465 |
+
Union, unless the decisions on the purposes and means of the processing of personal
|
| 466 |
+
data are taken in another establishment of the controller in the Union and the
|
| 467 |
+
latter establishment has the power to have such decisions implemented, in which
|
| 468 |
+
case the establishment having taken such decisions is to be considered to be the
|
| 469 |
+
main establishment; (b) as regards a processor with establishments in more than
|
| 470 |
+
one Member State, the place of its central administration in the Union, or, if
|
| 471 |
+
the processor has no central administration in the Union, the establishment of
|
| 472 |
+
the processor in the Union where the main processing activities in the context
|
| 473 |
+
of the activities of an establishment of the processor take place to the extent
|
| 474 |
+
that the processor is subject to specific obligations under this Regulation;
|
| 475 |
+
|
| 476 |
+
(17) ‘representative’ means a natural or legal person established in the Union
|
| 477 |
+
who, designated by the controller or processor in writing pursuant to Article
|
| 478 |
+
27, represents the controller or processor with regard to their respective obligations
|
| 479 |
+
under this Regulation;
|
| 480 |
+
|
| 481 |
+
(18) ‘enterprise’ means a natural or legal person engaged in an economic activity,
|
| 482 |
+
irrespective of its legal form, including partnerships or associations regularly
|
| 483 |
+
engaged in an economic activity;
|
| 484 |
+
|
| 485 |
+
(19) ‘group of undertakings’ means a controlling undertaking and its controlled
|
| 486 |
+
undertakings;
|
| 487 |
+
|
| 488 |
+
(20) ‘binding corporate rules’ means personal data protection policies which are
|
| 489 |
+
adhered to by a controller or processor established on the territory of a Member
|
| 490 |
+
State for transfers or a set of transfers of personal data to a controller or
|
| 491 |
+
processor in one or more third countries within a group of undertakings, or group
|
| 492 |
+
of enterprises engaged in a joint economic activity;
|
| 493 |
+
|
| 494 |
+
(21) ‘supervisory authority’ means an independent public authority which is established
|
| 495 |
+
by a Member State pursuant to Article 51;
|
| 496 |
+
|
| 497 |
+
(22) ‘supervisory authority concerned’ means a supervisory authority which is
|
| 498 |
+
concerned by the processing of personal data because: (a) the controller or processor
|
| 499 |
+
is established on the territory of the Member State of that supervisory authority;
|
| 500 |
+
(b) data subjects residing in the Member State of that supervisory authority are
|
| 501 |
+
substantially affected or likely to be substantially affected by the processing;
|
| 502 |
+
or (c) a complaint has been lodged with that supervisory authority;
|
| 503 |
+
|
| 504 |
+
(23) ‘cross-border processing’ means either: (a) processing of personal data which
|
| 505 |
+
takes place in the context of the activities of establishments in more than one
|
| 506 |
+
Member State of a controller or processor in the Union where the controller or
|
| 507 |
+
processor is established in more than one Member State; or (b) processing of personal
|
| 508 |
+
data which takes place in the context of the activities of a single establishment
|
| 509 |
+
of a controller or processor in the Union but which substantially affects or is
|
| 510 |
+
likely to substantially affect data subjects in more than one Member State.
|
| 511 |
+
|
| 512 |
+
(24) ‘relevant and reasoned objection’ means an objection to a draft decision
|
| 513 |
+
as to whether there is an infringement of this Regulation, or whether envisaged
|
| 514 |
+
action in relation to the controller or processor complies with this Regulation,
|
| 515 |
+
which clearly demonstrates the significance of the risks posed by the draft decision
|
| 516 |
+
as regards the fundamental rights and freedoms of data subjects and, where applicable,
|
| 517 |
+
the free flow of personal data within the Union;
|
| 518 |
+
|
| 519 |
+
(25) ‘information society service’ means a service as defined in point (b) of
|
| 520 |
+
Article 1(1) of Directive (EU) 2015/1535 of the European Parliament and of the
|
| 521 |
+
Council (1);
|
| 522 |
+
|
| 523 |
+
(26) ‘international organisation’ means an organisation and its subordinate bodies
|
| 524 |
+
governed by public international law, or any other body which is set up by, or
|
| 525 |
+
on the basis of, an agreement between two or more countries.'
|
| 526 |
+
- source_sentence: What type of data may be processed for purposes related to point
|
| 527 |
+
(h) of paragraph 2?
|
| 528 |
+
sentences:
|
| 529 |
+
- '1.Processing of personal data revealing racial or ethnic origin, political opinions,
|
| 530 |
+
religious or philosophical beliefs, or trade union membership, and the processing
|
| 531 |
+
of genetic data, biometric data for the purpose of uniquely identifying a natural
|
| 532 |
+
person, data concerning health or data concerning a natural person''s sex life
|
| 533 |
+
or sexual orientation shall be prohibited.
|
| 534 |
+
|
| 535 |
+
2.Paragraph 1 shall not apply if one of the following applies: (a) the data subject
|
| 536 |
+
has given explicit consent to the processing of those personal data for one or
|
| 537 |
+
more specified purposes, except where Union or Member State law provide that the
|
| 538 |
+
prohibition referred to in paragraph 1 may not be lifted by the data subject;
|
| 539 |
+
(b) processing is necessary for the purposes of carrying out the obligations
|
| 540 |
+
and exercising specific rights of the controller or of the data subject in the
|
| 541 |
+
field of employment and social security and social protection law in so far as
|
| 542 |
+
it is authorised by Union or Member State law or a collective agreement pursuant
|
| 543 |
+
to Member State law providing for appropriate safeguards for the fundamental rights
|
| 544 |
+
and the interests of the data subject; (c) processing is necessary to protect
|
| 545 |
+
the vital interests of the data subject or of another natural person where the
|
| 546 |
+
data subject is physically or legally incapable of giving consent; (d) processing
|
| 547 |
+
is carried out in the course of its legitimate activities with appropriate safeguards
|
| 548 |
+
by a foundation, association or any other not-for-profit body with a political,
|
| 549 |
+
philosophical, religious or trade union aim and on condition that the processing
|
| 550 |
+
relates solely to the members or to former members of the body or to persons who
|
| 551 |
+
have regular contact with it in connection with its purposes and that the personal
|
| 552 |
+
data are not disclosed outside that body without the consent of the data subjects;
|
| 553 |
+
(e) processing relates to personal data which are manifestly made public by the
|
| 554 |
+
data subject; (f) processing is necessary for the establishment, exercise or
|
| 555 |
+
defence of legal claims or whenever courts are acting in their judicial capacity;
|
| 556 |
+
(g) processing is necessary for reasons of substantial public interest, on the
|
| 557 |
+
basis of Union or Member State law which shall be proportionate to the aim pursued,
|
| 558 |
+
respect the essence of the right to data protection and provide for suitable and
|
| 559 |
+
specific measures to safeguard the fundamental rights and the interests of the
|
| 560 |
+
data subject; (h) processing is necessary for the purposes of preventive or occupational
|
| 561 |
+
medicine, for the assessment of the working capacity of the employee, medical
|
| 562 |
+
diagnosis, the provision of health or social care or treatment or the management
|
| 563 |
+
of health or social care systems and services on the basis of Union or Member
|
| 564 |
+
State law or pursuant to contract with a health professional and subject to the
|
| 565 |
+
conditions and safeguards referred to in paragraph 3; (i) processing is necessary
|
| 566 |
+
for reasons of public interest in the area of public health, such as protecting
|
| 567 |
+
against serious cross-border threats to health or ensuring high standards of quality
|
| 568 |
+
and safety of health care and of medicinal products or medical devices, on the
|
| 569 |
+
basis of Union or Member State law which provides for suitable and specific measures
|
| 570 |
+
to safeguard the rights and freedoms of the data subject, in particular professional
|
| 571 |
+
secrecy; 4.5.2016 L 119/38 (j) processing is necessary for archiving purposes
|
| 572 |
+
in the public interest, scientific or historical research purposes or statistical
|
| 573 |
+
purposes in accordance with Article 89(1) based on Union or Member State law which
|
| 574 |
+
shall be proportionate to the aim pursued, respect the essence of the right to
|
| 575 |
+
data protection and provide for suitable and specific measures to safeguard the
|
| 576 |
+
fundamental rights and the interests of the data subject.
|
| 577 |
+
|
| 578 |
+
3.Personal data referred to in paragraph 1 may be processed for the purposes referred
|
| 579 |
+
to in point (h) of paragraph 2 when those data are processed by or under the responsibility
|
| 580 |
+
of a professional subject to the obligation of professional secrecy under Union
|
| 581 |
+
or Member State law or rules established by national competent bodies or by another
|
| 582 |
+
person also subject to an obligation of secrecy under Union or Member State law
|
| 583 |
+
or rules established by national competent bodies.
|
| 584 |
+
|
| 585 |
+
4.Member States may maintain or introduce further conditions, including limitations,
|
| 586 |
+
with regard to the processing of genetic data, biometric data or data concerning
|
| 587 |
+
health.'
|
| 588 |
+
- '1.The data protection officer shall have at least the following tasks: (a) to
|
| 589 |
+
inform and advise the controller or the processor and the employees who carry
|
| 590 |
+
out processing of their obligations pursuant to this Regulation and to other Union
|
| 591 |
+
or Member State data protection provisions; (b) to monitor compliance with this
|
| 592 |
+
Regulation, with other Union or Member State data protection provisions and with
|
| 593 |
+
the policies of the controller or processor in relation to the protection of personal
|
| 594 |
+
data, including the assignment of responsibilities, awareness-raising and training
|
| 595 |
+
of staff involved in processing operations, and the related audits; (c) to provide
|
| 596 |
+
advice where requested as regards the data protection impact assessment and monitor
|
| 597 |
+
its performance pursuant to Article 35; (d) to cooperate with the supervisory
|
| 598 |
+
authority; (e) to act as the contact point for the supervisory authority on issues
|
| 599 |
+
relating to processing, including the prior consultation referred to in Article
|
| 600 |
+
36, and to consult, where appropriate, with regard to any other matter.
|
| 601 |
+
|
| 602 |
+
2.The data protection officer shall in the performance of his or her tasks have
|
| 603 |
+
due regard to the risk associated with processing operations, taking into account
|
| 604 |
+
the nature, scope, context and purposes of processing. Section 5 Codes of conduct
|
| 605 |
+
and certification'
|
| 606 |
+
- Processing should be lawful where it is necessary in the context of a contract
|
| 607 |
+
or the intention to enter into a contract.
|
| 608 |
+
- source_sentence: What may impede authorities in the discharge of their responsibilities
|
| 609 |
+
under Union law?
|
| 610 |
+
sentences:
|
| 611 |
+
- '1.The controller and the processor shall designate a data protection officer
|
| 612 |
+
in any case where: (a) the processing is carried out by a public authority or
|
| 613 |
+
body, except for courts acting in their judicial capacity; (b) the core activities
|
| 614 |
+
of the controller or the processor consist of processing operations which, by
|
| 615 |
+
virtue of their nature, their scope and/or their purposes, require regular and
|
| 616 |
+
systematic monitoring of data subjects on a large scale; or (c) the core activities
|
| 617 |
+
of the controller or the processor consist of processing on a large scale of special
|
| 618 |
+
categories of data pursuant to Article 9 and personal data relating to criminal
|
| 619 |
+
convictions and offences referred to in Article 10
|
| 620 |
+
|
| 621 |
+
2.A group of undertakings may appoint a single data protection officer provided
|
| 622 |
+
that a data protection officer is easily accessible from each establishment.
|
| 623 |
+
|
| 624 |
+
3.Where the controller or the processor is a public authority or body, a single
|
| 625 |
+
data protection officer may be designated for several such authorities or bodies,
|
| 626 |
+
taking account of their organisational structure and size.
|
| 627 |
+
|
| 628 |
+
4.In cases other than those referred to in paragraph 1, the controller or processor
|
| 629 |
+
or associations and other bodies representing categories of controllers or processors
|
| 630 |
+
may or, where required by Union or Member State law shall, designate a data protection
|
| 631 |
+
officer. The data protection officer may act for such associations and other bodies
|
| 632 |
+
representing controllers or processors.
|
| 633 |
+
|
| 634 |
+
5.The data protection officer shall be designated on the basis of professional
|
| 635 |
+
qualities and, in particular, expert knowledge of data protection law and practices
|
| 636 |
+
and the ability to fulfil the tasks referred to in Article 39
|
| 637 |
+
|
| 638 |
+
6.The data protection officer may be a staff member of the controller or processor,
|
| 639 |
+
or fulfil the tasks on the basis of a service contract.
|
| 640 |
+
|
| 641 |
+
7.The controller or the processor shall publish the contact details of the data
|
| 642 |
+
protection officer and communicate them to the supervisory authority.'
|
| 643 |
+
- This Regulation is without prejudice to international agreements concluded between
|
| 644 |
+
the Union and third countries regulating the transfer of personal data including
|
| 645 |
+
appropriate safeguards for the data subjects. Member States may conclude international
|
| 646 |
+
agreements which involve the transfer of personal data to third countries or international
|
| 647 |
+
organisations, as far as such agreements do not affect this Regulation or any
|
| 648 |
+
other provisions of Union law and include an appropriate level of protection for
|
| 649 |
+
the fundamental rights of the data subjects.
|
| 650 |
+
- The objectives and principles of Directive 95/46/EC remain sound, but it has not
|
| 651 |
+
prevented fragmentation in the implementation of data protection across the Union,
|
| 652 |
+
legal uncertainty or a widespread public perception that there are significant
|
| 653 |
+
risks to the protection of natural persons, in particular with regard to online
|
| 654 |
+
activity. Differences in the level of protection of the rights and freedoms of
|
| 655 |
+
natural persons, in particular the right to the protection of personal data, with
|
| 656 |
+
regard to the processing of personal data in the Member States may prevent the
|
| 657 |
+
free flow of personal data throughout the Union. Those differences may therefore
|
| 658 |
+
constitute an obstacle to the pursuit of economic activities at the level of the
|
| 659 |
+
Union, distort competition and impede authorities in the discharge of their responsibilities
|
| 660 |
+
under Union law. Such a difference in levels of protection is due to the existence
|
| 661 |
+
of differences in the implementation and application of Directive 95/46/EC.
|
| 662 |
+
pipeline_tag: sentence-similarity
|
| 663 |
+
library_name: sentence-transformers
|
| 664 |
+
metrics:
|
| 665 |
+
- cosine_accuracy@1
|
| 666 |
+
- cosine_accuracy@3
|
| 667 |
+
- cosine_accuracy@5
|
| 668 |
+
- cosine_accuracy@10
|
| 669 |
+
- cosine_precision@1
|
| 670 |
+
- cosine_precision@3
|
| 671 |
+
- cosine_precision@5
|
| 672 |
+
- cosine_precision@10
|
| 673 |
+
- cosine_recall@1
|
| 674 |
+
- cosine_recall@3
|
| 675 |
+
- cosine_recall@5
|
| 676 |
+
- cosine_recall@10
|
| 677 |
+
- cosine_ndcg@10
|
| 678 |
+
- cosine_mrr@10
|
| 679 |
+
- cosine_map@100
|
| 680 |
+
model-index:
|
| 681 |
+
- name: multilingual-e5-large
|
| 682 |
+
results:
|
| 683 |
+
- task:
|
| 684 |
+
type: information-retrieval
|
| 685 |
+
name: Information Retrieval
|
| 686 |
+
dataset:
|
| 687 |
+
name: dim 1024
|
| 688 |
+
type: dim_1024
|
| 689 |
+
metrics:
|
| 690 |
+
- type: cosine_accuracy@1
|
| 691 |
+
value: 0.36235595390524966
|
| 692 |
+
name: Cosine Accuracy@1
|
| 693 |
+
- type: cosine_accuracy@3
|
| 694 |
+
value: 0.3681177976952625
|
| 695 |
+
name: Cosine Accuracy@3
|
| 696 |
+
- type: cosine_accuracy@5
|
| 697 |
+
value: 0.39308578745198464
|
| 698 |
+
name: Cosine Accuracy@5
|
| 699 |
+
- type: cosine_accuracy@10
|
| 700 |
+
value: 0.4334186939820743
|
| 701 |
+
name: Cosine Accuracy@10
|
| 702 |
+
- type: cosine_precision@1
|
| 703 |
+
value: 0.36235595390524966
|
| 704 |
+
name: Cosine Precision@1
|
| 705 |
+
- type: cosine_precision@3
|
| 706 |
+
value: 0.36192915066154496
|
| 707 |
+
name: Cosine Precision@3
|
| 708 |
+
- type: cosine_precision@5
|
| 709 |
+
value: 0.35172855313700385
|
| 710 |
+
name: Cosine Precision@5
|
| 711 |
+
- type: cosine_precision@10
|
| 712 |
+
value: 0.3176696542893726
|
| 713 |
+
name: Cosine Precision@10
|
| 714 |
+
- type: cosine_recall@1
|
| 715 |
+
value: 0.04346309464734114
|
| 716 |
+
name: Cosine Recall@1
|
| 717 |
+
- type: cosine_recall@3
|
| 718 |
+
value: 0.12757812796185336
|
| 719 |
+
name: Cosine Recall@3
|
| 720 |
+
- type: cosine_recall@5
|
| 721 |
+
value: 0.19200836801442767
|
| 722 |
+
name: Cosine Recall@5
|
| 723 |
+
- type: cosine_recall@10
|
| 724 |
+
value: 0.28096984500258326
|
| 725 |
+
name: Cosine Recall@10
|
| 726 |
+
- type: cosine_ndcg@10
|
| 727 |
+
value: 0.3858809020056271
|
| 728 |
+
name: Cosine Ndcg@10
|
| 729 |
+
- type: cosine_mrr@10
|
| 730 |
+
value: 0.37430415828303115
|
| 731 |
+
name: Cosine Mrr@10
|
| 732 |
+
- type: cosine_map@100
|
| 733 |
+
value: 0.45394800707643057
|
| 734 |
+
name: Cosine Map@100
|
| 735 |
+
- task:
|
| 736 |
+
type: information-retrieval
|
| 737 |
+
name: Information Retrieval
|
| 738 |
+
dataset:
|
| 739 |
+
name: dim 768
|
| 740 |
+
type: dim_768
|
| 741 |
+
metrics:
|
| 742 |
+
- type: cosine_accuracy@1
|
| 743 |
+
value: 0.3591549295774648
|
| 744 |
+
name: Cosine Accuracy@1
|
| 745 |
+
- type: cosine_accuracy@3
|
| 746 |
+
value: 0.3649167733674776
|
| 747 |
+
name: Cosine Accuracy@3
|
| 748 |
+
- type: cosine_accuracy@5
|
| 749 |
+
value: 0.3892445582586428
|
| 750 |
+
name: Cosine Accuracy@5
|
| 751 |
+
- type: cosine_accuracy@10
|
| 752 |
+
value: 0.4334186939820743
|
| 753 |
+
name: Cosine Accuracy@10
|
| 754 |
+
- type: cosine_precision@1
|
| 755 |
+
value: 0.3591549295774648
|
| 756 |
+
name: Cosine Precision@1
|
| 757 |
+
- type: cosine_precision@3
|
| 758 |
+
value: 0.3587281263337601
|
| 759 |
+
name: Cosine Precision@3
|
| 760 |
+
- type: cosine_precision@5
|
| 761 |
+
value: 0.34852752880921894
|
| 762 |
+
name: Cosine Precision@5
|
| 763 |
+
- type: cosine_precision@10
|
| 764 |
+
value: 0.31670934699103714
|
| 765 |
+
name: Cosine Precision@10
|
| 766 |
+
- type: cosine_recall@1
|
| 767 |
+
value: 0.04250079684114586
|
| 768 |
+
name: Cosine Recall@1
|
| 769 |
+
- type: cosine_recall@3
|
| 770 |
+
value: 0.12462187901616553
|
| 771 |
+
name: Cosine Recall@3
|
| 772 |
+
- type: cosine_recall@5
|
| 773 |
+
value: 0.1875478484365334
|
| 774 |
+
name: Cosine Recall@5
|
| 775 |
+
- type: cosine_recall@10
|
| 776 |
+
value: 0.27695909667507057
|
| 777 |
+
name: Cosine Recall@10
|
| 778 |
+
- type: cosine_ndcg@10
|
| 779 |
+
value: 0.38308181752122755
|
| 780 |
+
name: Cosine Ndcg@10
|
| 781 |
+
- type: cosine_mrr@10
|
| 782 |
+
value: 0.37149335406377615
|
| 783 |
+
name: Cosine Mrr@10
|
| 784 |
+
- type: cosine_map@100
|
| 785 |
+
value: 0.4493001842217619
|
| 786 |
+
name: Cosine Map@100
|
| 787 |
+
- task:
|
| 788 |
+
type: information-retrieval
|
| 789 |
+
name: Information Retrieval
|
| 790 |
+
dataset:
|
| 791 |
+
name: dim 512
|
| 792 |
+
type: dim_512
|
| 793 |
+
metrics:
|
| 794 |
+
- type: cosine_accuracy@1
|
| 795 |
+
value: 0.35979513444302175
|
| 796 |
+
name: Cosine Accuracy@1
|
| 797 |
+
- type: cosine_accuracy@3
|
| 798 |
+
value: 0.36555697823303457
|
| 799 |
+
name: Cosine Accuracy@3
|
| 800 |
+
- type: cosine_accuracy@5
|
| 801 |
+
value: 0.3911651728553137
|
| 802 |
+
name: Cosine Accuracy@5
|
| 803 |
+
- type: cosine_accuracy@10
|
| 804 |
+
value: 0.4334186939820743
|
| 805 |
+
name: Cosine Accuracy@10
|
| 806 |
+
- type: cosine_precision@1
|
| 807 |
+
value: 0.35979513444302175
|
| 808 |
+
name: Cosine Precision@1
|
| 809 |
+
- type: cosine_precision@3
|
| 810 |
+
value: 0.35936833119931705
|
| 811 |
+
name: Cosine Precision@3
|
| 812 |
+
- type: cosine_precision@5
|
| 813 |
+
value: 0.34967989756722156
|
| 814 |
+
name: Cosine Precision@5
|
| 815 |
+
- type: cosine_precision@10
|
| 816 |
+
value: 0.3173495518565941
|
| 817 |
+
name: Cosine Precision@10
|
| 818 |
+
- type: cosine_recall@1
|
| 819 |
+
value: 0.04265405128130224
|
| 820 |
+
name: Cosine Recall@1
|
| 821 |
+
- type: cosine_recall@3
|
| 822 |
+
value: 0.12523102347193127
|
| 823 |
+
name: Cosine Recall@3
|
| 824 |
+
- type: cosine_recall@5
|
| 825 |
+
value: 0.18912519336740205
|
| 826 |
+
name: Cosine Recall@5
|
| 827 |
+
- type: cosine_recall@10
|
| 828 |
+
value: 0.2781876565001863
|
| 829 |
+
name: Cosine Recall@10
|
| 830 |
+
- type: cosine_ndcg@10
|
| 831 |
+
value: 0.3843750966464458
|
| 832 |
+
name: Cosine Ndcg@10
|
| 833 |
+
- type: cosine_mrr@10
|
| 834 |
+
value: 0.37212542934373866
|
| 835 |
+
name: Cosine Mrr@10
|
| 836 |
+
- type: cosine_map@100
|
| 837 |
+
value: 0.4476805587612892
|
| 838 |
+
name: Cosine Map@100
|
| 839 |
+
- task:
|
| 840 |
+
type: information-retrieval
|
| 841 |
+
name: Information Retrieval
|
| 842 |
+
dataset:
|
| 843 |
+
name: dim 256
|
| 844 |
+
type: dim_256
|
| 845 |
+
metrics:
|
| 846 |
+
- type: cosine_accuracy@1
|
| 847 |
+
value: 0.3437900128040973
|
| 848 |
+
name: Cosine Accuracy@1
|
| 849 |
+
- type: cosine_accuracy@3
|
| 850 |
+
value: 0.34763124199743917
|
| 851 |
+
name: Cosine Accuracy@3
|
| 852 |
+
- type: cosine_accuracy@5
|
| 853 |
+
value: 0.3764404609475032
|
| 854 |
+
name: Cosine Accuracy@5
|
| 855 |
+
- type: cosine_accuracy@10
|
| 856 |
+
value: 0.41101152368758004
|
| 857 |
+
name: Cosine Accuracy@10
|
| 858 |
+
- type: cosine_precision@1
|
| 859 |
+
value: 0.3437900128040973
|
| 860 |
+
name: Cosine Precision@1
|
| 861 |
+
- type: cosine_precision@3
|
| 862 |
+
value: 0.342936406316688
|
| 863 |
+
name: Cosine Precision@3
|
| 864 |
+
- type: cosine_precision@5
|
| 865 |
+
value: 0.33457106274007686
|
| 866 |
+
name: Cosine Precision@5
|
| 867 |
+
- type: cosine_precision@10
|
| 868 |
+
value: 0.3040973111395647
|
| 869 |
+
name: Cosine Precision@10
|
| 870 |
+
- type: cosine_recall@1
|
| 871 |
+
value: 0.04013102608834382
|
| 872 |
+
name: Cosine Recall@1
|
| 873 |
+
- type: cosine_recall@3
|
| 874 |
+
value: 0.11771735023719074
|
| 875 |
+
name: Cosine Recall@3
|
| 876 |
+
- type: cosine_recall@5
|
| 877 |
+
value: 0.17837935755014916
|
| 878 |
+
name: Cosine Recall@5
|
| 879 |
+
- type: cosine_recall@10
|
| 880 |
+
value: 0.2648598688529433
|
| 881 |
+
name: Cosine Recall@10
|
| 882 |
+
- type: cosine_ndcg@10
|
| 883 |
+
value: 0.3670052960875804
|
| 884 |
+
name: Cosine Ndcg@10
|
| 885 |
+
- type: cosine_mrr@10
|
| 886 |
+
value: 0.3551361197487955
|
| 887 |
+
name: Cosine Mrr@10
|
| 888 |
+
- type: cosine_map@100
|
| 889 |
+
value: 0.4298669852983799
|
| 890 |
+
name: Cosine Map@100
|
| 891 |
+
- task:
|
| 892 |
+
type: information-retrieval
|
| 893 |
+
name: Information Retrieval
|
| 894 |
+
dataset:
|
| 895 |
+
name: dim 128
|
| 896 |
+
type: dim_128
|
| 897 |
+
metrics:
|
| 898 |
+
- type: cosine_accuracy@1
|
| 899 |
+
value: 0.3085787451984635
|
| 900 |
+
name: Cosine Accuracy@1
|
| 901 |
+
- type: cosine_accuracy@3
|
| 902 |
+
value: 0.31241997439180536
|
| 903 |
+
name: Cosine Accuracy@3
|
| 904 |
+
- type: cosine_accuracy@5
|
| 905 |
+
value: 0.3361075544174136
|
| 906 |
+
name: Cosine Accuracy@5
|
| 907 |
+
- type: cosine_accuracy@10
|
| 908 |
+
value: 0.37964148527528807
|
| 909 |
+
name: Cosine Accuracy@10
|
| 910 |
+
- type: cosine_precision@1
|
| 911 |
+
value: 0.3085787451984635
|
| 912 |
+
name: Cosine Precision@1
|
| 913 |
+
- type: cosine_precision@3
|
| 914 |
+
value: 0.3079385403329065
|
| 915 |
+
name: Cosine Precision@3
|
| 916 |
+
- type: cosine_precision@5
|
| 917 |
+
value: 0.29961587708066584
|
| 918 |
+
name: Cosine Precision@5
|
| 919 |
+
- type: cosine_precision@10
|
| 920 |
+
value: 0.2752880921895006
|
| 921 |
+
name: Cosine Precision@10
|
| 922 |
+
- type: cosine_recall@1
|
| 923 |
+
value: 0.036297623853982414
|
| 924 |
+
name: Cosine Recall@1
|
| 925 |
+
- type: cosine_recall@3
|
| 926 |
+
value: 0.10638786483158841
|
| 927 |
+
name: Cosine Recall@3
|
| 928 |
+
- type: cosine_recall@5
|
| 929 |
+
value: 0.16032639984514846
|
| 930 |
+
name: Cosine Recall@5
|
| 931 |
+
- type: cosine_recall@10
|
| 932 |
+
value: 0.24000960695821508
|
| 933 |
+
name: Cosine Recall@10
|
| 934 |
+
- type: cosine_ndcg@10
|
| 935 |
+
value: 0.3312285498294292
|
| 936 |
+
name: Cosine Ndcg@10
|
| 937 |
+
- type: cosine_mrr@10
|
| 938 |
+
value: 0.3199812511432227
|
| 939 |
+
name: Cosine Mrr@10
|
| 940 |
+
- type: cosine_map@100
|
| 941 |
+
value: 0.3963095303049961
|
| 942 |
+
name: Cosine Map@100
|
| 943 |
+
- task:
|
| 944 |
+
type: information-retrieval
|
| 945 |
+
name: Information Retrieval
|
| 946 |
+
dataset:
|
| 947 |
+
name: dim 64
|
| 948 |
+
type: dim_64
|
| 949 |
+
metrics:
|
| 950 |
+
- type: cosine_accuracy@1
|
| 951 |
+
value: 0.2740076824583867
|
| 952 |
+
name: Cosine Accuracy@1
|
| 953 |
+
- type: cosine_accuracy@3
|
| 954 |
+
value: 0.27848911651728553
|
| 955 |
+
name: Cosine Accuracy@3
|
| 956 |
+
- type: cosine_accuracy@5
|
| 957 |
+
value: 0.30153649167733676
|
| 958 |
+
name: Cosine Accuracy@5
|
| 959 |
+
- type: cosine_accuracy@10
|
| 960 |
+
value: 0.3354673495518566
|
| 961 |
+
name: Cosine Accuracy@10
|
| 962 |
+
- type: cosine_precision@1
|
| 963 |
+
value: 0.2740076824583867
|
| 964 |
+
name: Cosine Precision@1
|
| 965 |
+
- type: cosine_precision@3
|
| 966 |
+
value: 0.27315407597097735
|
| 967 |
+
name: Cosine Precision@3
|
| 968 |
+
- type: cosine_precision@5
|
| 969 |
+
value: 0.2670934699103713
|
| 970 |
+
name: Cosine Precision@5
|
| 971 |
+
- type: cosine_precision@10
|
| 972 |
+
value: 0.24571062740076827
|
| 973 |
+
name: Cosine Precision@10
|
| 974 |
+
- type: cosine_recall@1
|
| 975 |
+
value: 0.03167890172057568
|
| 976 |
+
name: Cosine Recall@1
|
| 977 |
+
- type: cosine_recall@3
|
| 978 |
+
value: 0.09267023360511464
|
| 979 |
+
name: Cosine Recall@3
|
| 980 |
+
- type: cosine_recall@5
|
| 981 |
+
value: 0.14048625468314752
|
| 982 |
+
name: Cosine Recall@5
|
| 983 |
+
- type: cosine_recall@10
|
| 984 |
+
value: 0.21092883720941633
|
| 985 |
+
name: Cosine Recall@10
|
| 986 |
+
- type: cosine_ndcg@10
|
| 987 |
+
value: 0.29402896525927075
|
| 988 |
+
name: Cosine Ndcg@10
|
| 989 |
+
- type: cosine_mrr@10
|
| 990 |
+
value: 0.28429414873076814
|
| 991 |
+
name: Cosine Mrr@10
|
| 992 |
+
- type: cosine_map@100
|
| 993 |
+
value: 0.3539045084602349
|
| 994 |
+
name: Cosine Map@100
|
| 995 |
+
---
|
| 996 |
+
|
| 997 |
+
# multilingual-e5-large
|
| 998 |
+
|
| 999 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 1000 |
+
|
| 1001 |
+
## Model Details
|
| 1002 |
+
|
| 1003 |
+
### Model Description
|
| 1004 |
+
- **Model Type:** Sentence Transformer
|
| 1005 |
+
- **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) <!-- at revision 0dc5580a448e4284468b8909bae50fa925907bc5 -->
|
| 1006 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 1007 |
+
- **Output Dimensionality:** 1024 dimensions
|
| 1008 |
+
- **Similarity Function:** Cosine Similarity
|
| 1009 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 1010 |
+
- **Language:** en
|
| 1011 |
+
- **License:** apache-2.0
|
| 1012 |
+
|
| 1013 |
+
### Model Sources
|
| 1014 |
+
|
| 1015 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 1016 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 1017 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 1018 |
+
|
| 1019 |
+
### Full Model Architecture
|
| 1020 |
+
|
| 1021 |
+
```
|
| 1022 |
+
SentenceTransformer(
|
| 1023 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
|
| 1024 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 1025 |
+
(2): Normalize()
|
| 1026 |
+
)
|
| 1027 |
+
```
|
| 1028 |
+
|
| 1029 |
+
## Usage
|
| 1030 |
+
|
| 1031 |
+
### Direct Usage (Sentence Transformers)
|
| 1032 |
+
|
| 1033 |
+
First install the Sentence Transformers library:
|
| 1034 |
+
|
| 1035 |
+
```bash
|
| 1036 |
+
pip install -U sentence-transformers
|
| 1037 |
+
```
|
| 1038 |
+
|
| 1039 |
+
Then you can load this model and run inference.
|
| 1040 |
+
```python
|
| 1041 |
+
from sentence_transformers import SentenceTransformer
|
| 1042 |
+
|
| 1043 |
+
# Download from the 🤗 Hub
|
| 1044 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 1045 |
+
# Run inference
|
| 1046 |
+
sentences = [
|
| 1047 |
+
'What may impede authorities in the discharge of their responsibilities under Union law?',
|
| 1048 |
+
'The objectives and principles of Directive 95/46/EC remain sound, but it has not prevented fragmentation in the implementation of data protection across the Union, legal uncertainty or a widespread public perception that there are significant risks to the protection of natural persons, in particular with regard to online activity. Differences in the level of protection of the rights and freedoms of natural persons, in particular the right to the protection of personal data, with regard to the processing of personal data in the Member States may prevent the free flow of personal data throughout the Union. Those differences may therefore constitute an obstacle to the pursuit of economic activities at the level of the Union, distort competition and impede authorities in the discharge of their responsibilities under Union law. Such a difference in levels of protection is due to the existence of differences in the implementation and application of Directive 95/46/EC.',
|
| 1049 |
+
'This Regulation is without prejudice to international agreements concluded between the Union and third countries regulating the transfer of personal data including appropriate safeguards for the data subjects. Member States may conclude international agreements which involve the transfer of personal data to third countries or international organisations, as far as such agreements do not affect this Regulation or any other provisions of Union law and include an appropriate level of protection for the fundamental rights of the data subjects.',
|
| 1050 |
+
]
|
| 1051 |
+
embeddings = model.encode(sentences)
|
| 1052 |
+
print(embeddings.shape)
|
| 1053 |
+
# (3, 1024)
|
| 1054 |
+
|
| 1055 |
+
# Get the similarity scores for the embeddings
|
| 1056 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 1057 |
+
print(similarities)
|
| 1058 |
+
# tensor([[1.0000, 0.5388, 0.3874],
|
| 1059 |
+
# [0.5388, 1.0000, 0.6300],
|
| 1060 |
+
# [0.3874, 0.6300, 1.0000]])
|
| 1061 |
+
```
|
| 1062 |
+
|
| 1063 |
+
<!--
|
| 1064 |
+
### Direct Usage (Transformers)
|
| 1065 |
+
|
| 1066 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 1067 |
+
|
| 1068 |
+
</details>
|
| 1069 |
+
-->
|
| 1070 |
+
|
| 1071 |
+
<!--
|
| 1072 |
+
### Downstream Usage (Sentence Transformers)
|
| 1073 |
+
|
| 1074 |
+
You can finetune this model on your own dataset.
|
| 1075 |
+
|
| 1076 |
+
<details><summary>Click to expand</summary>
|
| 1077 |
+
|
| 1078 |
+
</details>
|
| 1079 |
+
-->
|
| 1080 |
+
|
| 1081 |
+
<!--
|
| 1082 |
+
### Out-of-Scope Use
|
| 1083 |
+
|
| 1084 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 1085 |
+
-->
|
| 1086 |
+
|
| 1087 |
+
## Evaluation
|
| 1088 |
+
|
| 1089 |
+
### Metrics
|
| 1090 |
+
|
| 1091 |
+
#### Information Retrieval
|
| 1092 |
+
|
| 1093 |
+
* Dataset: `dim_1024`
|
| 1094 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 1095 |
+
```json
|
| 1096 |
+
{
|
| 1097 |
+
"truncate_dim": 1024
|
| 1098 |
+
}
|
| 1099 |
+
```
|
| 1100 |
+
|
| 1101 |
+
| Metric | Value |
|
| 1102 |
+
|:--------------------|:-----------|
|
| 1103 |
+
| cosine_accuracy@1 | 0.3624 |
|
| 1104 |
+
| cosine_accuracy@3 | 0.3681 |
|
| 1105 |
+
| cosine_accuracy@5 | 0.3931 |
|
| 1106 |
+
| cosine_accuracy@10 | 0.4334 |
|
| 1107 |
+
| cosine_precision@1 | 0.3624 |
|
| 1108 |
+
| cosine_precision@3 | 0.3619 |
|
| 1109 |
+
| cosine_precision@5 | 0.3517 |
|
| 1110 |
+
| cosine_precision@10 | 0.3177 |
|
| 1111 |
+
| cosine_recall@1 | 0.0435 |
|
| 1112 |
+
| cosine_recall@3 | 0.1276 |
|
| 1113 |
+
| cosine_recall@5 | 0.192 |
|
| 1114 |
+
| cosine_recall@10 | 0.281 |
|
| 1115 |
+
| **cosine_ndcg@10** | **0.3859** |
|
| 1116 |
+
| cosine_mrr@10 | 0.3743 |
|
| 1117 |
+
| cosine_map@100 | 0.4539 |
|
| 1118 |
+
|
| 1119 |
+
#### Information Retrieval
|
| 1120 |
+
|
| 1121 |
+
* Dataset: `dim_768`
|
| 1122 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 1123 |
+
```json
|
| 1124 |
+
{
|
| 1125 |
+
"truncate_dim": 768
|
| 1126 |
+
}
|
| 1127 |
+
```
|
| 1128 |
+
|
| 1129 |
+
| Metric | Value |
|
| 1130 |
+
|:--------------------|:-----------|
|
| 1131 |
+
| cosine_accuracy@1 | 0.3592 |
|
| 1132 |
+
| cosine_accuracy@3 | 0.3649 |
|
| 1133 |
+
| cosine_accuracy@5 | 0.3892 |
|
| 1134 |
+
| cosine_accuracy@10 | 0.4334 |
|
| 1135 |
+
| cosine_precision@1 | 0.3592 |
|
| 1136 |
+
| cosine_precision@3 | 0.3587 |
|
| 1137 |
+
| cosine_precision@5 | 0.3485 |
|
| 1138 |
+
| cosine_precision@10 | 0.3167 |
|
| 1139 |
+
| cosine_recall@1 | 0.0425 |
|
| 1140 |
+
| cosine_recall@3 | 0.1246 |
|
| 1141 |
+
| cosine_recall@5 | 0.1875 |
|
| 1142 |
+
| cosine_recall@10 | 0.277 |
|
| 1143 |
+
| **cosine_ndcg@10** | **0.3831** |
|
| 1144 |
+
| cosine_mrr@10 | 0.3715 |
|
| 1145 |
+
| cosine_map@100 | 0.4493 |
|
| 1146 |
+
|
| 1147 |
+
#### Information Retrieval
|
| 1148 |
+
|
| 1149 |
+
* Dataset: `dim_512`
|
| 1150 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 1151 |
+
```json
|
| 1152 |
+
{
|
| 1153 |
+
"truncate_dim": 512
|
| 1154 |
+
}
|
| 1155 |
+
```
|
| 1156 |
+
|
| 1157 |
+
| Metric | Value |
|
| 1158 |
+
|:--------------------|:-----------|
|
| 1159 |
+
| cosine_accuracy@1 | 0.3598 |
|
| 1160 |
+
| cosine_accuracy@3 | 0.3656 |
|
| 1161 |
+
| cosine_accuracy@5 | 0.3912 |
|
| 1162 |
+
| cosine_accuracy@10 | 0.4334 |
|
| 1163 |
+
| cosine_precision@1 | 0.3598 |
|
| 1164 |
+
| cosine_precision@3 | 0.3594 |
|
| 1165 |
+
| cosine_precision@5 | 0.3497 |
|
| 1166 |
+
| cosine_precision@10 | 0.3173 |
|
| 1167 |
+
| cosine_recall@1 | 0.0427 |
|
| 1168 |
+
| cosine_recall@3 | 0.1252 |
|
| 1169 |
+
| cosine_recall@5 | 0.1891 |
|
| 1170 |
+
| cosine_recall@10 | 0.2782 |
|
| 1171 |
+
| **cosine_ndcg@10** | **0.3844** |
|
| 1172 |
+
| cosine_mrr@10 | 0.3721 |
|
| 1173 |
+
| cosine_map@100 | 0.4477 |
|
| 1174 |
+
|
| 1175 |
+
#### Information Retrieval
|
| 1176 |
+
|
| 1177 |
+
* Dataset: `dim_256`
|
| 1178 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 1179 |
+
```json
|
| 1180 |
+
{
|
| 1181 |
+
"truncate_dim": 256
|
| 1182 |
+
}
|
| 1183 |
+
```
|
| 1184 |
+
|
| 1185 |
+
| Metric | Value |
|
| 1186 |
+
|:--------------------|:----------|
|
| 1187 |
+
| cosine_accuracy@1 | 0.3438 |
|
| 1188 |
+
| cosine_accuracy@3 | 0.3476 |
|
| 1189 |
+
| cosine_accuracy@5 | 0.3764 |
|
| 1190 |
+
| cosine_accuracy@10 | 0.411 |
|
| 1191 |
+
| cosine_precision@1 | 0.3438 |
|
| 1192 |
+
| cosine_precision@3 | 0.3429 |
|
| 1193 |
+
| cosine_precision@5 | 0.3346 |
|
| 1194 |
+
| cosine_precision@10 | 0.3041 |
|
| 1195 |
+
| cosine_recall@1 | 0.0401 |
|
| 1196 |
+
| cosine_recall@3 | 0.1177 |
|
| 1197 |
+
| cosine_recall@5 | 0.1784 |
|
| 1198 |
+
| cosine_recall@10 | 0.2649 |
|
| 1199 |
+
| **cosine_ndcg@10** | **0.367** |
|
| 1200 |
+
| cosine_mrr@10 | 0.3551 |
|
| 1201 |
+
| cosine_map@100 | 0.4299 |
|
| 1202 |
+
|
| 1203 |
+
#### Information Retrieval
|
| 1204 |
+
|
| 1205 |
+
* Dataset: `dim_128`
|
| 1206 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 1207 |
+
```json
|
| 1208 |
+
{
|
| 1209 |
+
"truncate_dim": 128
|
| 1210 |
+
}
|
| 1211 |
+
```
|
| 1212 |
+
|
| 1213 |
+
| Metric | Value |
|
| 1214 |
+
|:--------------------|:-----------|
|
| 1215 |
+
| cosine_accuracy@1 | 0.3086 |
|
| 1216 |
+
| cosine_accuracy@3 | 0.3124 |
|
| 1217 |
+
| cosine_accuracy@5 | 0.3361 |
|
| 1218 |
+
| cosine_accuracy@10 | 0.3796 |
|
| 1219 |
+
| cosine_precision@1 | 0.3086 |
|
| 1220 |
+
| cosine_precision@3 | 0.3079 |
|
| 1221 |
+
| cosine_precision@5 | 0.2996 |
|
| 1222 |
+
| cosine_precision@10 | 0.2753 |
|
| 1223 |
+
| cosine_recall@1 | 0.0363 |
|
| 1224 |
+
| cosine_recall@3 | 0.1064 |
|
| 1225 |
+
| cosine_recall@5 | 0.1603 |
|
| 1226 |
+
| cosine_recall@10 | 0.24 |
|
| 1227 |
+
| **cosine_ndcg@10** | **0.3312** |
|
| 1228 |
+
| cosine_mrr@10 | 0.32 |
|
| 1229 |
+
| cosine_map@100 | 0.3963 |
|
| 1230 |
+
|
| 1231 |
+
#### Information Retrieval
|
| 1232 |
+
|
| 1233 |
+
* Dataset: `dim_64`
|
| 1234 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 1235 |
+
```json
|
| 1236 |
+
{
|
| 1237 |
+
"truncate_dim": 64
|
| 1238 |
+
}
|
| 1239 |
+
```
|
| 1240 |
+
|
| 1241 |
+
| Metric | Value |
|
| 1242 |
+
|:--------------------|:----------|
|
| 1243 |
+
| cosine_accuracy@1 | 0.274 |
|
| 1244 |
+
| cosine_accuracy@3 | 0.2785 |
|
| 1245 |
+
| cosine_accuracy@5 | 0.3015 |
|
| 1246 |
+
| cosine_accuracy@10 | 0.3355 |
|
| 1247 |
+
| cosine_precision@1 | 0.274 |
|
| 1248 |
+
| cosine_precision@3 | 0.2732 |
|
| 1249 |
+
| cosine_precision@5 | 0.2671 |
|
| 1250 |
+
| cosine_precision@10 | 0.2457 |
|
| 1251 |
+
| cosine_recall@1 | 0.0317 |
|
| 1252 |
+
| cosine_recall@3 | 0.0927 |
|
| 1253 |
+
| cosine_recall@5 | 0.1405 |
|
| 1254 |
+
| cosine_recall@10 | 0.2109 |
|
| 1255 |
+
| **cosine_ndcg@10** | **0.294** |
|
| 1256 |
+
| cosine_mrr@10 | 0.2843 |
|
| 1257 |
+
| cosine_map@100 | 0.3539 |
|
| 1258 |
+
|
| 1259 |
+
<!--
|
| 1260 |
+
## Bias, Risks and Limitations
|
| 1261 |
+
|
| 1262 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 1263 |
+
-->
|
| 1264 |
+
|
| 1265 |
+
<!--
|
| 1266 |
+
### Recommendations
|
| 1267 |
+
|
| 1268 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 1269 |
+
-->
|
| 1270 |
+
|
| 1271 |
+
## Training Details
|
| 1272 |
+
|
| 1273 |
+
### Training Dataset
|
| 1274 |
+
|
| 1275 |
+
#### Unnamed Dataset
|
| 1276 |
+
|
| 1277 |
+
* Size: 391 training samples
|
| 1278 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
| 1279 |
+
* Approximate statistics based on the first 391 samples:
|
| 1280 |
+
| | anchor | positive |
|
| 1281 |
+
|:--------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 1282 |
+
| type | string | string |
|
| 1283 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 16.9 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 27 tokens</li><li>mean: 372.91 tokens</li><li>max: 512 tokens</li></ul> |
|
| 1284 |
+
* Samples:
|
| 1285 |
+
| anchor | positive |
|
| 1286 |
+
|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 1287 |
+
| <code>On what date did the act occur?</code> | <code>Court (Civil/Criminal): Civil <br>Provisions: Directive 2015/366, Law 4537/2018 <br>Time of the act: 31.08.2022 <br>Outcome (not guilty, guilty): Partially accepts the claim. <br>Reasoning: The Athens Peace Court ordered the bank to return the amount that was withdrawn from the plaintiffs' account and to pay additional compensation for the moral damage they suffered. <br>Facts: The case concerns plaintiffs who fell victim to electronic fraud via phishing, resulting in the withdrawal of money from their bank account. The plaintiffs claimed that the bank did not take the necessary security measures to protect their accounts and sought compensation for the financial loss and moral damage they suffered. The court determined that the bank is responsible for the loss of the money, as it did not prove that the transactions were authorized by the plaintiffs. Furthermore, the court recognized that the bank's refusal to return the funds constitutes an infringement of the plaintiffs' personal rights, as it...</code> |
|
| 1288 |
+
| <code>For what purposes can more specific rules be provided regarding the employment context?</code> | <code>1.Member States may, by law or by collective agreements, provide for more specific rules to ensure the protection of the rights and freedoms in respect of the processing of employees' personal data in the employment context, in particular for the purposes of the recruitment, the performance of the contract of employment, including discharge of obligations laid down by law or by collective agreements, management, planning and organisation of work, equality and diversity in the workplace, health and safety at work, protection of employer's or customer's property and for the purposes of the exercise and enjoyment, on an individual or collective basis, of rights and benefits related to employment, and for the purpose of the termination of the employment relationship.<br>2.Those rules shall include suitable and specific measures to safeguard the data subject's human dignity, legitimate interests and fundamental rights, with particular regard to the transparency of processing, the transfer of p...</code> |
|
| 1289 |
+
| <code>On which date were transactions detailed in the provided text conducted?</code> | <code>**Court (Civil/Criminal): Civil**<br><br>**Provisions:**<br><br>**Time of commission of the act:**<br><br>**Outcome (not guilty, guilty):**<br><br>**Rationale:**<br><br>**Facts:**<br>The plaintiff holds credit card number ............ with the defendant banking corporation. Based on the application for alternative networks dated 19/7/2015 with number ......... submitted at a branch of the defendant, he was granted access to the electronic banking service (e-banking) to conduct banking transactions (debit, credit, updates, payments) remotely. On 30/11/2020, the plaintiff fell victim to electronic fraud through the "phishing" method, whereby an unknown perpetrator managed to withdraw a total amount of €3,121.75 from the aforementioned credit card. Specifically, the plaintiff received an email at 1:35 PM on 29/11/2020 from sender ...... with address ........, informing him that due to an impending system change, he needed to verify the mobile phone number linked to the credit card, urging him to complete the verification...</code> |
|
| 1290 |
+
* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
|
| 1291 |
+
```json
|
| 1292 |
+
{
|
| 1293 |
+
"loss": "MultipleNegativesRankingLoss",
|
| 1294 |
+
"matryoshka_dims": [
|
| 1295 |
+
1024,
|
| 1296 |
+
768,
|
| 1297 |
+
512,
|
| 1298 |
+
256,
|
| 1299 |
+
128,
|
| 1300 |
+
64
|
| 1301 |
+
],
|
| 1302 |
+
"matryoshka_weights": [
|
| 1303 |
+
1,
|
| 1304 |
+
1,
|
| 1305 |
+
1,
|
| 1306 |
+
1,
|
| 1307 |
+
1,
|
| 1308 |
+
1
|
| 1309 |
+
],
|
| 1310 |
+
"n_dims_per_step": -1
|
| 1311 |
+
}
|
| 1312 |
+
```
|
| 1313 |
+
|
| 1314 |
+
### Training Hyperparameters
|
| 1315 |
+
#### Non-Default Hyperparameters
|
| 1316 |
+
|
| 1317 |
+
- `eval_strategy`: epoch
|
| 1318 |
+
- `per_device_train_batch_size`: 2
|
| 1319 |
+
- `per_device_eval_batch_size`: 2
|
| 1320 |
+
- `gradient_accumulation_steps`: 2
|
| 1321 |
+
- `learning_rate`: 2e-05
|
| 1322 |
+
- `num_train_epochs`: 20
|
| 1323 |
+
- `lr_scheduler_type`: cosine
|
| 1324 |
+
- `warmup_ratio`: 0.1
|
| 1325 |
+
- `bf16`: True
|
| 1326 |
+
- `load_best_model_at_end`: True
|
| 1327 |
+
- `optim`: adamw_torch_fused
|
| 1328 |
+
- `batch_sampler`: no_duplicates
|
| 1329 |
+
|
| 1330 |
+
#### All Hyperparameters
|
| 1331 |
+
<details><summary>Click to expand</summary>
|
| 1332 |
+
|
| 1333 |
+
- `overwrite_output_dir`: False
|
| 1334 |
+
- `do_predict`: False
|
| 1335 |
+
- `eval_strategy`: epoch
|
| 1336 |
+
- `prediction_loss_only`: True
|
| 1337 |
+
- `per_device_train_batch_size`: 2
|
| 1338 |
+
- `per_device_eval_batch_size`: 2
|
| 1339 |
+
- `per_gpu_train_batch_size`: None
|
| 1340 |
+
- `per_gpu_eval_batch_size`: None
|
| 1341 |
+
- `gradient_accumulation_steps`: 2
|
| 1342 |
+
- `eval_accumulation_steps`: None
|
| 1343 |
+
- `torch_empty_cache_steps`: None
|
| 1344 |
+
- `learning_rate`: 2e-05
|
| 1345 |
+
- `weight_decay`: 0.0
|
| 1346 |
+
- `adam_beta1`: 0.9
|
| 1347 |
+
- `adam_beta2`: 0.999
|
| 1348 |
+
- `adam_epsilon`: 1e-08
|
| 1349 |
+
- `max_grad_norm`: 1.0
|
| 1350 |
+
- `num_train_epochs`: 20
|
| 1351 |
+
- `max_steps`: -1
|
| 1352 |
+
- `lr_scheduler_type`: cosine
|
| 1353 |
+
- `lr_scheduler_kwargs`: {}
|
| 1354 |
+
- `warmup_ratio`: 0.1
|
| 1355 |
+
- `warmup_steps`: 0
|
| 1356 |
+
- `log_level`: passive
|
| 1357 |
+
- `log_level_replica`: warning
|
| 1358 |
+
- `log_on_each_node`: True
|
| 1359 |
+
- `logging_nan_inf_filter`: True
|
| 1360 |
+
- `save_safetensors`: True
|
| 1361 |
+
- `save_on_each_node`: False
|
| 1362 |
+
- `save_only_model`: False
|
| 1363 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 1364 |
+
- `no_cuda`: False
|
| 1365 |
+
- `use_cpu`: False
|
| 1366 |
+
- `use_mps_device`: False
|
| 1367 |
+
- `seed`: 42
|
| 1368 |
+
- `data_seed`: None
|
| 1369 |
+
- `jit_mode_eval`: False
|
| 1370 |
+
- `use_ipex`: False
|
| 1371 |
+
- `bf16`: True
|
| 1372 |
+
- `fp16`: False
|
| 1373 |
+
- `fp16_opt_level`: O1
|
| 1374 |
+
- `half_precision_backend`: auto
|
| 1375 |
+
- `bf16_full_eval`: False
|
| 1376 |
+
- `fp16_full_eval`: False
|
| 1377 |
+
- `tf32`: None
|
| 1378 |
+
- `local_rank`: 0
|
| 1379 |
+
- `ddp_backend`: None
|
| 1380 |
+
- `tpu_num_cores`: None
|
| 1381 |
+
- `tpu_metrics_debug`: False
|
| 1382 |
+
- `debug`: []
|
| 1383 |
+
- `dataloader_drop_last`: False
|
| 1384 |
+
- `dataloader_num_workers`: 0
|
| 1385 |
+
- `dataloader_prefetch_factor`: None
|
| 1386 |
+
- `past_index`: -1
|
| 1387 |
+
- `disable_tqdm`: False
|
| 1388 |
+
- `remove_unused_columns`: True
|
| 1389 |
+
- `label_names`: None
|
| 1390 |
+
- `load_best_model_at_end`: True
|
| 1391 |
+
- `ignore_data_skip`: False
|
| 1392 |
+
- `fsdp`: []
|
| 1393 |
+
- `fsdp_min_num_params`: 0
|
| 1394 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 1395 |
+
- `tp_size`: 0
|
| 1396 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 1397 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 1398 |
+
- `deepspeed`: None
|
| 1399 |
+
- `label_smoothing_factor`: 0.0
|
| 1400 |
+
- `optim`: adamw_torch_fused
|
| 1401 |
+
- `optim_args`: None
|
| 1402 |
+
- `adafactor`: False
|
| 1403 |
+
- `group_by_length`: False
|
| 1404 |
+
- `length_column_name`: length
|
| 1405 |
+
- `ddp_find_unused_parameters`: None
|
| 1406 |
+
- `ddp_bucket_cap_mb`: None
|
| 1407 |
+
- `ddp_broadcast_buffers`: False
|
| 1408 |
+
- `dataloader_pin_memory`: True
|
| 1409 |
+
- `dataloader_persistent_workers`: False
|
| 1410 |
+
- `skip_memory_metrics`: True
|
| 1411 |
+
- `use_legacy_prediction_loop`: False
|
| 1412 |
+
- `push_to_hub`: False
|
| 1413 |
+
- `resume_from_checkpoint`: None
|
| 1414 |
+
- `hub_model_id`: None
|
| 1415 |
+
- `hub_strategy`: every_save
|
| 1416 |
+
- `hub_private_repo`: None
|
| 1417 |
+
- `hub_always_push`: False
|
| 1418 |
+
- `gradient_checkpointing`: False
|
| 1419 |
+
- `gradient_checkpointing_kwargs`: None
|
| 1420 |
+
- `include_inputs_for_metrics`: False
|
| 1421 |
+
- `include_for_metrics`: []
|
| 1422 |
+
- `eval_do_concat_batches`: True
|
| 1423 |
+
- `fp16_backend`: auto
|
| 1424 |
+
- `push_to_hub_model_id`: None
|
| 1425 |
+
- `push_to_hub_organization`: None
|
| 1426 |
+
- `mp_parameters`:
|
| 1427 |
+
- `auto_find_batch_size`: False
|
| 1428 |
+
- `full_determinism`: False
|
| 1429 |
+
- `torchdynamo`: None
|
| 1430 |
+
- `ray_scope`: last
|
| 1431 |
+
- `ddp_timeout`: 1800
|
| 1432 |
+
- `torch_compile`: False
|
| 1433 |
+
- `torch_compile_backend`: None
|
| 1434 |
+
- `torch_compile_mode`: None
|
| 1435 |
+
- `include_tokens_per_second`: False
|
| 1436 |
+
- `include_num_input_tokens_seen`: False
|
| 1437 |
+
- `neftune_noise_alpha`: None
|
| 1438 |
+
- `optim_target_modules`: None
|
| 1439 |
+
- `batch_eval_metrics`: False
|
| 1440 |
+
- `eval_on_start`: False
|
| 1441 |
+
- `use_liger_kernel`: False
|
| 1442 |
+
- `eval_use_gather_object`: False
|
| 1443 |
+
- `average_tokens_across_devices`: False
|
| 1444 |
+
- `prompts`: None
|
| 1445 |
+
- `batch_sampler`: no_duplicates
|
| 1446 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 1447 |
+
- `router_mapping`: {}
|
| 1448 |
+
- `learning_rate_mapping`: {}
|
| 1449 |
+
|
| 1450 |
+
</details>
|
| 1451 |
+
|
| 1452 |
+
### Training Logs
|
| 1453 |
+
| Epoch | Step | Training Loss | dim_1024_cosine_ndcg@10 | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
|
| 1454 |
+
|:------:|:----:|:-------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
|
| 1455 |
+
| 0.0102 | 1 | 15.8588 | - | - | - | - | - | - |
|
| 1456 |
+
| 0.0204 | 2 | 10.7411 | - | - | - | - | - | - |
|
| 1457 |
+
| 0.0306 | 3 | 1.3873 | - | - | - | - | - | - |
|
| 1458 |
+
| 0.0408 | 4 | 0.9088 | - | - | - | - | - | - |
|
| 1459 |
+
| 0.0510 | 5 | 0.0077 | - | - | - | - | - | - |
|
| 1460 |
+
| 0.0612 | 6 | 0.6016 | - | - | - | - | - | - |
|
| 1461 |
+
| 0.0714 | 7 | 1.6714 | - | - | - | - | - | - |
|
| 1462 |
+
| 0.0816 | 8 | 0.4211 | - | - | - | - | - | - |
|
| 1463 |
+
| 0.0918 | 9 | 0.1996 | - | - | - | - | - | - |
|
| 1464 |
+
| 0.1020 | 10 | 0.1895 | - | - | - | - | - | - |
|
| 1465 |
+
| 0.1122 | 11 | 0.1358 | - | - | - | - | - | - |
|
| 1466 |
+
| 0.1224 | 12 | 0.5552 | - | - | - | - | - | - |
|
| 1467 |
+
| 0.1327 | 13 | 0.5141 | - | - | - | - | - | - |
|
| 1468 |
+
| 0.1429 | 14 | 0.1955 | - | - | - | - | - | - |
|
| 1469 |
+
| 0.1531 | 15 | 1.9114 | - | - | - | - | - | - |
|
| 1470 |
+
| 0.1633 | 16 | 0.2645 | - | - | - | - | - | - |
|
| 1471 |
+
| 0.1735 | 17 | 7.5545 | - | - | - | - | - | - |
|
| 1472 |
+
| 0.1837 | 18 | 0.4297 | - | - | - | - | - | - |
|
| 1473 |
+
| 0.1939 | 19 | 0.678 | - | - | - | - | - | - |
|
| 1474 |
+
| 0.2041 | 20 | 0.4634 | - | - | - | - | - | - |
|
| 1475 |
+
| 0.2143 | 21 | 4.2252 | - | - | - | - | - | - |
|
| 1476 |
+
| 0.2245 | 22 | 3.9985 | - | - | - | - | - | - |
|
| 1477 |
+
| 0.2347 | 23 | 1.9242 | - | - | - | - | - | - |
|
| 1478 |
+
| 0.2449 | 24 | 3.2716 | - | - | - | - | - | - |
|
| 1479 |
+
| 0.2551 | 25 | 0.123 | - | - | - | - | - | - |
|
| 1480 |
+
| 0.2653 | 26 | 1.0011 | - | - | - | - | - | - |
|
| 1481 |
+
| 0.2755 | 27 | 3.5846 | - | - | - | - | - | - |
|
| 1482 |
+
| 0.2857 | 28 | 1.1365 | - | - | - | - | - | - |
|
| 1483 |
+
| 0.2959 | 29 | 0.7149 | - | - | - | - | - | - |
|
| 1484 |
+
| 0.3061 | 30 | 1.2629 | - | - | - | - | - | - |
|
| 1485 |
+
| 0.3163 | 31 | 0.6459 | - | - | - | - | - | - |
|
| 1486 |
+
| 0.3265 | 32 | 0.1934 | - | - | - | - | - | - |
|
| 1487 |
+
| 0.3367 | 33 | 1.4897 | - | - | - | - | - | - |
|
| 1488 |
+
| 0.3469 | 34 | 0.8561 | - | - | - | - | - | - |
|
| 1489 |
+
| 0.3571 | 35 | 0.0128 | - | - | - | - | - | - |
|
| 1490 |
+
| 0.3673 | 36 | 1.4952 | - | - | - | - | - | - |
|
| 1491 |
+
| 0.3776 | 37 | 0.3181 | - | - | - | - | - | - |
|
| 1492 |
+
| 0.3878 | 38 | 6.3681 | - | - | - | - | - | - |
|
| 1493 |
+
| 0.3980 | 39 | 1.4487 | - | - | - | - | - | - |
|
| 1494 |
+
| 0.4082 | 40 | 0.1702 | - | - | - | - | - | - |
|
| 1495 |
+
| 0.4184 | 41 | 0.2513 | - | - | - | - | - | - |
|
| 1496 |
+
| 0.4286 | 42 | 4.1595 | - | - | - | - | - | - |
|
| 1497 |
+
| 0.4388 | 43 | 2.7347 | - | - | - | - | - | - |
|
| 1498 |
+
| 0.4490 | 44 | 2.3182 | - | - | - | - | - | - |
|
| 1499 |
+
| 0.4592 | 45 | 1.3285 | - | - | - | - | - | - |
|
| 1500 |
+
| 0.4694 | 46 | 2.1155 | - | - | - | - | - | - |
|
| 1501 |
+
| 0.4796 | 47 | 0.0645 | - | - | - | - | - | - |
|
| 1502 |
+
| 0.4898 | 48 | 7.1283 | - | - | - | - | - | - |
|
| 1503 |
+
| 0.5 | 49 | 0.711 | - | - | - | - | - | - |
|
| 1504 |
+
| 0.5102 | 50 | 0.4716 | - | - | - | - | - | - |
|
| 1505 |
+
| 0.5204 | 51 | 2.2895 | - | - | - | - | - | - |
|
| 1506 |
+
| 0.5306 | 52 | 1.9235 | - | - | - | - | - | - |
|
| 1507 |
+
| 0.5408 | 53 | 0.8777 | - | - | - | - | - | - |
|
| 1508 |
+
| 0.5510 | 54 | 0.0038 | - | - | - | - | - | - |
|
| 1509 |
+
| 0.5612 | 55 | 1.5598 | - | - | - | - | - | - |
|
| 1510 |
+
| 0.5714 | 56 | 0.0177 | - | - | - | - | - | - |
|
| 1511 |
+
| 0.5816 | 57 | 0.0837 | - | - | - | - | - | - |
|
| 1512 |
+
| 0.5918 | 58 | 0.0429 | - | - | - | - | - | - |
|
| 1513 |
+
| 0.6020 | 59 | 0.0071 | - | - | - | - | - | - |
|
| 1514 |
+
| 0.6122 | 60 | 2.7217 | - | - | - | - | - | - |
|
| 1515 |
+
| 0.6224 | 61 | 3.9013 | - | - | - | - | - | - |
|
| 1516 |
+
| 0.6327 | 62 | 1.417 | - | - | - | - | - | - |
|
| 1517 |
+
| 0.6429 | 63 | 3.5854 | - | - | - | - | - | - |
|
| 1518 |
+
| 0.6531 | 64 | 12.918 | - | - | - | - | - | - |
|
| 1519 |
+
| 0.6633 | 65 | 7.1566 | - | - | - | - | - | - |
|
| 1520 |
+
| 0.6735 | 66 | 3.9897 | - | - | - | - | - | - |
|
| 1521 |
+
| 0.6837 | 67 | 8.1139 | - | - | - | - | - | - |
|
| 1522 |
+
| 0.6939 | 68 | 5.7005 | - | - | - | - | - | - |
|
| 1523 |
+
| 0.7041 | 69 | 0.1219 | - | - | - | - | - | - |
|
| 1524 |
+
| 0.7143 | 70 | 5.7849 | - | - | - | - | - | - |
|
| 1525 |
+
| 0.7245 | 71 | 1.0726 | - | - | - | - | - | - |
|
| 1526 |
+
| 0.7347 | 72 | 1.2599 | - | - | - | - | - | - |
|
| 1527 |
+
| 0.7449 | 73 | 0.6473 | - | - | - | - | - | - |
|
| 1528 |
+
| 0.7551 | 74 | 1.0397 | - | - | - | - | - | - |
|
| 1529 |
+
| 0.7653 | 75 | 1.5555 | - | - | - | - | - | - |
|
| 1530 |
+
| 0.7755 | 76 | 0.0078 | - | - | - | - | - | - |
|
| 1531 |
+
| 0.7857 | 77 | 0.0048 | - | - | - | - | - | - |
|
| 1532 |
+
| 0.7959 | 78 | 0.0323 | - | - | - | - | - | - |
|
| 1533 |
+
| 0.8061 | 79 | 1.7425 | - | - | - | - | - | - |
|
| 1534 |
+
| 0.8163 | 80 | 0.0035 | - | - | - | - | - | - |
|
| 1535 |
+
| 0.8265 | 81 | 6.4849 | - | - | - | - | - | - |
|
| 1536 |
+
| 0.8367 | 82 | 4.3767 | - | - | - | - | - | - |
|
| 1537 |
+
| 0.8469 | 83 | 0.0186 | - | - | - | - | - | - |
|
| 1538 |
+
| 0.8571 | 84 | 0.0008 | - | - | - | - | - | - |
|
| 1539 |
+
| 0.8673 | 85 | 0.8354 | - | - | - | - | - | - |
|
| 1540 |
+
| 0.8776 | 86 | 0.0162 | - | - | - | - | - | - |
|
| 1541 |
+
| 0.8878 | 87 | 0.1282 | - | - | - | - | - | - |
|
| 1542 |
+
| 0.8980 | 88 | 0.4514 | - | - | - | - | - | - |
|
| 1543 |
+
| 0.9082 | 89 | 4.9103 | - | - | - | - | - | - |
|
| 1544 |
+
| 0.9184 | 90 | 0.0762 | - | - | - | - | - | - |
|
| 1545 |
+
| 0.9286 | 91 | 0.0444 | - | - | - | - | - | - |
|
| 1546 |
+
| 0.9388 | 92 | 1.8609 | - | - | - | - | - | - |
|
| 1547 |
+
| 0.9490 | 93 | 0.1489 | - | - | - | - | - | - |
|
| 1548 |
+
| 0.9592 | 94 | 0.5926 | - | - | - | - | - | - |
|
| 1549 |
+
| 0.9694 | 95 | 0.5344 | - | - | - | - | - | - |
|
| 1550 |
+
| 0.9796 | 96 | 0.4693 | - | - | - | - | - | - |
|
| 1551 |
+
| 0.9898 | 97 | 9.2282 | - | - | - | - | - | - |
|
| 1552 |
+
| 1.0 | 98 | 4.6238 | 0.3859 | 0.3831 | 0.3844 | 0.3670 | 0.3312 | 0.2940 |
|
| 1553 |
+
|
| 1554 |
+
|
| 1555 |
+
### Framework Versions
|
| 1556 |
+
- Python: 3.12.11
|
| 1557 |
+
- Sentence Transformers: 5.1.0
|
| 1558 |
+
- Transformers: 4.51.3
|
| 1559 |
+
- PyTorch: 2.8.0+cu126
|
| 1560 |
+
- Accelerate: 1.10.1
|
| 1561 |
+
- Datasets: 4.0.0
|
| 1562 |
+
- Tokenizers: 0.21.4
|
| 1563 |
+
|
| 1564 |
+
## Citation
|
| 1565 |
+
|
| 1566 |
+
### BibTeX
|
| 1567 |
+
|
| 1568 |
+
#### Sentence Transformers
|
| 1569 |
+
```bibtex
|
| 1570 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 1571 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 1572 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 1573 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 1574 |
+
month = "11",
|
| 1575 |
+
year = "2019",
|
| 1576 |
+
publisher = "Association for Computational Linguistics",
|
| 1577 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 1578 |
+
}
|
| 1579 |
+
```
|
| 1580 |
+
|
| 1581 |
+
#### MatryoshkaLoss
|
| 1582 |
+
```bibtex
|
| 1583 |
+
@misc{kusupati2024matryoshka,
|
| 1584 |
+
title={Matryoshka Representation Learning},
|
| 1585 |
+
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
|
| 1586 |
+
year={2024},
|
| 1587 |
+
eprint={2205.13147},
|
| 1588 |
+
archivePrefix={arXiv},
|
| 1589 |
+
primaryClass={cs.LG}
|
| 1590 |
+
}
|
| 1591 |
+
```
|
| 1592 |
+
|
| 1593 |
+
#### MultipleNegativesRankingLoss
|
| 1594 |
+
```bibtex
|
| 1595 |
+
@misc{henderson2017efficient,
|
| 1596 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 1597 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 1598 |
+
year={2017},
|
| 1599 |
+
eprint={1705.00652},
|
| 1600 |
+
archivePrefix={arXiv},
|
| 1601 |
+
primaryClass={cs.CL}
|
| 1602 |
+
}
|
| 1603 |
+
```
|
| 1604 |
+
|
| 1605 |
+
<!--
|
| 1606 |
+
## Glossary
|
| 1607 |
+
|
| 1608 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 1609 |
+
-->
|
| 1610 |
+
|
| 1611 |
+
<!--
|
| 1612 |
+
## Model Card Authors
|
| 1613 |
+
|
| 1614 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 1615 |
+
-->
|
| 1616 |
+
|
| 1617 |
+
<!--
|
| 1618 |
+
## Model Card Contact
|
| 1619 |
+
|
| 1620 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 1621 |
+
-->
|
checkpoint-98/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 4096,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "xlm-roberta",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 24,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 1,
|
| 21 |
+
"position_embedding_type": "absolute",
|
| 22 |
+
"torch_dtype": "float32",
|
| 23 |
+
"transformers_version": "4.51.3",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
checkpoint-98/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "SentenceTransformer",
|
| 3 |
+
"__version__": {
|
| 4 |
+
"sentence_transformers": "5.1.0",
|
| 5 |
+
"transformers": "4.51.3",
|
| 6 |
+
"pytorch": "2.8.0+cu126"
|
| 7 |
+
},
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
checkpoint-98/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8b8678a200e1ec3a97ec08f700f81cc6660e581d09862b47b576834736c0668
|
| 3 |
+
size 2239607176
|
checkpoint-98/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
checkpoint-98/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43981d3b1c8c7efd9d147726925594fec137b1d2137148a81f15c7a1d493486a
|
| 3 |
+
size 4471067142
|
checkpoint-98/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4f05f697e2a026dbb8be0397c5f3215957e05bbf5897dea20c686e5f8917f13
|
| 3 |
+
size 14645
|
checkpoint-98/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8050407fb4fa517140d91f016be515b027290100821411e470b937a3a98f10c3
|
| 3 |
+
size 1465
|
checkpoint-98/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
checkpoint-98/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
checkpoint-98/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "<unk>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
checkpoint-98/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
|
| 3 |
+
size 17082987
|
checkpoint-98/tokenizer_config.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": true,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"max_length": 512,
|
| 51 |
+
"model_max_length": 512,
|
| 52 |
+
"pad_to_multiple_of": null,
|
| 53 |
+
"pad_token": "<pad>",
|
| 54 |
+
"pad_token_type_id": 0,
|
| 55 |
+
"padding_side": "right",
|
| 56 |
+
"sep_token": "</s>",
|
| 57 |
+
"stride": 0,
|
| 58 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 59 |
+
"truncation_side": "right",
|
| 60 |
+
"truncation_strategy": "longest_first",
|
| 61 |
+
"unk_token": "<unk>"
|
| 62 |
+
}
|
checkpoint-98/trainer_state.json
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 98,
|
| 3 |
+
"best_metric": 0.3312285498294292,
|
| 4 |
+
"best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 98,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.01020408163265306,
|
| 14 |
+
"grad_norm": 973.273681640625,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 15.8588,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.02040816326530612,
|
| 21 |
+
"grad_norm": 1016.8517456054688,
|
| 22 |
+
"learning_rate": 1.0204081632653061e-07,
|
| 23 |
+
"loss": 10.7411,
|
| 24 |
+
"step": 2
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.030612244897959183,
|
| 28 |
+
"grad_norm": 166.88465881347656,
|
| 29 |
+
"learning_rate": 2.0408163265306121e-07,
|
| 30 |
+
"loss": 1.3873,
|
| 31 |
+
"step": 3
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.04081632653061224,
|
| 35 |
+
"grad_norm": 108.06741333007812,
|
| 36 |
+
"learning_rate": 3.0612244897959183e-07,
|
| 37 |
+
"loss": 0.9088,
|
| 38 |
+
"step": 4
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.05102040816326531,
|
| 42 |
+
"grad_norm": 1.1959134340286255,
|
| 43 |
+
"learning_rate": 4.0816326530612243e-07,
|
| 44 |
+
"loss": 0.0077,
|
| 45 |
+
"step": 5
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.061224489795918366,
|
| 49 |
+
"grad_norm": 130.83908081054688,
|
| 50 |
+
"learning_rate": 5.102040816326531e-07,
|
| 51 |
+
"loss": 0.6016,
|
| 52 |
+
"step": 6
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.07142857142857142,
|
| 56 |
+
"grad_norm": 318.3863525390625,
|
| 57 |
+
"learning_rate": 6.122448979591837e-07,
|
| 58 |
+
"loss": 1.6714,
|
| 59 |
+
"step": 7
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.08163265306122448,
|
| 63 |
+
"grad_norm": 74.26002502441406,
|
| 64 |
+
"learning_rate": 7.142857142857143e-07,
|
| 65 |
+
"loss": 0.4211,
|
| 66 |
+
"step": 8
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.09183673469387756,
|
| 70 |
+
"grad_norm": 32.4500846862793,
|
| 71 |
+
"learning_rate": 8.163265306122449e-07,
|
| 72 |
+
"loss": 0.1996,
|
| 73 |
+
"step": 9
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.10204081632653061,
|
| 77 |
+
"grad_norm": 41.27345275878906,
|
| 78 |
+
"learning_rate": 9.183673469387756e-07,
|
| 79 |
+
"loss": 0.1895,
|
| 80 |
+
"step": 10
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.11224489795918367,
|
| 84 |
+
"grad_norm": 27.35291862487793,
|
| 85 |
+
"learning_rate": 1.0204081632653063e-06,
|
| 86 |
+
"loss": 0.1358,
|
| 87 |
+
"step": 11
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.12244897959183673,
|
| 91 |
+
"grad_norm": 103.75244903564453,
|
| 92 |
+
"learning_rate": 1.122448979591837e-06,
|
| 93 |
+
"loss": 0.5552,
|
| 94 |
+
"step": 12
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.1326530612244898,
|
| 98 |
+
"grad_norm": 155.97923278808594,
|
| 99 |
+
"learning_rate": 1.2244897959183673e-06,
|
| 100 |
+
"loss": 0.5141,
|
| 101 |
+
"step": 13
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.14285714285714285,
|
| 105 |
+
"grad_norm": 53.757484436035156,
|
| 106 |
+
"learning_rate": 1.3265306122448982e-06,
|
| 107 |
+
"loss": 0.1955,
|
| 108 |
+
"step": 14
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.15306122448979592,
|
| 112 |
+
"grad_norm": 175.17491149902344,
|
| 113 |
+
"learning_rate": 1.4285714285714286e-06,
|
| 114 |
+
"loss": 1.9114,
|
| 115 |
+
"step": 15
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.16326530612244897,
|
| 119 |
+
"grad_norm": 49.02252197265625,
|
| 120 |
+
"learning_rate": 1.5306122448979593e-06,
|
| 121 |
+
"loss": 0.2645,
|
| 122 |
+
"step": 16
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.17346938775510204,
|
| 126 |
+
"grad_norm": 999.3756103515625,
|
| 127 |
+
"learning_rate": 1.6326530612244897e-06,
|
| 128 |
+
"loss": 7.5545,
|
| 129 |
+
"step": 17
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.1836734693877551,
|
| 133 |
+
"grad_norm": 149.2627410888672,
|
| 134 |
+
"learning_rate": 1.7346938775510206e-06,
|
| 135 |
+
"loss": 0.4297,
|
| 136 |
+
"step": 18
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.19387755102040816,
|
| 140 |
+
"grad_norm": 204.95181274414062,
|
| 141 |
+
"learning_rate": 1.8367346938775512e-06,
|
| 142 |
+
"loss": 0.678,
|
| 143 |
+
"step": 19
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.20408163265306123,
|
| 147 |
+
"grad_norm": 103.94851684570312,
|
| 148 |
+
"learning_rate": 1.938775510204082e-06,
|
| 149 |
+
"loss": 0.4634,
|
| 150 |
+
"step": 20
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.21428571428571427,
|
| 154 |
+
"grad_norm": 536.7100219726562,
|
| 155 |
+
"learning_rate": 2.0408163265306125e-06,
|
| 156 |
+
"loss": 4.2252,
|
| 157 |
+
"step": 21
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.22448979591836735,
|
| 161 |
+
"grad_norm": 444.44805908203125,
|
| 162 |
+
"learning_rate": 2.1428571428571427e-06,
|
| 163 |
+
"loss": 3.9985,
|
| 164 |
+
"step": 22
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.23469387755102042,
|
| 168 |
+
"grad_norm": 170.50369262695312,
|
| 169 |
+
"learning_rate": 2.244897959183674e-06,
|
| 170 |
+
"loss": 1.9242,
|
| 171 |
+
"step": 23
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.24489795918367346,
|
| 175 |
+
"grad_norm": 626.5487060546875,
|
| 176 |
+
"learning_rate": 2.3469387755102044e-06,
|
| 177 |
+
"loss": 3.2716,
|
| 178 |
+
"step": 24
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.25510204081632654,
|
| 182 |
+
"grad_norm": 51.353050231933594,
|
| 183 |
+
"learning_rate": 2.4489795918367347e-06,
|
| 184 |
+
"loss": 0.123,
|
| 185 |
+
"step": 25
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.2653061224489796,
|
| 189 |
+
"grad_norm": 108.25341796875,
|
| 190 |
+
"learning_rate": 2.5510204081632657e-06,
|
| 191 |
+
"loss": 1.0011,
|
| 192 |
+
"step": 26
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.2755102040816326,
|
| 196 |
+
"grad_norm": 322.83502197265625,
|
| 197 |
+
"learning_rate": 2.6530612244897964e-06,
|
| 198 |
+
"loss": 3.5846,
|
| 199 |
+
"step": 27
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.2857142857142857,
|
| 203 |
+
"grad_norm": 203.38458251953125,
|
| 204 |
+
"learning_rate": 2.7551020408163266e-06,
|
| 205 |
+
"loss": 1.1365,
|
| 206 |
+
"step": 28
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.29591836734693877,
|
| 210 |
+
"grad_norm": 127.78427124023438,
|
| 211 |
+
"learning_rate": 2.8571428571428573e-06,
|
| 212 |
+
"loss": 0.7149,
|
| 213 |
+
"step": 29
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.30612244897959184,
|
| 217 |
+
"grad_norm": 283.67645263671875,
|
| 218 |
+
"learning_rate": 2.959183673469388e-06,
|
| 219 |
+
"loss": 1.2629,
|
| 220 |
+
"step": 30
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.3163265306122449,
|
| 224 |
+
"grad_norm": 82.65542602539062,
|
| 225 |
+
"learning_rate": 3.0612244897959185e-06,
|
| 226 |
+
"loss": 0.6459,
|
| 227 |
+
"step": 31
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.32653061224489793,
|
| 231 |
+
"grad_norm": 42.66185760498047,
|
| 232 |
+
"learning_rate": 3.1632653061224496e-06,
|
| 233 |
+
"loss": 0.1934,
|
| 234 |
+
"step": 32
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.336734693877551,
|
| 238 |
+
"grad_norm": 212.1294708251953,
|
| 239 |
+
"learning_rate": 3.2653061224489794e-06,
|
| 240 |
+
"loss": 1.4897,
|
| 241 |
+
"step": 33
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.3469387755102041,
|
| 245 |
+
"grad_norm": 188.0417022705078,
|
| 246 |
+
"learning_rate": 3.3673469387755105e-06,
|
| 247 |
+
"loss": 0.8561,
|
| 248 |
+
"step": 34
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.35714285714285715,
|
| 252 |
+
"grad_norm": 2.0467610359191895,
|
| 253 |
+
"learning_rate": 3.469387755102041e-06,
|
| 254 |
+
"loss": 0.0128,
|
| 255 |
+
"step": 35
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.3673469387755102,
|
| 259 |
+
"grad_norm": 283.3966979980469,
|
| 260 |
+
"learning_rate": 3.5714285714285718e-06,
|
| 261 |
+
"loss": 1.4952,
|
| 262 |
+
"step": 36
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.37755102040816324,
|
| 266 |
+
"grad_norm": 60.74869155883789,
|
| 267 |
+
"learning_rate": 3.6734693877551024e-06,
|
| 268 |
+
"loss": 0.3181,
|
| 269 |
+
"step": 37
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.3877551020408163,
|
| 273 |
+
"grad_norm": 824.6165771484375,
|
| 274 |
+
"learning_rate": 3.7755102040816327e-06,
|
| 275 |
+
"loss": 6.3681,
|
| 276 |
+
"step": 38
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 0.3979591836734694,
|
| 280 |
+
"grad_norm": 231.1636962890625,
|
| 281 |
+
"learning_rate": 3.877551020408164e-06,
|
| 282 |
+
"loss": 1.4487,
|
| 283 |
+
"step": 39
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 0.40816326530612246,
|
| 287 |
+
"grad_norm": 26.46611785888672,
|
| 288 |
+
"learning_rate": 3.979591836734694e-06,
|
| 289 |
+
"loss": 0.1702,
|
| 290 |
+
"step": 40
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 0.41836734693877553,
|
| 294 |
+
"grad_norm": 75.88525390625,
|
| 295 |
+
"learning_rate": 4.081632653061225e-06,
|
| 296 |
+
"loss": 0.2513,
|
| 297 |
+
"step": 41
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.42857142857142855,
|
| 301 |
+
"grad_norm": 465.83392333984375,
|
| 302 |
+
"learning_rate": 4.183673469387755e-06,
|
| 303 |
+
"loss": 4.1595,
|
| 304 |
+
"step": 42
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 0.4387755102040816,
|
| 308 |
+
"grad_norm": 306.2772521972656,
|
| 309 |
+
"learning_rate": 4.2857142857142855e-06,
|
| 310 |
+
"loss": 2.7347,
|
| 311 |
+
"step": 43
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 0.4489795918367347,
|
| 315 |
+
"grad_norm": 488.9759521484375,
|
| 316 |
+
"learning_rate": 4.3877551020408165e-06,
|
| 317 |
+
"loss": 2.3182,
|
| 318 |
+
"step": 44
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 0.45918367346938777,
|
| 322 |
+
"grad_norm": 355.1698913574219,
|
| 323 |
+
"learning_rate": 4.489795918367348e-06,
|
| 324 |
+
"loss": 1.3285,
|
| 325 |
+
"step": 45
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 0.46938775510204084,
|
| 329 |
+
"grad_norm": 263.558349609375,
|
| 330 |
+
"learning_rate": 4.591836734693878e-06,
|
| 331 |
+
"loss": 2.1155,
|
| 332 |
+
"step": 46
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 0.47959183673469385,
|
| 336 |
+
"grad_norm": 9.667963981628418,
|
| 337 |
+
"learning_rate": 4.693877551020409e-06,
|
| 338 |
+
"loss": 0.0645,
|
| 339 |
+
"step": 47
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"epoch": 0.4897959183673469,
|
| 343 |
+
"grad_norm": 957.79345703125,
|
| 344 |
+
"learning_rate": 4.795918367346939e-06,
|
| 345 |
+
"loss": 7.1283,
|
| 346 |
+
"step": 48
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 0.5,
|
| 350 |
+
"grad_norm": 160.0965118408203,
|
| 351 |
+
"learning_rate": 4.897959183673469e-06,
|
| 352 |
+
"loss": 0.711,
|
| 353 |
+
"step": 49
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 0.5102040816326531,
|
| 357 |
+
"grad_norm": 93.697265625,
|
| 358 |
+
"learning_rate": 5e-06,
|
| 359 |
+
"loss": 0.4716,
|
| 360 |
+
"step": 50
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 0.5204081632653061,
|
| 364 |
+
"grad_norm": 292.9518737792969,
|
| 365 |
+
"learning_rate": 5.1020408163265315e-06,
|
| 366 |
+
"loss": 2.2895,
|
| 367 |
+
"step": 51
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 0.5306122448979592,
|
| 371 |
+
"grad_norm": 335.4564514160156,
|
| 372 |
+
"learning_rate": 5.204081632653062e-06,
|
| 373 |
+
"loss": 1.9235,
|
| 374 |
+
"step": 52
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 0.5408163265306123,
|
| 378 |
+
"grad_norm": 138.63575744628906,
|
| 379 |
+
"learning_rate": 5.306122448979593e-06,
|
| 380 |
+
"loss": 0.8777,
|
| 381 |
+
"step": 53
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 0.5510204081632653,
|
| 385 |
+
"grad_norm": 1.011594533920288,
|
| 386 |
+
"learning_rate": 5.408163265306123e-06,
|
| 387 |
+
"loss": 0.0038,
|
| 388 |
+
"step": 54
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 0.5612244897959183,
|
| 392 |
+
"grad_norm": 506.25152587890625,
|
| 393 |
+
"learning_rate": 5.510204081632653e-06,
|
| 394 |
+
"loss": 1.5598,
|
| 395 |
+
"step": 55
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 0.5714285714285714,
|
| 399 |
+
"grad_norm": 2.2550530433654785,
|
| 400 |
+
"learning_rate": 5.6122448979591834e-06,
|
| 401 |
+
"loss": 0.0177,
|
| 402 |
+
"step": 56
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 0.5816326530612245,
|
| 406 |
+
"grad_norm": 13.93323802947998,
|
| 407 |
+
"learning_rate": 5.7142857142857145e-06,
|
| 408 |
+
"loss": 0.0837,
|
| 409 |
+
"step": 57
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 0.5918367346938775,
|
| 413 |
+
"grad_norm": 7.279649257659912,
|
| 414 |
+
"learning_rate": 5.816326530612246e-06,
|
| 415 |
+
"loss": 0.0429,
|
| 416 |
+
"step": 58
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 0.6020408163265306,
|
| 420 |
+
"grad_norm": 0.9923371076583862,
|
| 421 |
+
"learning_rate": 5.918367346938776e-06,
|
| 422 |
+
"loss": 0.0071,
|
| 423 |
+
"step": 59
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 0.6122448979591837,
|
| 427 |
+
"grad_norm": 743.8301391601562,
|
| 428 |
+
"learning_rate": 6.020408163265307e-06,
|
| 429 |
+
"loss": 2.7217,
|
| 430 |
+
"step": 60
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 0.6224489795918368,
|
| 434 |
+
"grad_norm": 227.04403686523438,
|
| 435 |
+
"learning_rate": 6.122448979591837e-06,
|
| 436 |
+
"loss": 3.9013,
|
| 437 |
+
"step": 61
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 0.6326530612244898,
|
| 441 |
+
"grad_norm": 193.12701416015625,
|
| 442 |
+
"learning_rate": 6.224489795918368e-06,
|
| 443 |
+
"loss": 1.417,
|
| 444 |
+
"step": 62
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 0.6428571428571429,
|
| 448 |
+
"grad_norm": 642.7814331054688,
|
| 449 |
+
"learning_rate": 6.326530612244899e-06,
|
| 450 |
+
"loss": 3.5854,
|
| 451 |
+
"step": 63
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 0.6530612244897959,
|
| 455 |
+
"grad_norm": 1007.544189453125,
|
| 456 |
+
"learning_rate": 6.4285714285714295e-06,
|
| 457 |
+
"loss": 12.918,
|
| 458 |
+
"step": 64
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 0.6632653061224489,
|
| 462 |
+
"grad_norm": 1310.942138671875,
|
| 463 |
+
"learning_rate": 6.530612244897959e-06,
|
| 464 |
+
"loss": 7.1566,
|
| 465 |
+
"step": 65
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 0.673469387755102,
|
| 469 |
+
"grad_norm": 810.1301879882812,
|
| 470 |
+
"learning_rate": 6.63265306122449e-06,
|
| 471 |
+
"loss": 3.9897,
|
| 472 |
+
"step": 66
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 0.6836734693877551,
|
| 476 |
+
"grad_norm": 513.1759643554688,
|
| 477 |
+
"learning_rate": 6.734693877551021e-06,
|
| 478 |
+
"loss": 8.1139,
|
| 479 |
+
"step": 67
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 0.6938775510204082,
|
| 483 |
+
"grad_norm": 1414.8878173828125,
|
| 484 |
+
"learning_rate": 6.836734693877551e-06,
|
| 485 |
+
"loss": 5.7005,
|
| 486 |
+
"step": 68
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 0.7040816326530612,
|
| 490 |
+
"grad_norm": 31.607126235961914,
|
| 491 |
+
"learning_rate": 6.938775510204082e-06,
|
| 492 |
+
"loss": 0.1219,
|
| 493 |
+
"step": 69
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.7142857142857143,
|
| 497 |
+
"grad_norm": 799.9751586914062,
|
| 498 |
+
"learning_rate": 7.0408163265306125e-06,
|
| 499 |
+
"loss": 5.7849,
|
| 500 |
+
"step": 70
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 0.7244897959183674,
|
| 504 |
+
"grad_norm": 132.71778869628906,
|
| 505 |
+
"learning_rate": 7.1428571428571436e-06,
|
| 506 |
+
"loss": 1.0726,
|
| 507 |
+
"step": 71
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 0.7346938775510204,
|
| 511 |
+
"grad_norm": 256.61041259765625,
|
| 512 |
+
"learning_rate": 7.244897959183675e-06,
|
| 513 |
+
"loss": 1.2599,
|
| 514 |
+
"step": 72
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 0.7448979591836735,
|
| 518 |
+
"grad_norm": 192.0435333251953,
|
| 519 |
+
"learning_rate": 7.346938775510205e-06,
|
| 520 |
+
"loss": 0.6473,
|
| 521 |
+
"step": 73
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 0.7551020408163265,
|
| 525 |
+
"grad_norm": 293.7915954589844,
|
| 526 |
+
"learning_rate": 7.448979591836736e-06,
|
| 527 |
+
"loss": 1.0397,
|
| 528 |
+
"step": 74
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 0.7653061224489796,
|
| 532 |
+
"grad_norm": 312.2645263671875,
|
| 533 |
+
"learning_rate": 7.551020408163265e-06,
|
| 534 |
+
"loss": 1.5555,
|
| 535 |
+
"step": 75
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 0.7755102040816326,
|
| 539 |
+
"grad_norm": 1.417815923690796,
|
| 540 |
+
"learning_rate": 7.653061224489796e-06,
|
| 541 |
+
"loss": 0.0078,
|
| 542 |
+
"step": 76
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 0.7857142857142857,
|
| 546 |
+
"grad_norm": 1.4391653537750244,
|
| 547 |
+
"learning_rate": 7.755102040816327e-06,
|
| 548 |
+
"loss": 0.0048,
|
| 549 |
+
"step": 77
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 0.7959183673469388,
|
| 553 |
+
"grad_norm": 5.628185749053955,
|
| 554 |
+
"learning_rate": 7.857142857142858e-06,
|
| 555 |
+
"loss": 0.0323,
|
| 556 |
+
"step": 78
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 0.8061224489795918,
|
| 560 |
+
"grad_norm": 264.5353698730469,
|
| 561 |
+
"learning_rate": 7.959183673469388e-06,
|
| 562 |
+
"loss": 1.7425,
|
| 563 |
+
"step": 79
|
| 564 |
+
},
|
| 565 |
+
{
|
| 566 |
+
"epoch": 0.8163265306122449,
|
| 567 |
+
"grad_norm": 1.5278851985931396,
|
| 568 |
+
"learning_rate": 8.06122448979592e-06,
|
| 569 |
+
"loss": 0.0035,
|
| 570 |
+
"step": 80
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 0.826530612244898,
|
| 574 |
+
"grad_norm": 932.3336181640625,
|
| 575 |
+
"learning_rate": 8.16326530612245e-06,
|
| 576 |
+
"loss": 6.4849,
|
| 577 |
+
"step": 81
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 0.8367346938775511,
|
| 581 |
+
"grad_norm": 635.4749145507812,
|
| 582 |
+
"learning_rate": 8.26530612244898e-06,
|
| 583 |
+
"loss": 4.3767,
|
| 584 |
+
"step": 82
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.8469387755102041,
|
| 588 |
+
"grad_norm": 8.875201225280762,
|
| 589 |
+
"learning_rate": 8.36734693877551e-06,
|
| 590 |
+
"loss": 0.0186,
|
| 591 |
+
"step": 83
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.8571428571428571,
|
| 595 |
+
"grad_norm": 0.15500876307487488,
|
| 596 |
+
"learning_rate": 8.469387755102042e-06,
|
| 597 |
+
"loss": 0.0008,
|
| 598 |
+
"step": 84
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 0.8673469387755102,
|
| 602 |
+
"grad_norm": 269.5357666015625,
|
| 603 |
+
"learning_rate": 8.571428571428571e-06,
|
| 604 |
+
"loss": 0.8354,
|
| 605 |
+
"step": 85
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 0.8775510204081632,
|
| 609 |
+
"grad_norm": 5.054287910461426,
|
| 610 |
+
"learning_rate": 8.673469387755103e-06,
|
| 611 |
+
"loss": 0.0162,
|
| 612 |
+
"step": 86
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 0.8877551020408163,
|
| 616 |
+
"grad_norm": 84.90735626220703,
|
| 617 |
+
"learning_rate": 8.775510204081633e-06,
|
| 618 |
+
"loss": 0.1282,
|
| 619 |
+
"step": 87
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 0.8979591836734694,
|
| 623 |
+
"grad_norm": 81.53719329833984,
|
| 624 |
+
"learning_rate": 8.877551020408163e-06,
|
| 625 |
+
"loss": 0.4514,
|
| 626 |
+
"step": 88
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 0.9081632653061225,
|
| 630 |
+
"grad_norm": 547.4005126953125,
|
| 631 |
+
"learning_rate": 8.979591836734695e-06,
|
| 632 |
+
"loss": 4.9103,
|
| 633 |
+
"step": 89
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 0.9183673469387755,
|
| 637 |
+
"grad_norm": 25.792213439941406,
|
| 638 |
+
"learning_rate": 9.081632653061225e-06,
|
| 639 |
+
"loss": 0.0762,
|
| 640 |
+
"step": 90
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 0.9285714285714286,
|
| 644 |
+
"grad_norm": 10.455421447753906,
|
| 645 |
+
"learning_rate": 9.183673469387756e-06,
|
| 646 |
+
"loss": 0.0444,
|
| 647 |
+
"step": 91
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 0.9387755102040817,
|
| 651 |
+
"grad_norm": 472.54376220703125,
|
| 652 |
+
"learning_rate": 9.285714285714288e-06,
|
| 653 |
+
"loss": 1.8609,
|
| 654 |
+
"step": 92
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 0.9489795918367347,
|
| 658 |
+
"grad_norm": 31.092357635498047,
|
| 659 |
+
"learning_rate": 9.387755102040818e-06,
|
| 660 |
+
"loss": 0.1489,
|
| 661 |
+
"step": 93
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 0.9591836734693877,
|
| 665 |
+
"grad_norm": 231.94151306152344,
|
| 666 |
+
"learning_rate": 9.489795918367348e-06,
|
| 667 |
+
"loss": 0.5926,
|
| 668 |
+
"step": 94
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.9693877551020408,
|
| 672 |
+
"grad_norm": 211.05117797851562,
|
| 673 |
+
"learning_rate": 9.591836734693878e-06,
|
| 674 |
+
"loss": 0.5344,
|
| 675 |
+
"step": 95
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 0.9795918367346939,
|
| 679 |
+
"grad_norm": 217.01339721679688,
|
| 680 |
+
"learning_rate": 9.693877551020408e-06,
|
| 681 |
+
"loss": 0.4693,
|
| 682 |
+
"step": 96
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 0.9897959183673469,
|
| 686 |
+
"grad_norm": 1123.96484375,
|
| 687 |
+
"learning_rate": 9.795918367346939e-06,
|
| 688 |
+
"loss": 9.2282,
|
| 689 |
+
"step": 97
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 1.0,
|
| 693 |
+
"grad_norm": 741.597412109375,
|
| 694 |
+
"learning_rate": 9.89795918367347e-06,
|
| 695 |
+
"loss": 4.6238,
|
| 696 |
+
"step": 98
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 1.0,
|
| 700 |
+
"eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
|
| 701 |
+
"eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
|
| 702 |
+
"eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
|
| 703 |
+
"eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
|
| 704 |
+
"eval_dim_1024_cosine_map@100": 0.45394800707643057,
|
| 705 |
+
"eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
|
| 706 |
+
"eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
|
| 707 |
+
"eval_dim_1024_cosine_precision@1": 0.36235595390524966,
|
| 708 |
+
"eval_dim_1024_cosine_precision@10": 0.3176696542893726,
|
| 709 |
+
"eval_dim_1024_cosine_precision@3": 0.36192915066154496,
|
| 710 |
+
"eval_dim_1024_cosine_precision@5": 0.35172855313700385,
|
| 711 |
+
"eval_dim_1024_cosine_recall@1": 0.04346309464734114,
|
| 712 |
+
"eval_dim_1024_cosine_recall@10": 0.28096984500258326,
|
| 713 |
+
"eval_dim_1024_cosine_recall@3": 0.12757812796185336,
|
| 714 |
+
"eval_dim_1024_cosine_recall@5": 0.19200836801442767,
|
| 715 |
+
"eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
|
| 716 |
+
"eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
|
| 717 |
+
"eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
|
| 718 |
+
"eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
|
| 719 |
+
"eval_dim_128_cosine_map@100": 0.3963095303049961,
|
| 720 |
+
"eval_dim_128_cosine_mrr@10": 0.3199812511432227,
|
| 721 |
+
"eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
|
| 722 |
+
"eval_dim_128_cosine_precision@1": 0.3085787451984635,
|
| 723 |
+
"eval_dim_128_cosine_precision@10": 0.2752880921895006,
|
| 724 |
+
"eval_dim_128_cosine_precision@3": 0.3079385403329065,
|
| 725 |
+
"eval_dim_128_cosine_precision@5": 0.29961587708066584,
|
| 726 |
+
"eval_dim_128_cosine_recall@1": 0.036297623853982414,
|
| 727 |
+
"eval_dim_128_cosine_recall@10": 0.24000960695821508,
|
| 728 |
+
"eval_dim_128_cosine_recall@3": 0.10638786483158841,
|
| 729 |
+
"eval_dim_128_cosine_recall@5": 0.16032639984514846,
|
| 730 |
+
"eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
|
| 731 |
+
"eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
|
| 732 |
+
"eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
|
| 733 |
+
"eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
|
| 734 |
+
"eval_dim_256_cosine_map@100": 0.4298669852983799,
|
| 735 |
+
"eval_dim_256_cosine_mrr@10": 0.3551361197487955,
|
| 736 |
+
"eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
|
| 737 |
+
"eval_dim_256_cosine_precision@1": 0.3437900128040973,
|
| 738 |
+
"eval_dim_256_cosine_precision@10": 0.3040973111395647,
|
| 739 |
+
"eval_dim_256_cosine_precision@3": 0.342936406316688,
|
| 740 |
+
"eval_dim_256_cosine_precision@5": 0.33457106274007686,
|
| 741 |
+
"eval_dim_256_cosine_recall@1": 0.04013102608834382,
|
| 742 |
+
"eval_dim_256_cosine_recall@10": 0.2648598688529433,
|
| 743 |
+
"eval_dim_256_cosine_recall@3": 0.11771735023719074,
|
| 744 |
+
"eval_dim_256_cosine_recall@5": 0.17837935755014916,
|
| 745 |
+
"eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
|
| 746 |
+
"eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
|
| 747 |
+
"eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
|
| 748 |
+
"eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
|
| 749 |
+
"eval_dim_512_cosine_map@100": 0.4476805587612892,
|
| 750 |
+
"eval_dim_512_cosine_mrr@10": 0.37212542934373866,
|
| 751 |
+
"eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
|
| 752 |
+
"eval_dim_512_cosine_precision@1": 0.35979513444302175,
|
| 753 |
+
"eval_dim_512_cosine_precision@10": 0.3173495518565941,
|
| 754 |
+
"eval_dim_512_cosine_precision@3": 0.35936833119931705,
|
| 755 |
+
"eval_dim_512_cosine_precision@5": 0.34967989756722156,
|
| 756 |
+
"eval_dim_512_cosine_recall@1": 0.04265405128130224,
|
| 757 |
+
"eval_dim_512_cosine_recall@10": 0.2781876565001863,
|
| 758 |
+
"eval_dim_512_cosine_recall@3": 0.12523102347193127,
|
| 759 |
+
"eval_dim_512_cosine_recall@5": 0.18912519336740205,
|
| 760 |
+
"eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
|
| 761 |
+
"eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
|
| 762 |
+
"eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
|
| 763 |
+
"eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
|
| 764 |
+
"eval_dim_64_cosine_map@100": 0.3539045084602349,
|
| 765 |
+
"eval_dim_64_cosine_mrr@10": 0.28429414873076814,
|
| 766 |
+
"eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
|
| 767 |
+
"eval_dim_64_cosine_precision@1": 0.2740076824583867,
|
| 768 |
+
"eval_dim_64_cosine_precision@10": 0.24571062740076827,
|
| 769 |
+
"eval_dim_64_cosine_precision@3": 0.27315407597097735,
|
| 770 |
+
"eval_dim_64_cosine_precision@5": 0.2670934699103713,
|
| 771 |
+
"eval_dim_64_cosine_recall@1": 0.03167890172057568,
|
| 772 |
+
"eval_dim_64_cosine_recall@10": 0.21092883720941633,
|
| 773 |
+
"eval_dim_64_cosine_recall@3": 0.09267023360511464,
|
| 774 |
+
"eval_dim_64_cosine_recall@5": 0.14048625468314752,
|
| 775 |
+
"eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
|
| 776 |
+
"eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
|
| 777 |
+
"eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
|
| 778 |
+
"eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
|
| 779 |
+
"eval_dim_768_cosine_map@100": 0.4493001842217619,
|
| 780 |
+
"eval_dim_768_cosine_mrr@10": 0.37149335406377615,
|
| 781 |
+
"eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
|
| 782 |
+
"eval_dim_768_cosine_precision@1": 0.3591549295774648,
|
| 783 |
+
"eval_dim_768_cosine_precision@10": 0.31670934699103714,
|
| 784 |
+
"eval_dim_768_cosine_precision@3": 0.3587281263337601,
|
| 785 |
+
"eval_dim_768_cosine_precision@5": 0.34852752880921894,
|
| 786 |
+
"eval_dim_768_cosine_recall@1": 0.04250079684114586,
|
| 787 |
+
"eval_dim_768_cosine_recall@10": 0.27695909667507057,
|
| 788 |
+
"eval_dim_768_cosine_recall@3": 0.12462187901616553,
|
| 789 |
+
"eval_dim_768_cosine_recall@5": 0.1875478484365334,
|
| 790 |
+
"eval_runtime": 99.0843,
|
| 791 |
+
"eval_samples_per_second": 0.0,
|
| 792 |
+
"eval_sequential_score": 0.29402896525927075,
|
| 793 |
+
"eval_steps_per_second": 0.0,
|
| 794 |
+
"step": 98
|
| 795 |
+
}
|
| 796 |
+
],
|
| 797 |
+
"logging_steps": 1,
|
| 798 |
+
"max_steps": 1960,
|
| 799 |
+
"num_input_tokens_seen": 0,
|
| 800 |
+
"num_train_epochs": 20,
|
| 801 |
+
"save_steps": 500,
|
| 802 |
+
"stateful_callbacks": {
|
| 803 |
+
"EarlyStoppingCallback": {
|
| 804 |
+
"args": {
|
| 805 |
+
"early_stopping_patience": 2,
|
| 806 |
+
"early_stopping_threshold": 0.0
|
| 807 |
+
},
|
| 808 |
+
"attributes": {
|
| 809 |
+
"early_stopping_patience_counter": 0
|
| 810 |
+
}
|
| 811 |
+
},
|
| 812 |
+
"TrainerControl": {
|
| 813 |
+
"args": {
|
| 814 |
+
"should_epoch_stop": false,
|
| 815 |
+
"should_evaluate": false,
|
| 816 |
+
"should_log": false,
|
| 817 |
+
"should_save": true,
|
| 818 |
+
"should_training_stop": false
|
| 819 |
+
},
|
| 820 |
+
"attributes": {}
|
| 821 |
+
}
|
| 822 |
+
},
|
| 823 |
+
"total_flos": 0.0,
|
| 824 |
+
"train_batch_size": 2,
|
| 825 |
+
"trial_name": null,
|
| 826 |
+
"trial_params": null
|
| 827 |
+
}
|