IoannisKat1 commited on
Commit
731a827
·
verified ·
1 Parent(s): 8a19feb

Add finetuned model

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. 1_Pooling/config.json +10 -0
  3. README.md +0 -0
  4. checkpoint-196/1_Pooling/config.json +10 -0
  5. checkpoint-196/README.md +0 -0
  6. checkpoint-196/config.json +27 -0
  7. checkpoint-196/config_sentence_transformers.json +14 -0
  8. checkpoint-196/model.safetensors +3 -0
  9. checkpoint-196/modules.json +20 -0
  10. checkpoint-196/optimizer.pt +3 -0
  11. checkpoint-196/rng_state.pth +3 -0
  12. checkpoint-196/scheduler.pt +3 -0
  13. checkpoint-196/sentence_bert_config.json +4 -0
  14. checkpoint-196/sentencepiece.bpe.model +3 -0
  15. checkpoint-196/special_tokens_map.json +51 -0
  16. checkpoint-196/tokenizer.json +3 -0
  17. checkpoint-196/tokenizer_config.json +62 -0
  18. checkpoint-196/trainer_state.json +1611 -0
  19. checkpoint-196/training_args.bin +3 -0
  20. checkpoint-294/1_Pooling/config.json +10 -0
  21. checkpoint-294/README.md +0 -0
  22. checkpoint-294/config.json +27 -0
  23. checkpoint-294/config_sentence_transformers.json +14 -0
  24. checkpoint-294/model.safetensors +3 -0
  25. checkpoint-294/modules.json +20 -0
  26. checkpoint-294/optimizer.pt +3 -0
  27. checkpoint-294/rng_state.pth +3 -0
  28. checkpoint-294/scheduler.pt +3 -0
  29. checkpoint-294/sentence_bert_config.json +4 -0
  30. checkpoint-294/sentencepiece.bpe.model +3 -0
  31. checkpoint-294/special_tokens_map.json +51 -0
  32. checkpoint-294/tokenizer.json +3 -0
  33. checkpoint-294/tokenizer_config.json +62 -0
  34. checkpoint-294/trainer_state.json +2395 -0
  35. checkpoint-294/training_args.bin +3 -0
  36. checkpoint-98/1_Pooling/config.json +10 -0
  37. checkpoint-98/README.md +1621 -0
  38. checkpoint-98/config.json +27 -0
  39. checkpoint-98/config_sentence_transformers.json +14 -0
  40. checkpoint-98/model.safetensors +3 -0
  41. checkpoint-98/modules.json +20 -0
  42. checkpoint-98/optimizer.pt +3 -0
  43. checkpoint-98/rng_state.pth +3 -0
  44. checkpoint-98/scheduler.pt +3 -0
  45. checkpoint-98/sentence_bert_config.json +4 -0
  46. checkpoint-98/sentencepiece.bpe.model +3 -0
  47. checkpoint-98/special_tokens_map.json +51 -0
  48. checkpoint-98/tokenizer.json +3 -0
  49. checkpoint-98/tokenizer_config.json +62 -0
  50. checkpoint-98/trainer_state.json +827 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-196/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-294/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-98/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-196/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-196/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-196/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.51.3",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
checkpoint-196/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.51.3",
6
+ "pytorch": "2.8.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoint-196/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86fee9b378922f1db9f68cf51a4941e02dfb183276ac89a16c8edbed98e30b9e
3
+ size 2239607176
checkpoint-196/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-196/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c03ddaba15f7dd601cacfb32507cfd2ceb73de3b6a0540a718091ec7eb2e678
3
+ size 4471067142
checkpoint-196/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa08bd9dd367cde376d15e8b982d14cd6729eae58ce75d651531d783eb6f5977
3
+ size 14645
checkpoint-196/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df0dbf279a8d0448c4e72f58a6993855330283a91196a4153f7f62130d091c0
3
+ size 1465
checkpoint-196/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-196/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
checkpoint-196/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-196/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
checkpoint-196/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "max_length": 512,
51
+ "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "</s>",
57
+ "stride": 0,
58
+ "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "<unk>"
62
+ }
checkpoint-196/trainer_state.json ADDED
@@ -0,0 +1,1611 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 98,
3
+ "best_metric": 0.3312285498294292,
4
+ "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 196,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.01020408163265306,
14
+ "grad_norm": 973.273681640625,
15
+ "learning_rate": 0.0,
16
+ "loss": 15.8588,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.02040816326530612,
21
+ "grad_norm": 1016.8517456054688,
22
+ "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 10.7411,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.030612244897959183,
28
+ "grad_norm": 166.88465881347656,
29
+ "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 1.3873,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.04081632653061224,
35
+ "grad_norm": 108.06741333007812,
36
+ "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 0.9088,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.05102040816326531,
42
+ "grad_norm": 1.1959134340286255,
43
+ "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 0.0077,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 0.061224489795918366,
49
+ "grad_norm": 130.83908081054688,
50
+ "learning_rate": 5.102040816326531e-07,
51
+ "loss": 0.6016,
52
+ "step": 6
53
+ },
54
+ {
55
+ "epoch": 0.07142857142857142,
56
+ "grad_norm": 318.3863525390625,
57
+ "learning_rate": 6.122448979591837e-07,
58
+ "loss": 1.6714,
59
+ "step": 7
60
+ },
61
+ {
62
+ "epoch": 0.08163265306122448,
63
+ "grad_norm": 74.26002502441406,
64
+ "learning_rate": 7.142857142857143e-07,
65
+ "loss": 0.4211,
66
+ "step": 8
67
+ },
68
+ {
69
+ "epoch": 0.09183673469387756,
70
+ "grad_norm": 32.4500846862793,
71
+ "learning_rate": 8.163265306122449e-07,
72
+ "loss": 0.1996,
73
+ "step": 9
74
+ },
75
+ {
76
+ "epoch": 0.10204081632653061,
77
+ "grad_norm": 41.27345275878906,
78
+ "learning_rate": 9.183673469387756e-07,
79
+ "loss": 0.1895,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.11224489795918367,
84
+ "grad_norm": 27.35291862487793,
85
+ "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 0.1358,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.12244897959183673,
91
+ "grad_norm": 103.75244903564453,
92
+ "learning_rate": 1.122448979591837e-06,
93
+ "loss": 0.5552,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.1326530612244898,
98
+ "grad_norm": 155.97923278808594,
99
+ "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 0.5141,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.14285714285714285,
105
+ "grad_norm": 53.757484436035156,
106
+ "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 0.1955,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.15306122448979592,
112
+ "grad_norm": 175.17491149902344,
113
+ "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 1.9114,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.16326530612244897,
119
+ "grad_norm": 49.02252197265625,
120
+ "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 0.2645,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.17346938775510204,
126
+ "grad_norm": 999.3756103515625,
127
+ "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 7.5545,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.1836734693877551,
133
+ "grad_norm": 149.2627410888672,
134
+ "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 0.4297,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.19387755102040816,
140
+ "grad_norm": 204.95181274414062,
141
+ "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 0.678,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.20408163265306123,
147
+ "grad_norm": 103.94851684570312,
148
+ "learning_rate": 1.938775510204082e-06,
149
+ "loss": 0.4634,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.21428571428571427,
154
+ "grad_norm": 536.7100219726562,
155
+ "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 4.2252,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.22448979591836735,
161
+ "grad_norm": 444.44805908203125,
162
+ "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 3.9985,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.23469387755102042,
168
+ "grad_norm": 170.50369262695312,
169
+ "learning_rate": 2.244897959183674e-06,
170
+ "loss": 1.9242,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.24489795918367346,
175
+ "grad_norm": 626.5487060546875,
176
+ "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 3.2716,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.25510204081632654,
182
+ "grad_norm": 51.353050231933594,
183
+ "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 0.123,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.2653061224489796,
189
+ "grad_norm": 108.25341796875,
190
+ "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 1.0011,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.2755102040816326,
196
+ "grad_norm": 322.83502197265625,
197
+ "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 3.5846,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.2857142857142857,
203
+ "grad_norm": 203.38458251953125,
204
+ "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 1.1365,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.29591836734693877,
210
+ "grad_norm": 127.78427124023438,
211
+ "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 0.7149,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.30612244897959184,
217
+ "grad_norm": 283.67645263671875,
218
+ "learning_rate": 2.959183673469388e-06,
219
+ "loss": 1.2629,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 0.3163265306122449,
224
+ "grad_norm": 82.65542602539062,
225
+ "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 0.6459,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 0.32653061224489793,
231
+ "grad_norm": 42.66185760498047,
232
+ "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 0.1934,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 0.336734693877551,
238
+ "grad_norm": 212.1294708251953,
239
+ "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 1.4897,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 0.3469387755102041,
245
+ "grad_norm": 188.0417022705078,
246
+ "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 0.8561,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 0.35714285714285715,
252
+ "grad_norm": 2.0467610359191895,
253
+ "learning_rate": 3.469387755102041e-06,
254
+ "loss": 0.0128,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 0.3673469387755102,
259
+ "grad_norm": 283.3966979980469,
260
+ "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.4952,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 0.37755102040816324,
266
+ "grad_norm": 60.74869155883789,
267
+ "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 0.3181,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 0.3877551020408163,
273
+ "grad_norm": 824.6165771484375,
274
+ "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 6.3681,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 0.3979591836734694,
280
+ "grad_norm": 231.1636962890625,
281
+ "learning_rate": 3.877551020408164e-06,
282
+ "loss": 1.4487,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 0.40816326530612246,
287
+ "grad_norm": 26.46611785888672,
288
+ "learning_rate": 3.979591836734694e-06,
289
+ "loss": 0.1702,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 0.41836734693877553,
294
+ "grad_norm": 75.88525390625,
295
+ "learning_rate": 4.081632653061225e-06,
296
+ "loss": 0.2513,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 0.42857142857142855,
301
+ "grad_norm": 465.83392333984375,
302
+ "learning_rate": 4.183673469387755e-06,
303
+ "loss": 4.1595,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 0.4387755102040816,
308
+ "grad_norm": 306.2772521972656,
309
+ "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 2.7347,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 0.4489795918367347,
315
+ "grad_norm": 488.9759521484375,
316
+ "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 2.3182,
318
+ "step": 44
319
+ },
320
+ {
321
+ "epoch": 0.45918367346938777,
322
+ "grad_norm": 355.1698913574219,
323
+ "learning_rate": 4.489795918367348e-06,
324
+ "loss": 1.3285,
325
+ "step": 45
326
+ },
327
+ {
328
+ "epoch": 0.46938775510204084,
329
+ "grad_norm": 263.558349609375,
330
+ "learning_rate": 4.591836734693878e-06,
331
+ "loss": 2.1155,
332
+ "step": 46
333
+ },
334
+ {
335
+ "epoch": 0.47959183673469385,
336
+ "grad_norm": 9.667963981628418,
337
+ "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.0645,
339
+ "step": 47
340
+ },
341
+ {
342
+ "epoch": 0.4897959183673469,
343
+ "grad_norm": 957.79345703125,
344
+ "learning_rate": 4.795918367346939e-06,
345
+ "loss": 7.1283,
346
+ "step": 48
347
+ },
348
+ {
349
+ "epoch": 0.5,
350
+ "grad_norm": 160.0965118408203,
351
+ "learning_rate": 4.897959183673469e-06,
352
+ "loss": 0.711,
353
+ "step": 49
354
+ },
355
+ {
356
+ "epoch": 0.5102040816326531,
357
+ "grad_norm": 93.697265625,
358
+ "learning_rate": 5e-06,
359
+ "loss": 0.4716,
360
+ "step": 50
361
+ },
362
+ {
363
+ "epoch": 0.5204081632653061,
364
+ "grad_norm": 292.9518737792969,
365
+ "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 2.2895,
367
+ "step": 51
368
+ },
369
+ {
370
+ "epoch": 0.5306122448979592,
371
+ "grad_norm": 335.4564514160156,
372
+ "learning_rate": 5.204081632653062e-06,
373
+ "loss": 1.9235,
374
+ "step": 52
375
+ },
376
+ {
377
+ "epoch": 0.5408163265306123,
378
+ "grad_norm": 138.63575744628906,
379
+ "learning_rate": 5.306122448979593e-06,
380
+ "loss": 0.8777,
381
+ "step": 53
382
+ },
383
+ {
384
+ "epoch": 0.5510204081632653,
385
+ "grad_norm": 1.011594533920288,
386
+ "learning_rate": 5.408163265306123e-06,
387
+ "loss": 0.0038,
388
+ "step": 54
389
+ },
390
+ {
391
+ "epoch": 0.5612244897959183,
392
+ "grad_norm": 506.25152587890625,
393
+ "learning_rate": 5.510204081632653e-06,
394
+ "loss": 1.5598,
395
+ "step": 55
396
+ },
397
+ {
398
+ "epoch": 0.5714285714285714,
399
+ "grad_norm": 2.2550530433654785,
400
+ "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.0177,
402
+ "step": 56
403
+ },
404
+ {
405
+ "epoch": 0.5816326530612245,
406
+ "grad_norm": 13.93323802947998,
407
+ "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 0.0837,
409
+ "step": 57
410
+ },
411
+ {
412
+ "epoch": 0.5918367346938775,
413
+ "grad_norm": 7.279649257659912,
414
+ "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.0429,
416
+ "step": 58
417
+ },
418
+ {
419
+ "epoch": 0.6020408163265306,
420
+ "grad_norm": 0.9923371076583862,
421
+ "learning_rate": 5.918367346938776e-06,
422
+ "loss": 0.0071,
423
+ "step": 59
424
+ },
425
+ {
426
+ "epoch": 0.6122448979591837,
427
+ "grad_norm": 743.8301391601562,
428
+ "learning_rate": 6.020408163265307e-06,
429
+ "loss": 2.7217,
430
+ "step": 60
431
+ },
432
+ {
433
+ "epoch": 0.6224489795918368,
434
+ "grad_norm": 227.04403686523438,
435
+ "learning_rate": 6.122448979591837e-06,
436
+ "loss": 3.9013,
437
+ "step": 61
438
+ },
439
+ {
440
+ "epoch": 0.6326530612244898,
441
+ "grad_norm": 193.12701416015625,
442
+ "learning_rate": 6.224489795918368e-06,
443
+ "loss": 1.417,
444
+ "step": 62
445
+ },
446
+ {
447
+ "epoch": 0.6428571428571429,
448
+ "grad_norm": 642.7814331054688,
449
+ "learning_rate": 6.326530612244899e-06,
450
+ "loss": 3.5854,
451
+ "step": 63
452
+ },
453
+ {
454
+ "epoch": 0.6530612244897959,
455
+ "grad_norm": 1007.544189453125,
456
+ "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 12.918,
458
+ "step": 64
459
+ },
460
+ {
461
+ "epoch": 0.6632653061224489,
462
+ "grad_norm": 1310.942138671875,
463
+ "learning_rate": 6.530612244897959e-06,
464
+ "loss": 7.1566,
465
+ "step": 65
466
+ },
467
+ {
468
+ "epoch": 0.673469387755102,
469
+ "grad_norm": 810.1301879882812,
470
+ "learning_rate": 6.63265306122449e-06,
471
+ "loss": 3.9897,
472
+ "step": 66
473
+ },
474
+ {
475
+ "epoch": 0.6836734693877551,
476
+ "grad_norm": 513.1759643554688,
477
+ "learning_rate": 6.734693877551021e-06,
478
+ "loss": 8.1139,
479
+ "step": 67
480
+ },
481
+ {
482
+ "epoch": 0.6938775510204082,
483
+ "grad_norm": 1414.8878173828125,
484
+ "learning_rate": 6.836734693877551e-06,
485
+ "loss": 5.7005,
486
+ "step": 68
487
+ },
488
+ {
489
+ "epoch": 0.7040816326530612,
490
+ "grad_norm": 31.607126235961914,
491
+ "learning_rate": 6.938775510204082e-06,
492
+ "loss": 0.1219,
493
+ "step": 69
494
+ },
495
+ {
496
+ "epoch": 0.7142857142857143,
497
+ "grad_norm": 799.9751586914062,
498
+ "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 5.7849,
500
+ "step": 70
501
+ },
502
+ {
503
+ "epoch": 0.7244897959183674,
504
+ "grad_norm": 132.71778869628906,
505
+ "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 1.0726,
507
+ "step": 71
508
+ },
509
+ {
510
+ "epoch": 0.7346938775510204,
511
+ "grad_norm": 256.61041259765625,
512
+ "learning_rate": 7.244897959183675e-06,
513
+ "loss": 1.2599,
514
+ "step": 72
515
+ },
516
+ {
517
+ "epoch": 0.7448979591836735,
518
+ "grad_norm": 192.0435333251953,
519
+ "learning_rate": 7.346938775510205e-06,
520
+ "loss": 0.6473,
521
+ "step": 73
522
+ },
523
+ {
524
+ "epoch": 0.7551020408163265,
525
+ "grad_norm": 293.7915954589844,
526
+ "learning_rate": 7.448979591836736e-06,
527
+ "loss": 1.0397,
528
+ "step": 74
529
+ },
530
+ {
531
+ "epoch": 0.7653061224489796,
532
+ "grad_norm": 312.2645263671875,
533
+ "learning_rate": 7.551020408163265e-06,
534
+ "loss": 1.5555,
535
+ "step": 75
536
+ },
537
+ {
538
+ "epoch": 0.7755102040816326,
539
+ "grad_norm": 1.417815923690796,
540
+ "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.0078,
542
+ "step": 76
543
+ },
544
+ {
545
+ "epoch": 0.7857142857142857,
546
+ "grad_norm": 1.4391653537750244,
547
+ "learning_rate": 7.755102040816327e-06,
548
+ "loss": 0.0048,
549
+ "step": 77
550
+ },
551
+ {
552
+ "epoch": 0.7959183673469388,
553
+ "grad_norm": 5.628185749053955,
554
+ "learning_rate": 7.857142857142858e-06,
555
+ "loss": 0.0323,
556
+ "step": 78
557
+ },
558
+ {
559
+ "epoch": 0.8061224489795918,
560
+ "grad_norm": 264.5353698730469,
561
+ "learning_rate": 7.959183673469388e-06,
562
+ "loss": 1.7425,
563
+ "step": 79
564
+ },
565
+ {
566
+ "epoch": 0.8163265306122449,
567
+ "grad_norm": 1.5278851985931396,
568
+ "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.0035,
570
+ "step": 80
571
+ },
572
+ {
573
+ "epoch": 0.826530612244898,
574
+ "grad_norm": 932.3336181640625,
575
+ "learning_rate": 8.16326530612245e-06,
576
+ "loss": 6.4849,
577
+ "step": 81
578
+ },
579
+ {
580
+ "epoch": 0.8367346938775511,
581
+ "grad_norm": 635.4749145507812,
582
+ "learning_rate": 8.26530612244898e-06,
583
+ "loss": 4.3767,
584
+ "step": 82
585
+ },
586
+ {
587
+ "epoch": 0.8469387755102041,
588
+ "grad_norm": 8.875201225280762,
589
+ "learning_rate": 8.36734693877551e-06,
590
+ "loss": 0.0186,
591
+ "step": 83
592
+ },
593
+ {
594
+ "epoch": 0.8571428571428571,
595
+ "grad_norm": 0.15500876307487488,
596
+ "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.0008,
598
+ "step": 84
599
+ },
600
+ {
601
+ "epoch": 0.8673469387755102,
602
+ "grad_norm": 269.5357666015625,
603
+ "learning_rate": 8.571428571428571e-06,
604
+ "loss": 0.8354,
605
+ "step": 85
606
+ },
607
+ {
608
+ "epoch": 0.8775510204081632,
609
+ "grad_norm": 5.054287910461426,
610
+ "learning_rate": 8.673469387755103e-06,
611
+ "loss": 0.0162,
612
+ "step": 86
613
+ },
614
+ {
615
+ "epoch": 0.8877551020408163,
616
+ "grad_norm": 84.90735626220703,
617
+ "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.1282,
619
+ "step": 87
620
+ },
621
+ {
622
+ "epoch": 0.8979591836734694,
623
+ "grad_norm": 81.53719329833984,
624
+ "learning_rate": 8.877551020408163e-06,
625
+ "loss": 0.4514,
626
+ "step": 88
627
+ },
628
+ {
629
+ "epoch": 0.9081632653061225,
630
+ "grad_norm": 547.4005126953125,
631
+ "learning_rate": 8.979591836734695e-06,
632
+ "loss": 4.9103,
633
+ "step": 89
634
+ },
635
+ {
636
+ "epoch": 0.9183673469387755,
637
+ "grad_norm": 25.792213439941406,
638
+ "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.0762,
640
+ "step": 90
641
+ },
642
+ {
643
+ "epoch": 0.9285714285714286,
644
+ "grad_norm": 10.455421447753906,
645
+ "learning_rate": 9.183673469387756e-06,
646
+ "loss": 0.0444,
647
+ "step": 91
648
+ },
649
+ {
650
+ "epoch": 0.9387755102040817,
651
+ "grad_norm": 472.54376220703125,
652
+ "learning_rate": 9.285714285714288e-06,
653
+ "loss": 1.8609,
654
+ "step": 92
655
+ },
656
+ {
657
+ "epoch": 0.9489795918367347,
658
+ "grad_norm": 31.092357635498047,
659
+ "learning_rate": 9.387755102040818e-06,
660
+ "loss": 0.1489,
661
+ "step": 93
662
+ },
663
+ {
664
+ "epoch": 0.9591836734693877,
665
+ "grad_norm": 231.94151306152344,
666
+ "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.5926,
668
+ "step": 94
669
+ },
670
+ {
671
+ "epoch": 0.9693877551020408,
672
+ "grad_norm": 211.05117797851562,
673
+ "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.5344,
675
+ "step": 95
676
+ },
677
+ {
678
+ "epoch": 0.9795918367346939,
679
+ "grad_norm": 217.01339721679688,
680
+ "learning_rate": 9.693877551020408e-06,
681
+ "loss": 0.4693,
682
+ "step": 96
683
+ },
684
+ {
685
+ "epoch": 0.9897959183673469,
686
+ "grad_norm": 1123.96484375,
687
+ "learning_rate": 9.795918367346939e-06,
688
+ "loss": 9.2282,
689
+ "step": 97
690
+ },
691
+ {
692
+ "epoch": 1.0,
693
+ "grad_norm": 741.597412109375,
694
+ "learning_rate": 9.89795918367347e-06,
695
+ "loss": 4.6238,
696
+ "step": 98
697
+ },
698
+ {
699
+ "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
704
+ "eval_dim_1024_cosine_map@100": 0.45394800707643057,
705
+ "eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
707
+ "eval_dim_1024_cosine_precision@1": 0.36235595390524966,
708
+ "eval_dim_1024_cosine_precision@10": 0.3176696542893726,
709
+ "eval_dim_1024_cosine_precision@3": 0.36192915066154496,
710
+ "eval_dim_1024_cosine_precision@5": 0.35172855313700385,
711
+ "eval_dim_1024_cosine_recall@1": 0.04346309464734114,
712
+ "eval_dim_1024_cosine_recall@10": 0.28096984500258326,
713
+ "eval_dim_1024_cosine_recall@3": 0.12757812796185336,
714
+ "eval_dim_1024_cosine_recall@5": 0.19200836801442767,
715
+ "eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
716
+ "eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
717
+ "eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
718
+ "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
+ "eval_dim_128_cosine_map@100": 0.3963095303049961,
720
+ "eval_dim_128_cosine_mrr@10": 0.3199812511432227,
721
+ "eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
722
+ "eval_dim_128_cosine_precision@1": 0.3085787451984635,
723
+ "eval_dim_128_cosine_precision@10": 0.2752880921895006,
724
+ "eval_dim_128_cosine_precision@3": 0.3079385403329065,
725
+ "eval_dim_128_cosine_precision@5": 0.29961587708066584,
726
+ "eval_dim_128_cosine_recall@1": 0.036297623853982414,
727
+ "eval_dim_128_cosine_recall@10": 0.24000960695821508,
728
+ "eval_dim_128_cosine_recall@3": 0.10638786483158841,
729
+ "eval_dim_128_cosine_recall@5": 0.16032639984514846,
730
+ "eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
731
+ "eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
732
+ "eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
733
+ "eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
734
+ "eval_dim_256_cosine_map@100": 0.4298669852983799,
735
+ "eval_dim_256_cosine_mrr@10": 0.3551361197487955,
736
+ "eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
737
+ "eval_dim_256_cosine_precision@1": 0.3437900128040973,
738
+ "eval_dim_256_cosine_precision@10": 0.3040973111395647,
739
+ "eval_dim_256_cosine_precision@3": 0.342936406316688,
740
+ "eval_dim_256_cosine_precision@5": 0.33457106274007686,
741
+ "eval_dim_256_cosine_recall@1": 0.04013102608834382,
742
+ "eval_dim_256_cosine_recall@10": 0.2648598688529433,
743
+ "eval_dim_256_cosine_recall@3": 0.11771735023719074,
744
+ "eval_dim_256_cosine_recall@5": 0.17837935755014916,
745
+ "eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
746
+ "eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
747
+ "eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
748
+ "eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
749
+ "eval_dim_512_cosine_map@100": 0.4476805587612892,
750
+ "eval_dim_512_cosine_mrr@10": 0.37212542934373866,
751
+ "eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
752
+ "eval_dim_512_cosine_precision@1": 0.35979513444302175,
753
+ "eval_dim_512_cosine_precision@10": 0.3173495518565941,
754
+ "eval_dim_512_cosine_precision@3": 0.35936833119931705,
755
+ "eval_dim_512_cosine_precision@5": 0.34967989756722156,
756
+ "eval_dim_512_cosine_recall@1": 0.04265405128130224,
757
+ "eval_dim_512_cosine_recall@10": 0.2781876565001863,
758
+ "eval_dim_512_cosine_recall@3": 0.12523102347193127,
759
+ "eval_dim_512_cosine_recall@5": 0.18912519336740205,
760
+ "eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
761
+ "eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
762
+ "eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
763
+ "eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
764
+ "eval_dim_64_cosine_map@100": 0.3539045084602349,
765
+ "eval_dim_64_cosine_mrr@10": 0.28429414873076814,
766
+ "eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
767
+ "eval_dim_64_cosine_precision@1": 0.2740076824583867,
768
+ "eval_dim_64_cosine_precision@10": 0.24571062740076827,
769
+ "eval_dim_64_cosine_precision@3": 0.27315407597097735,
770
+ "eval_dim_64_cosine_precision@5": 0.2670934699103713,
771
+ "eval_dim_64_cosine_recall@1": 0.03167890172057568,
772
+ "eval_dim_64_cosine_recall@10": 0.21092883720941633,
773
+ "eval_dim_64_cosine_recall@3": 0.09267023360511464,
774
+ "eval_dim_64_cosine_recall@5": 0.14048625468314752,
775
+ "eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
776
+ "eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
777
+ "eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
778
+ "eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
779
+ "eval_dim_768_cosine_map@100": 0.4493001842217619,
780
+ "eval_dim_768_cosine_mrr@10": 0.37149335406377615,
781
+ "eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
782
+ "eval_dim_768_cosine_precision@1": 0.3591549295774648,
783
+ "eval_dim_768_cosine_precision@10": 0.31670934699103714,
784
+ "eval_dim_768_cosine_precision@3": 0.3587281263337601,
785
+ "eval_dim_768_cosine_precision@5": 0.34852752880921894,
786
+ "eval_dim_768_cosine_recall@1": 0.04250079684114586,
787
+ "eval_dim_768_cosine_recall@10": 0.27695909667507057,
788
+ "eval_dim_768_cosine_recall@3": 0.12462187901616553,
789
+ "eval_dim_768_cosine_recall@5": 0.1875478484365334,
790
+ "eval_runtime": 99.0843,
791
+ "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.29402896525927075,
793
+ "eval_steps_per_second": 0.0,
794
+ "step": 98
795
+ },
796
+ {
797
+ "epoch": 1.010204081632653,
798
+ "grad_norm": 342.861328125,
799
+ "learning_rate": 1e-05,
800
+ "loss": 1.9644,
801
+ "step": 99
802
+ },
803
+ {
804
+ "epoch": 1.0204081632653061,
805
+ "grad_norm": 761.8235473632812,
806
+ "learning_rate": 1.0102040816326531e-05,
807
+ "loss": 7.4242,
808
+ "step": 100
809
+ },
810
+ {
811
+ "epoch": 1.030612244897959,
812
+ "grad_norm": 146.39175415039062,
813
+ "learning_rate": 1.0204081632653063e-05,
814
+ "loss": 0.9592,
815
+ "step": 101
816
+ },
817
+ {
818
+ "epoch": 1.0408163265306123,
819
+ "grad_norm": 69.37447357177734,
820
+ "learning_rate": 1.0306122448979591e-05,
821
+ "loss": 0.3051,
822
+ "step": 102
823
+ },
824
+ {
825
+ "epoch": 1.0510204081632653,
826
+ "grad_norm": 241.93687438964844,
827
+ "learning_rate": 1.0408163265306123e-05,
828
+ "loss": 0.926,
829
+ "step": 103
830
+ },
831
+ {
832
+ "epoch": 1.0612244897959184,
833
+ "grad_norm": 13.75313949584961,
834
+ "learning_rate": 1.0510204081632654e-05,
835
+ "loss": 0.0751,
836
+ "step": 104
837
+ },
838
+ {
839
+ "epoch": 1.0714285714285714,
840
+ "grad_norm": 1.861573576927185,
841
+ "learning_rate": 1.0612244897959186e-05,
842
+ "loss": 0.0111,
843
+ "step": 105
844
+ },
845
+ {
846
+ "epoch": 1.0816326530612246,
847
+ "grad_norm": 1.4446377754211426,
848
+ "learning_rate": 1.0714285714285714e-05,
849
+ "loss": 0.0072,
850
+ "step": 106
851
+ },
852
+ {
853
+ "epoch": 1.0918367346938775,
854
+ "grad_norm": 2.217988967895508,
855
+ "learning_rate": 1.0816326530612246e-05,
856
+ "loss": 0.0107,
857
+ "step": 107
858
+ },
859
+ {
860
+ "epoch": 1.1020408163265305,
861
+ "grad_norm": 620.331787109375,
862
+ "learning_rate": 1.0918367346938776e-05,
863
+ "loss": 3.4505,
864
+ "step": 108
865
+ },
866
+ {
867
+ "epoch": 1.1122448979591837,
868
+ "grad_norm": 1.4038218259811401,
869
+ "learning_rate": 1.1020408163265306e-05,
870
+ "loss": 0.005,
871
+ "step": 109
872
+ },
873
+ {
874
+ "epoch": 1.1224489795918366,
875
+ "grad_norm": 46.48203659057617,
876
+ "learning_rate": 1.1122448979591838e-05,
877
+ "loss": 0.1701,
878
+ "step": 110
879
+ },
880
+ {
881
+ "epoch": 1.1326530612244898,
882
+ "grad_norm": 6.003911972045898,
883
+ "learning_rate": 1.1224489795918367e-05,
884
+ "loss": 0.027,
885
+ "step": 111
886
+ },
887
+ {
888
+ "epoch": 1.1428571428571428,
889
+ "grad_norm": 379.09527587890625,
890
+ "learning_rate": 1.1326530612244899e-05,
891
+ "loss": 1.3824,
892
+ "step": 112
893
+ },
894
+ {
895
+ "epoch": 1.153061224489796,
896
+ "grad_norm": 1103.1077880859375,
897
+ "learning_rate": 1.1428571428571429e-05,
898
+ "loss": 8.1459,
899
+ "step": 113
900
+ },
901
+ {
902
+ "epoch": 1.163265306122449,
903
+ "grad_norm": 29.499439239501953,
904
+ "learning_rate": 1.1530612244897961e-05,
905
+ "loss": 0.0917,
906
+ "step": 114
907
+ },
908
+ {
909
+ "epoch": 1.1734693877551021,
910
+ "grad_norm": 0.06352390348911285,
911
+ "learning_rate": 1.1632653061224491e-05,
912
+ "loss": 0.0003,
913
+ "step": 115
914
+ },
915
+ {
916
+ "epoch": 1.183673469387755,
917
+ "grad_norm": 111.55418395996094,
918
+ "learning_rate": 1.1734693877551021e-05,
919
+ "loss": 0.3716,
920
+ "step": 116
921
+ },
922
+ {
923
+ "epoch": 1.193877551020408,
924
+ "grad_norm": 32.166500091552734,
925
+ "learning_rate": 1.1836734693877552e-05,
926
+ "loss": 0.1704,
927
+ "step": 117
928
+ },
929
+ {
930
+ "epoch": 1.2040816326530612,
931
+ "grad_norm": 870.0745239257812,
932
+ "learning_rate": 1.1938775510204084e-05,
933
+ "loss": 9.8059,
934
+ "step": 118
935
+ },
936
+ {
937
+ "epoch": 1.2142857142857142,
938
+ "grad_norm": 139.17662048339844,
939
+ "learning_rate": 1.2040816326530614e-05,
940
+ "loss": 0.5882,
941
+ "step": 119
942
+ },
943
+ {
944
+ "epoch": 1.2244897959183674,
945
+ "grad_norm": 28.489713668823242,
946
+ "learning_rate": 1.2142857142857142e-05,
947
+ "loss": 0.0531,
948
+ "step": 120
949
+ },
950
+ {
951
+ "epoch": 1.2346938775510203,
952
+ "grad_norm": 0.18062859773635864,
953
+ "learning_rate": 1.2244897959183674e-05,
954
+ "loss": 0.0005,
955
+ "step": 121
956
+ },
957
+ {
958
+ "epoch": 1.2448979591836735,
959
+ "grad_norm": 8.26645565032959,
960
+ "learning_rate": 1.2346938775510204e-05,
961
+ "loss": 0.0314,
962
+ "step": 122
963
+ },
964
+ {
965
+ "epoch": 1.2551020408163265,
966
+ "grad_norm": 64.67955017089844,
967
+ "learning_rate": 1.2448979591836736e-05,
968
+ "loss": 0.1811,
969
+ "step": 123
970
+ },
971
+ {
972
+ "epoch": 1.2653061224489797,
973
+ "grad_norm": 420.44439697265625,
974
+ "learning_rate": 1.2551020408163267e-05,
975
+ "loss": 2.6136,
976
+ "step": 124
977
+ },
978
+ {
979
+ "epoch": 1.2755102040816326,
980
+ "grad_norm": 3.5323660373687744,
981
+ "learning_rate": 1.2653061224489798e-05,
982
+ "loss": 0.0087,
983
+ "step": 125
984
+ },
985
+ {
986
+ "epoch": 1.2857142857142856,
987
+ "grad_norm": 52.854801177978516,
988
+ "learning_rate": 1.2755102040816327e-05,
989
+ "loss": 0.1269,
990
+ "step": 126
991
+ },
992
+ {
993
+ "epoch": 1.2959183673469388,
994
+ "grad_norm": 4.583413124084473,
995
+ "learning_rate": 1.2857142857142859e-05,
996
+ "loss": 0.0091,
997
+ "step": 127
998
+ },
999
+ {
1000
+ "epoch": 1.306122448979592,
1001
+ "grad_norm": 17.20958137512207,
1002
+ "learning_rate": 1.2959183673469389e-05,
1003
+ "loss": 0.0467,
1004
+ "step": 128
1005
+ },
1006
+ {
1007
+ "epoch": 1.316326530612245,
1008
+ "grad_norm": 8.821357727050781,
1009
+ "learning_rate": 1.3061224489795918e-05,
1010
+ "loss": 0.0282,
1011
+ "step": 129
1012
+ },
1013
+ {
1014
+ "epoch": 1.3265306122448979,
1015
+ "grad_norm": 0.3024923503398895,
1016
+ "learning_rate": 1.316326530612245e-05,
1017
+ "loss": 0.0012,
1018
+ "step": 130
1019
+ },
1020
+ {
1021
+ "epoch": 1.336734693877551,
1022
+ "grad_norm": 1110.76513671875,
1023
+ "learning_rate": 1.326530612244898e-05,
1024
+ "loss": 3.5135,
1025
+ "step": 131
1026
+ },
1027
+ {
1028
+ "epoch": 1.346938775510204,
1029
+ "grad_norm": 4.655632495880127,
1030
+ "learning_rate": 1.3367346938775512e-05,
1031
+ "loss": 0.0186,
1032
+ "step": 132
1033
+ },
1034
+ {
1035
+ "epoch": 1.3571428571428572,
1036
+ "grad_norm": 641.764404296875,
1037
+ "learning_rate": 1.3469387755102042e-05,
1038
+ "loss": 3.2599,
1039
+ "step": 133
1040
+ },
1041
+ {
1042
+ "epoch": 1.3673469387755102,
1043
+ "grad_norm": 1076.8260498046875,
1044
+ "learning_rate": 1.3571428571428574e-05,
1045
+ "loss": 5.5417,
1046
+ "step": 134
1047
+ },
1048
+ {
1049
+ "epoch": 1.3775510204081631,
1050
+ "grad_norm": 0.5416738390922546,
1051
+ "learning_rate": 1.3673469387755102e-05,
1052
+ "loss": 0.0019,
1053
+ "step": 135
1054
+ },
1055
+ {
1056
+ "epoch": 1.3877551020408163,
1057
+ "grad_norm": 200.03311157226562,
1058
+ "learning_rate": 1.3775510204081634e-05,
1059
+ "loss": 0.5649,
1060
+ "step": 136
1061
+ },
1062
+ {
1063
+ "epoch": 1.3979591836734695,
1064
+ "grad_norm": 35.22038650512695,
1065
+ "learning_rate": 1.3877551020408165e-05,
1066
+ "loss": 0.084,
1067
+ "step": 137
1068
+ },
1069
+ {
1070
+ "epoch": 1.4081632653061225,
1071
+ "grad_norm": 141.9106903076172,
1072
+ "learning_rate": 1.3979591836734696e-05,
1073
+ "loss": 0.6062,
1074
+ "step": 138
1075
+ },
1076
+ {
1077
+ "epoch": 1.4183673469387754,
1078
+ "grad_norm": 15.920783996582031,
1079
+ "learning_rate": 1.4081632653061225e-05,
1080
+ "loss": 0.0639,
1081
+ "step": 139
1082
+ },
1083
+ {
1084
+ "epoch": 1.4285714285714286,
1085
+ "grad_norm": 206.33274841308594,
1086
+ "learning_rate": 1.4183673469387755e-05,
1087
+ "loss": 0.4069,
1088
+ "step": 140
1089
+ },
1090
+ {
1091
+ "epoch": 1.4387755102040816,
1092
+ "grad_norm": 51.149173736572266,
1093
+ "learning_rate": 1.4285714285714287e-05,
1094
+ "loss": 0.2462,
1095
+ "step": 141
1096
+ },
1097
+ {
1098
+ "epoch": 1.4489795918367347,
1099
+ "grad_norm": 658.653564453125,
1100
+ "learning_rate": 1.4387755102040817e-05,
1101
+ "loss": 4.9288,
1102
+ "step": 142
1103
+ },
1104
+ {
1105
+ "epoch": 1.4591836734693877,
1106
+ "grad_norm": 63.49065399169922,
1107
+ "learning_rate": 1.448979591836735e-05,
1108
+ "loss": 0.1852,
1109
+ "step": 143
1110
+ },
1111
+ {
1112
+ "epoch": 1.469387755102041,
1113
+ "grad_norm": 1453.699462890625,
1114
+ "learning_rate": 1.4591836734693878e-05,
1115
+ "loss": 3.0971,
1116
+ "step": 144
1117
+ },
1118
+ {
1119
+ "epoch": 1.4795918367346939,
1120
+ "grad_norm": 499.0628662109375,
1121
+ "learning_rate": 1.469387755102041e-05,
1122
+ "loss": 3.787,
1123
+ "step": 145
1124
+ },
1125
+ {
1126
+ "epoch": 1.489795918367347,
1127
+ "grad_norm": 253.33152770996094,
1128
+ "learning_rate": 1.479591836734694e-05,
1129
+ "loss": 0.8474,
1130
+ "step": 146
1131
+ },
1132
+ {
1133
+ "epoch": 1.5,
1134
+ "grad_norm": 0.8343175649642944,
1135
+ "learning_rate": 1.4897959183673472e-05,
1136
+ "loss": 0.0028,
1137
+ "step": 147
1138
+ },
1139
+ {
1140
+ "epoch": 1.510204081632653,
1141
+ "grad_norm": 38.5785026550293,
1142
+ "learning_rate": 1.5000000000000002e-05,
1143
+ "loss": 0.0931,
1144
+ "step": 148
1145
+ },
1146
+ {
1147
+ "epoch": 1.5204081632653061,
1148
+ "grad_norm": 563.4974365234375,
1149
+ "learning_rate": 1.510204081632653e-05,
1150
+ "loss": 1.8378,
1151
+ "step": 149
1152
+ },
1153
+ {
1154
+ "epoch": 1.5306122448979593,
1155
+ "grad_norm": 749.0945434570312,
1156
+ "learning_rate": 1.5204081632653063e-05,
1157
+ "loss": 2.6074,
1158
+ "step": 150
1159
+ },
1160
+ {
1161
+ "epoch": 1.5408163265306123,
1162
+ "grad_norm": 62.52786636352539,
1163
+ "learning_rate": 1.530612244897959e-05,
1164
+ "loss": 0.1441,
1165
+ "step": 151
1166
+ },
1167
+ {
1168
+ "epoch": 1.5510204081632653,
1169
+ "grad_norm": 281.54400634765625,
1170
+ "learning_rate": 1.5408163265306123e-05,
1171
+ "loss": 0.5622,
1172
+ "step": 152
1173
+ },
1174
+ {
1175
+ "epoch": 1.5612244897959182,
1176
+ "grad_norm": 1.1233166456222534,
1177
+ "learning_rate": 1.5510204081632655e-05,
1178
+ "loss": 0.0049,
1179
+ "step": 153
1180
+ },
1181
+ {
1182
+ "epoch": 1.5714285714285714,
1183
+ "grad_norm": 9.458003044128418,
1184
+ "learning_rate": 1.5612244897959187e-05,
1185
+ "loss": 0.0268,
1186
+ "step": 154
1187
+ },
1188
+ {
1189
+ "epoch": 1.5816326530612246,
1190
+ "grad_norm": 7.9042439460754395,
1191
+ "learning_rate": 1.5714285714285715e-05,
1192
+ "loss": 0.0281,
1193
+ "step": 155
1194
+ },
1195
+ {
1196
+ "epoch": 1.5918367346938775,
1197
+ "grad_norm": 402.8667907714844,
1198
+ "learning_rate": 1.5816326530612247e-05,
1199
+ "loss": 2.9755,
1200
+ "step": 156
1201
+ },
1202
+ {
1203
+ "epoch": 1.6020408163265305,
1204
+ "grad_norm": 359.3101806640625,
1205
+ "learning_rate": 1.5918367346938776e-05,
1206
+ "loss": 1.0982,
1207
+ "step": 157
1208
+ },
1209
+ {
1210
+ "epoch": 1.6122448979591837,
1211
+ "grad_norm": 26.466707229614258,
1212
+ "learning_rate": 1.6020408163265308e-05,
1213
+ "loss": 0.0621,
1214
+ "step": 158
1215
+ },
1216
+ {
1217
+ "epoch": 1.6224489795918369,
1218
+ "grad_norm": 472.1581726074219,
1219
+ "learning_rate": 1.612244897959184e-05,
1220
+ "loss": 6.9631,
1221
+ "step": 159
1222
+ },
1223
+ {
1224
+ "epoch": 1.6326530612244898,
1225
+ "grad_norm": 812.54638671875,
1226
+ "learning_rate": 1.6224489795918368e-05,
1227
+ "loss": 4.7216,
1228
+ "step": 160
1229
+ },
1230
+ {
1231
+ "epoch": 1.6428571428571428,
1232
+ "grad_norm": 252.12796020507812,
1233
+ "learning_rate": 1.63265306122449e-05,
1234
+ "loss": 0.848,
1235
+ "step": 161
1236
+ },
1237
+ {
1238
+ "epoch": 1.6530612244897958,
1239
+ "grad_norm": 1087.48828125,
1240
+ "learning_rate": 1.642857142857143e-05,
1241
+ "loss": 5.6006,
1242
+ "step": 162
1243
+ },
1244
+ {
1245
+ "epoch": 1.663265306122449,
1246
+ "grad_norm": 280.405517578125,
1247
+ "learning_rate": 1.653061224489796e-05,
1248
+ "loss": 4.299,
1249
+ "step": 163
1250
+ },
1251
+ {
1252
+ "epoch": 1.6734693877551021,
1253
+ "grad_norm": 457.81494140625,
1254
+ "learning_rate": 1.6632653061224492e-05,
1255
+ "loss": 2.042,
1256
+ "step": 164
1257
+ },
1258
+ {
1259
+ "epoch": 1.683673469387755,
1260
+ "grad_norm": 511.0380859375,
1261
+ "learning_rate": 1.673469387755102e-05,
1262
+ "loss": 2.4823,
1263
+ "step": 165
1264
+ },
1265
+ {
1266
+ "epoch": 1.693877551020408,
1267
+ "grad_norm": 7.505221366882324,
1268
+ "learning_rate": 1.6836734693877553e-05,
1269
+ "loss": 0.0189,
1270
+ "step": 166
1271
+ },
1272
+ {
1273
+ "epoch": 1.7040816326530612,
1274
+ "grad_norm": 1.01173734664917,
1275
+ "learning_rate": 1.6938775510204085e-05,
1276
+ "loss": 0.0039,
1277
+ "step": 167
1278
+ },
1279
+ {
1280
+ "epoch": 1.7142857142857144,
1281
+ "grad_norm": 0.5971992015838623,
1282
+ "learning_rate": 1.7040816326530613e-05,
1283
+ "loss": 0.0024,
1284
+ "step": 168
1285
+ },
1286
+ {
1287
+ "epoch": 1.7244897959183674,
1288
+ "grad_norm": 505.6401672363281,
1289
+ "learning_rate": 1.7142857142857142e-05,
1290
+ "loss": 2.0453,
1291
+ "step": 169
1292
+ },
1293
+ {
1294
+ "epoch": 1.7346938775510203,
1295
+ "grad_norm": 4.466002464294434,
1296
+ "learning_rate": 1.7244897959183674e-05,
1297
+ "loss": 0.0092,
1298
+ "step": 170
1299
+ },
1300
+ {
1301
+ "epoch": 1.7448979591836735,
1302
+ "grad_norm": 1.1195125579833984,
1303
+ "learning_rate": 1.7346938775510206e-05,
1304
+ "loss": 0.0029,
1305
+ "step": 171
1306
+ },
1307
+ {
1308
+ "epoch": 1.7551020408163265,
1309
+ "grad_norm": 104.82202911376953,
1310
+ "learning_rate": 1.7448979591836738e-05,
1311
+ "loss": 0.3271,
1312
+ "step": 172
1313
+ },
1314
+ {
1315
+ "epoch": 1.7653061224489797,
1316
+ "grad_norm": 1.860406756401062,
1317
+ "learning_rate": 1.7551020408163266e-05,
1318
+ "loss": 0.0054,
1319
+ "step": 173
1320
+ },
1321
+ {
1322
+ "epoch": 1.7755102040816326,
1323
+ "grad_norm": 0.044311508536338806,
1324
+ "learning_rate": 1.7653061224489798e-05,
1325
+ "loss": 0.0002,
1326
+ "step": 174
1327
+ },
1328
+ {
1329
+ "epoch": 1.7857142857142856,
1330
+ "grad_norm": 40.70656204223633,
1331
+ "learning_rate": 1.7755102040816327e-05,
1332
+ "loss": 0.0685,
1333
+ "step": 175
1334
+ },
1335
+ {
1336
+ "epoch": 1.7959183673469388,
1337
+ "grad_norm": 395.348388671875,
1338
+ "learning_rate": 1.785714285714286e-05,
1339
+ "loss": 1.3097,
1340
+ "step": 176
1341
+ },
1342
+ {
1343
+ "epoch": 1.806122448979592,
1344
+ "grad_norm": 326.2778015136719,
1345
+ "learning_rate": 1.795918367346939e-05,
1346
+ "loss": 1.8817,
1347
+ "step": 177
1348
+ },
1349
+ {
1350
+ "epoch": 1.816326530612245,
1351
+ "grad_norm": 41.05072784423828,
1352
+ "learning_rate": 1.806122448979592e-05,
1353
+ "loss": 0.2497,
1354
+ "step": 178
1355
+ },
1356
+ {
1357
+ "epoch": 1.8265306122448979,
1358
+ "grad_norm": 121.29589080810547,
1359
+ "learning_rate": 1.816326530612245e-05,
1360
+ "loss": 0.5822,
1361
+ "step": 179
1362
+ },
1363
+ {
1364
+ "epoch": 1.836734693877551,
1365
+ "grad_norm": 711.2618408203125,
1366
+ "learning_rate": 1.826530612244898e-05,
1367
+ "loss": 1.8103,
1368
+ "step": 180
1369
+ },
1370
+ {
1371
+ "epoch": 1.8469387755102042,
1372
+ "grad_norm": 500.7347106933594,
1373
+ "learning_rate": 1.836734693877551e-05,
1374
+ "loss": 1.5506,
1375
+ "step": 181
1376
+ },
1377
+ {
1378
+ "epoch": 1.8571428571428572,
1379
+ "grad_norm": 252.05322265625,
1380
+ "learning_rate": 1.8469387755102043e-05,
1381
+ "loss": 1.281,
1382
+ "step": 182
1383
+ },
1384
+ {
1385
+ "epoch": 1.8673469387755102,
1386
+ "grad_norm": 370.9935302734375,
1387
+ "learning_rate": 1.8571428571428575e-05,
1388
+ "loss": 2.8616,
1389
+ "step": 183
1390
+ },
1391
+ {
1392
+ "epoch": 1.8775510204081631,
1393
+ "grad_norm": 4.682647705078125,
1394
+ "learning_rate": 1.8673469387755104e-05,
1395
+ "loss": 0.0118,
1396
+ "step": 184
1397
+ },
1398
+ {
1399
+ "epoch": 1.8877551020408163,
1400
+ "grad_norm": 2.143557548522949,
1401
+ "learning_rate": 1.8775510204081636e-05,
1402
+ "loss": 0.0038,
1403
+ "step": 185
1404
+ },
1405
+ {
1406
+ "epoch": 1.8979591836734695,
1407
+ "grad_norm": 6.499508857727051,
1408
+ "learning_rate": 1.8877551020408164e-05,
1409
+ "loss": 0.0331,
1410
+ "step": 186
1411
+ },
1412
+ {
1413
+ "epoch": 1.9081632653061225,
1414
+ "grad_norm": 7.2162089347839355,
1415
+ "learning_rate": 1.8979591836734696e-05,
1416
+ "loss": 0.0273,
1417
+ "step": 187
1418
+ },
1419
+ {
1420
+ "epoch": 1.9183673469387754,
1421
+ "grad_norm": 23.073841094970703,
1422
+ "learning_rate": 1.9081632653061225e-05,
1423
+ "loss": 0.1026,
1424
+ "step": 188
1425
+ },
1426
+ {
1427
+ "epoch": 1.9285714285714286,
1428
+ "grad_norm": 48.74525833129883,
1429
+ "learning_rate": 1.9183673469387756e-05,
1430
+ "loss": 0.1942,
1431
+ "step": 189
1432
+ },
1433
+ {
1434
+ "epoch": 1.9387755102040818,
1435
+ "grad_norm": 384.64678955078125,
1436
+ "learning_rate": 1.928571428571429e-05,
1437
+ "loss": 3.4886,
1438
+ "step": 190
1439
+ },
1440
+ {
1441
+ "epoch": 1.9489795918367347,
1442
+ "grad_norm": 103.53422546386719,
1443
+ "learning_rate": 1.9387755102040817e-05,
1444
+ "loss": 0.628,
1445
+ "step": 191
1446
+ },
1447
+ {
1448
+ "epoch": 1.9591836734693877,
1449
+ "grad_norm": 42.5008544921875,
1450
+ "learning_rate": 1.948979591836735e-05,
1451
+ "loss": 0.1967,
1452
+ "step": 192
1453
+ },
1454
+ {
1455
+ "epoch": 1.9693877551020407,
1456
+ "grad_norm": 145.1553955078125,
1457
+ "learning_rate": 1.9591836734693877e-05,
1458
+ "loss": 3.9822,
1459
+ "step": 193
1460
+ },
1461
+ {
1462
+ "epoch": 1.9795918367346939,
1463
+ "grad_norm": 0.07428821176290512,
1464
+ "learning_rate": 1.969387755102041e-05,
1465
+ "loss": 0.0003,
1466
+ "step": 194
1467
+ },
1468
+ {
1469
+ "epoch": 1.989795918367347,
1470
+ "grad_norm": 545.6088256835938,
1471
+ "learning_rate": 1.979591836734694e-05,
1472
+ "loss": 3.7309,
1473
+ "step": 195
1474
+ },
1475
+ {
1476
+ "epoch": 2.0,
1477
+ "grad_norm": 0.5490627288818359,
1478
+ "learning_rate": 1.9897959183673473e-05,
1479
+ "loss": 0.0024,
1480
+ "step": 196
1481
+ },
1482
+ {
1483
+ "epoch": 2.0,
1484
+ "eval_dim_1024_cosine_accuracy@1": 0.32522407170294493,
1485
+ "eval_dim_1024_cosine_accuracy@10": 0.3969270166453265,
1486
+ "eval_dim_1024_cosine_accuracy@3": 0.33290653008962867,
1487
+ "eval_dim_1024_cosine_accuracy@5": 0.36043533930857874,
1488
+ "eval_dim_1024_cosine_map@100": 0.4164888021641558,
1489
+ "eval_dim_1024_cosine_mrr@10": 0.33769460195516493,
1490
+ "eval_dim_1024_cosine_ndcg@10": 0.34986350069216465,
1491
+ "eval_dim_1024_cosine_precision@1": 0.32522407170294493,
1492
+ "eval_dim_1024_cosine_precision@10": 0.28361075544174136,
1493
+ "eval_dim_1024_cosine_precision@3": 0.3254374733247973,
1494
+ "eval_dim_1024_cosine_precision@5": 0.31626120358514725,
1495
+ "eval_dim_1024_cosine_recall@1": 0.04113491331982186,
1496
+ "eval_dim_1024_cosine_recall@10": 0.2664549051060991,
1497
+ "eval_dim_1024_cosine_recall@3": 0.12080229545561262,
1498
+ "eval_dim_1024_cosine_recall@5": 0.18183789253196145,
1499
+ "eval_dim_128_cosine_accuracy@1": 0.30217669654289375,
1500
+ "eval_dim_128_cosine_accuracy@10": 0.3546734955185659,
1501
+ "eval_dim_128_cosine_accuracy@3": 0.3072983354673495,
1502
+ "eval_dim_128_cosine_accuracy@5": 0.3265044814340589,
1503
+ "eval_dim_128_cosine_map@100": 0.38014172959059034,
1504
+ "eval_dim_128_cosine_mrr@10": 0.3112729406743488,
1505
+ "eval_dim_128_cosine_ndcg@10": 0.32071443787836906,
1506
+ "eval_dim_128_cosine_precision@1": 0.30217669654289375,
1507
+ "eval_dim_128_cosine_precision@10": 0.26312419974391804,
1508
+ "eval_dim_128_cosine_precision@3": 0.30239009816474605,
1509
+ "eval_dim_128_cosine_precision@5": 0.29359795134443023,
1510
+ "eval_dim_128_cosine_recall@1": 0.03603846894598867,
1511
+ "eval_dim_128_cosine_recall@10": 0.23664446759855584,
1512
+ "eval_dim_128_cosine_recall@3": 0.10607255532328354,
1513
+ "eval_dim_128_cosine_recall@5": 0.15998840334482403,
1514
+ "eval_dim_256_cosine_accuracy@1": 0.31049935979513443,
1515
+ "eval_dim_256_cosine_accuracy@10": 0.3725992317541613,
1516
+ "eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
1517
+ "eval_dim_256_cosine_accuracy@5": 0.34571062740076824,
1518
+ "eval_dim_256_cosine_map@100": 0.3940538127924734,
1519
+ "eval_dim_256_cosine_mrr@10": 0.3219094872263883,
1520
+ "eval_dim_256_cosine_ndcg@10": 0.33365785011470184,
1521
+ "eval_dim_256_cosine_precision@1": 0.31049935979513443,
1522
+ "eval_dim_256_cosine_precision@10": 0.2727272727272727,
1523
+ "eval_dim_256_cosine_precision@3": 0.3109261630388391,
1524
+ "eval_dim_256_cosine_precision@5": 0.3035851472471191,
1525
+ "eval_dim_256_cosine_recall@1": 0.0379038673811849,
1526
+ "eval_dim_256_cosine_recall@10": 0.25061548215235363,
1527
+ "eval_dim_256_cosine_recall@3": 0.11184662439829526,
1528
+ "eval_dim_256_cosine_recall@5": 0.16972372403865282,
1529
+ "eval_dim_512_cosine_accuracy@1": 0.32842509603072984,
1530
+ "eval_dim_512_cosine_accuracy@10": 0.39564660691421255,
1531
+ "eval_dim_512_cosine_accuracy@3": 0.33418693982074266,
1532
+ "eval_dim_512_cosine_accuracy@5": 0.36555697823303457,
1533
+ "eval_dim_512_cosine_map@100": 0.4125328284000196,
1534
+ "eval_dim_512_cosine_mrr@10": 0.34027168058858154,
1535
+ "eval_dim_512_cosine_ndcg@10": 0.3525488928748249,
1536
+ "eval_dim_512_cosine_precision@1": 0.32842509603072984,
1537
+ "eval_dim_512_cosine_precision@10": 0.28693982074263763,
1538
+ "eval_dim_512_cosine_precision@3": 0.3282116944088775,
1539
+ "eval_dim_512_cosine_precision@5": 0.31997439180537773,
1540
+ "eval_dim_512_cosine_recall@1": 0.04071091183465321,
1541
+ "eval_dim_512_cosine_recall@10": 0.2638449444559509,
1542
+ "eval_dim_512_cosine_recall@3": 0.11970757850133786,
1543
+ "eval_dim_512_cosine_recall@5": 0.1806811237454132,
1544
+ "eval_dim_64_cosine_accuracy@1": 0.28040973111395645,
1545
+ "eval_dim_64_cosine_accuracy@10": 0.3348271446862996,
1546
+ "eval_dim_64_cosine_accuracy@3": 0.28297055057618437,
1547
+ "eval_dim_64_cosine_accuracy@5": 0.3072983354673495,
1548
+ "eval_dim_64_cosine_map@100": 0.35085623648833997,
1549
+ "eval_dim_64_cosine_mrr@10": 0.28944678170030247,
1550
+ "eval_dim_64_cosine_ndcg@10": 0.2991224720529457,
1551
+ "eval_dim_64_cosine_precision@1": 0.28040973111395645,
1552
+ "eval_dim_64_cosine_precision@10": 0.24878361075544175,
1553
+ "eval_dim_64_cosine_precision@3": 0.27955612462654716,
1554
+ "eval_dim_64_cosine_precision@5": 0.27247119078105,
1555
+ "eval_dim_64_cosine_recall@1": 0.03187808455878807,
1556
+ "eval_dim_64_cosine_recall@10": 0.2128007008801171,
1557
+ "eval_dim_64_cosine_recall@3": 0.09363361347149868,
1558
+ "eval_dim_64_cosine_recall@5": 0.14192536615474802,
1559
+ "eval_dim_768_cosine_accuracy@1": 0.32970550576184376,
1560
+ "eval_dim_768_cosine_accuracy@10": 0.3994878361075544,
1561
+ "eval_dim_768_cosine_accuracy@3": 0.33418693982074266,
1562
+ "eval_dim_768_cosine_accuracy@5": 0.36427656850192064,
1563
+ "eval_dim_768_cosine_map@100": 0.4160652625925415,
1564
+ "eval_dim_768_cosine_mrr@10": 0.3415124585899229,
1565
+ "eval_dim_768_cosine_ndcg@10": 0.35370573856938964,
1566
+ "eval_dim_768_cosine_precision@1": 0.32970550576184376,
1567
+ "eval_dim_768_cosine_precision@10": 0.2877720870678617,
1568
+ "eval_dim_768_cosine_precision@3": 0.3288518992744345,
1569
+ "eval_dim_768_cosine_precision@5": 0.31997439180537773,
1570
+ "eval_dim_768_cosine_recall@1": 0.040955758827011135,
1571
+ "eval_dim_768_cosine_recall@10": 0.26685683005601735,
1572
+ "eval_dim_768_cosine_recall@3": 0.12009305539695316,
1573
+ "eval_dim_768_cosine_recall@5": 0.18142212378067016,
1574
+ "eval_runtime": 99.167,
1575
+ "eval_samples_per_second": 0.0,
1576
+ "eval_sequential_score": 0.2991224720529457,
1577
+ "eval_steps_per_second": 0.0,
1578
+ "step": 196
1579
+ }
1580
+ ],
1581
+ "logging_steps": 1,
1582
+ "max_steps": 1960,
1583
+ "num_input_tokens_seen": 0,
1584
+ "num_train_epochs": 20,
1585
+ "save_steps": 500,
1586
+ "stateful_callbacks": {
1587
+ "EarlyStoppingCallback": {
1588
+ "args": {
1589
+ "early_stopping_patience": 2,
1590
+ "early_stopping_threshold": 0.0
1591
+ },
1592
+ "attributes": {
1593
+ "early_stopping_patience_counter": 1
1594
+ }
1595
+ },
1596
+ "TrainerControl": {
1597
+ "args": {
1598
+ "should_epoch_stop": false,
1599
+ "should_evaluate": false,
1600
+ "should_log": false,
1601
+ "should_save": true,
1602
+ "should_training_stop": false
1603
+ },
1604
+ "attributes": {}
1605
+ }
1606
+ },
1607
+ "total_flos": 0.0,
1608
+ "train_batch_size": 2,
1609
+ "trial_name": null,
1610
+ "trial_params": null
1611
+ }
checkpoint-196/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
3
+ size 6097
checkpoint-294/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-294/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-294/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.51.3",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
checkpoint-294/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.51.3",
6
+ "pytorch": "2.8.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoint-294/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c4fc720dc2e3977a3cc3e3497ee115851c08f432984ab97bfaf724a6b3d666
3
+ size 2239607176
checkpoint-294/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-294/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ddd633ecdd2f1e6c3de318b7ceb44fe2f43af2a873c9089a62cd07aaeb5c74
3
+ size 4471067142
checkpoint-294/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bdc8f1e2d846953d00ba606f4cf92976f5653cd22fea2aacf347840fdb304ea
3
+ size 14645
checkpoint-294/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b1f870feb8cc60c3e97ad9856efed23cb494b46c65f244929c7c13ceca58cc
3
+ size 1465
checkpoint-294/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-294/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
checkpoint-294/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-294/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
checkpoint-294/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "max_length": 512,
51
+ "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "</s>",
57
+ "stride": 0,
58
+ "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "<unk>"
62
+ }
checkpoint-294/trainer_state.json ADDED
@@ -0,0 +1,2395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 98,
3
+ "best_metric": 0.3312285498294292,
4
+ "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 294,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.01020408163265306,
14
+ "grad_norm": 973.273681640625,
15
+ "learning_rate": 0.0,
16
+ "loss": 15.8588,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.02040816326530612,
21
+ "grad_norm": 1016.8517456054688,
22
+ "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 10.7411,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.030612244897959183,
28
+ "grad_norm": 166.88465881347656,
29
+ "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 1.3873,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.04081632653061224,
35
+ "grad_norm": 108.06741333007812,
36
+ "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 0.9088,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.05102040816326531,
42
+ "grad_norm": 1.1959134340286255,
43
+ "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 0.0077,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 0.061224489795918366,
49
+ "grad_norm": 130.83908081054688,
50
+ "learning_rate": 5.102040816326531e-07,
51
+ "loss": 0.6016,
52
+ "step": 6
53
+ },
54
+ {
55
+ "epoch": 0.07142857142857142,
56
+ "grad_norm": 318.3863525390625,
57
+ "learning_rate": 6.122448979591837e-07,
58
+ "loss": 1.6714,
59
+ "step": 7
60
+ },
61
+ {
62
+ "epoch": 0.08163265306122448,
63
+ "grad_norm": 74.26002502441406,
64
+ "learning_rate": 7.142857142857143e-07,
65
+ "loss": 0.4211,
66
+ "step": 8
67
+ },
68
+ {
69
+ "epoch": 0.09183673469387756,
70
+ "grad_norm": 32.4500846862793,
71
+ "learning_rate": 8.163265306122449e-07,
72
+ "loss": 0.1996,
73
+ "step": 9
74
+ },
75
+ {
76
+ "epoch": 0.10204081632653061,
77
+ "grad_norm": 41.27345275878906,
78
+ "learning_rate": 9.183673469387756e-07,
79
+ "loss": 0.1895,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.11224489795918367,
84
+ "grad_norm": 27.35291862487793,
85
+ "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 0.1358,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.12244897959183673,
91
+ "grad_norm": 103.75244903564453,
92
+ "learning_rate": 1.122448979591837e-06,
93
+ "loss": 0.5552,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.1326530612244898,
98
+ "grad_norm": 155.97923278808594,
99
+ "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 0.5141,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.14285714285714285,
105
+ "grad_norm": 53.757484436035156,
106
+ "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 0.1955,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.15306122448979592,
112
+ "grad_norm": 175.17491149902344,
113
+ "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 1.9114,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.16326530612244897,
119
+ "grad_norm": 49.02252197265625,
120
+ "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 0.2645,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.17346938775510204,
126
+ "grad_norm": 999.3756103515625,
127
+ "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 7.5545,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.1836734693877551,
133
+ "grad_norm": 149.2627410888672,
134
+ "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 0.4297,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.19387755102040816,
140
+ "grad_norm": 204.95181274414062,
141
+ "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 0.678,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.20408163265306123,
147
+ "grad_norm": 103.94851684570312,
148
+ "learning_rate": 1.938775510204082e-06,
149
+ "loss": 0.4634,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.21428571428571427,
154
+ "grad_norm": 536.7100219726562,
155
+ "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 4.2252,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.22448979591836735,
161
+ "grad_norm": 444.44805908203125,
162
+ "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 3.9985,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.23469387755102042,
168
+ "grad_norm": 170.50369262695312,
169
+ "learning_rate": 2.244897959183674e-06,
170
+ "loss": 1.9242,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.24489795918367346,
175
+ "grad_norm": 626.5487060546875,
176
+ "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 3.2716,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.25510204081632654,
182
+ "grad_norm": 51.353050231933594,
183
+ "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 0.123,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.2653061224489796,
189
+ "grad_norm": 108.25341796875,
190
+ "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 1.0011,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.2755102040816326,
196
+ "grad_norm": 322.83502197265625,
197
+ "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 3.5846,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.2857142857142857,
203
+ "grad_norm": 203.38458251953125,
204
+ "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 1.1365,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.29591836734693877,
210
+ "grad_norm": 127.78427124023438,
211
+ "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 0.7149,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.30612244897959184,
217
+ "grad_norm": 283.67645263671875,
218
+ "learning_rate": 2.959183673469388e-06,
219
+ "loss": 1.2629,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 0.3163265306122449,
224
+ "grad_norm": 82.65542602539062,
225
+ "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 0.6459,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 0.32653061224489793,
231
+ "grad_norm": 42.66185760498047,
232
+ "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 0.1934,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 0.336734693877551,
238
+ "grad_norm": 212.1294708251953,
239
+ "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 1.4897,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 0.3469387755102041,
245
+ "grad_norm": 188.0417022705078,
246
+ "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 0.8561,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 0.35714285714285715,
252
+ "grad_norm": 2.0467610359191895,
253
+ "learning_rate": 3.469387755102041e-06,
254
+ "loss": 0.0128,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 0.3673469387755102,
259
+ "grad_norm": 283.3966979980469,
260
+ "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.4952,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 0.37755102040816324,
266
+ "grad_norm": 60.74869155883789,
267
+ "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 0.3181,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 0.3877551020408163,
273
+ "grad_norm": 824.6165771484375,
274
+ "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 6.3681,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 0.3979591836734694,
280
+ "grad_norm": 231.1636962890625,
281
+ "learning_rate": 3.877551020408164e-06,
282
+ "loss": 1.4487,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 0.40816326530612246,
287
+ "grad_norm": 26.46611785888672,
288
+ "learning_rate": 3.979591836734694e-06,
289
+ "loss": 0.1702,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 0.41836734693877553,
294
+ "grad_norm": 75.88525390625,
295
+ "learning_rate": 4.081632653061225e-06,
296
+ "loss": 0.2513,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 0.42857142857142855,
301
+ "grad_norm": 465.83392333984375,
302
+ "learning_rate": 4.183673469387755e-06,
303
+ "loss": 4.1595,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 0.4387755102040816,
308
+ "grad_norm": 306.2772521972656,
309
+ "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 2.7347,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 0.4489795918367347,
315
+ "grad_norm": 488.9759521484375,
316
+ "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 2.3182,
318
+ "step": 44
319
+ },
320
+ {
321
+ "epoch": 0.45918367346938777,
322
+ "grad_norm": 355.1698913574219,
323
+ "learning_rate": 4.489795918367348e-06,
324
+ "loss": 1.3285,
325
+ "step": 45
326
+ },
327
+ {
328
+ "epoch": 0.46938775510204084,
329
+ "grad_norm": 263.558349609375,
330
+ "learning_rate": 4.591836734693878e-06,
331
+ "loss": 2.1155,
332
+ "step": 46
333
+ },
334
+ {
335
+ "epoch": 0.47959183673469385,
336
+ "grad_norm": 9.667963981628418,
337
+ "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.0645,
339
+ "step": 47
340
+ },
341
+ {
342
+ "epoch": 0.4897959183673469,
343
+ "grad_norm": 957.79345703125,
344
+ "learning_rate": 4.795918367346939e-06,
345
+ "loss": 7.1283,
346
+ "step": 48
347
+ },
348
+ {
349
+ "epoch": 0.5,
350
+ "grad_norm": 160.0965118408203,
351
+ "learning_rate": 4.897959183673469e-06,
352
+ "loss": 0.711,
353
+ "step": 49
354
+ },
355
+ {
356
+ "epoch": 0.5102040816326531,
357
+ "grad_norm": 93.697265625,
358
+ "learning_rate": 5e-06,
359
+ "loss": 0.4716,
360
+ "step": 50
361
+ },
362
+ {
363
+ "epoch": 0.5204081632653061,
364
+ "grad_norm": 292.9518737792969,
365
+ "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 2.2895,
367
+ "step": 51
368
+ },
369
+ {
370
+ "epoch": 0.5306122448979592,
371
+ "grad_norm": 335.4564514160156,
372
+ "learning_rate": 5.204081632653062e-06,
373
+ "loss": 1.9235,
374
+ "step": 52
375
+ },
376
+ {
377
+ "epoch": 0.5408163265306123,
378
+ "grad_norm": 138.63575744628906,
379
+ "learning_rate": 5.306122448979593e-06,
380
+ "loss": 0.8777,
381
+ "step": 53
382
+ },
383
+ {
384
+ "epoch": 0.5510204081632653,
385
+ "grad_norm": 1.011594533920288,
386
+ "learning_rate": 5.408163265306123e-06,
387
+ "loss": 0.0038,
388
+ "step": 54
389
+ },
390
+ {
391
+ "epoch": 0.5612244897959183,
392
+ "grad_norm": 506.25152587890625,
393
+ "learning_rate": 5.510204081632653e-06,
394
+ "loss": 1.5598,
395
+ "step": 55
396
+ },
397
+ {
398
+ "epoch": 0.5714285714285714,
399
+ "grad_norm": 2.2550530433654785,
400
+ "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.0177,
402
+ "step": 56
403
+ },
404
+ {
405
+ "epoch": 0.5816326530612245,
406
+ "grad_norm": 13.93323802947998,
407
+ "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 0.0837,
409
+ "step": 57
410
+ },
411
+ {
412
+ "epoch": 0.5918367346938775,
413
+ "grad_norm": 7.279649257659912,
414
+ "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.0429,
416
+ "step": 58
417
+ },
418
+ {
419
+ "epoch": 0.6020408163265306,
420
+ "grad_norm": 0.9923371076583862,
421
+ "learning_rate": 5.918367346938776e-06,
422
+ "loss": 0.0071,
423
+ "step": 59
424
+ },
425
+ {
426
+ "epoch": 0.6122448979591837,
427
+ "grad_norm": 743.8301391601562,
428
+ "learning_rate": 6.020408163265307e-06,
429
+ "loss": 2.7217,
430
+ "step": 60
431
+ },
432
+ {
433
+ "epoch": 0.6224489795918368,
434
+ "grad_norm": 227.04403686523438,
435
+ "learning_rate": 6.122448979591837e-06,
436
+ "loss": 3.9013,
437
+ "step": 61
438
+ },
439
+ {
440
+ "epoch": 0.6326530612244898,
441
+ "grad_norm": 193.12701416015625,
442
+ "learning_rate": 6.224489795918368e-06,
443
+ "loss": 1.417,
444
+ "step": 62
445
+ },
446
+ {
447
+ "epoch": 0.6428571428571429,
448
+ "grad_norm": 642.7814331054688,
449
+ "learning_rate": 6.326530612244899e-06,
450
+ "loss": 3.5854,
451
+ "step": 63
452
+ },
453
+ {
454
+ "epoch": 0.6530612244897959,
455
+ "grad_norm": 1007.544189453125,
456
+ "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 12.918,
458
+ "step": 64
459
+ },
460
+ {
461
+ "epoch": 0.6632653061224489,
462
+ "grad_norm": 1310.942138671875,
463
+ "learning_rate": 6.530612244897959e-06,
464
+ "loss": 7.1566,
465
+ "step": 65
466
+ },
467
+ {
468
+ "epoch": 0.673469387755102,
469
+ "grad_norm": 810.1301879882812,
470
+ "learning_rate": 6.63265306122449e-06,
471
+ "loss": 3.9897,
472
+ "step": 66
473
+ },
474
+ {
475
+ "epoch": 0.6836734693877551,
476
+ "grad_norm": 513.1759643554688,
477
+ "learning_rate": 6.734693877551021e-06,
478
+ "loss": 8.1139,
479
+ "step": 67
480
+ },
481
+ {
482
+ "epoch": 0.6938775510204082,
483
+ "grad_norm": 1414.8878173828125,
484
+ "learning_rate": 6.836734693877551e-06,
485
+ "loss": 5.7005,
486
+ "step": 68
487
+ },
488
+ {
489
+ "epoch": 0.7040816326530612,
490
+ "grad_norm": 31.607126235961914,
491
+ "learning_rate": 6.938775510204082e-06,
492
+ "loss": 0.1219,
493
+ "step": 69
494
+ },
495
+ {
496
+ "epoch": 0.7142857142857143,
497
+ "grad_norm": 799.9751586914062,
498
+ "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 5.7849,
500
+ "step": 70
501
+ },
502
+ {
503
+ "epoch": 0.7244897959183674,
504
+ "grad_norm": 132.71778869628906,
505
+ "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 1.0726,
507
+ "step": 71
508
+ },
509
+ {
510
+ "epoch": 0.7346938775510204,
511
+ "grad_norm": 256.61041259765625,
512
+ "learning_rate": 7.244897959183675e-06,
513
+ "loss": 1.2599,
514
+ "step": 72
515
+ },
516
+ {
517
+ "epoch": 0.7448979591836735,
518
+ "grad_norm": 192.0435333251953,
519
+ "learning_rate": 7.346938775510205e-06,
520
+ "loss": 0.6473,
521
+ "step": 73
522
+ },
523
+ {
524
+ "epoch": 0.7551020408163265,
525
+ "grad_norm": 293.7915954589844,
526
+ "learning_rate": 7.448979591836736e-06,
527
+ "loss": 1.0397,
528
+ "step": 74
529
+ },
530
+ {
531
+ "epoch": 0.7653061224489796,
532
+ "grad_norm": 312.2645263671875,
533
+ "learning_rate": 7.551020408163265e-06,
534
+ "loss": 1.5555,
535
+ "step": 75
536
+ },
537
+ {
538
+ "epoch": 0.7755102040816326,
539
+ "grad_norm": 1.417815923690796,
540
+ "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.0078,
542
+ "step": 76
543
+ },
544
+ {
545
+ "epoch": 0.7857142857142857,
546
+ "grad_norm": 1.4391653537750244,
547
+ "learning_rate": 7.755102040816327e-06,
548
+ "loss": 0.0048,
549
+ "step": 77
550
+ },
551
+ {
552
+ "epoch": 0.7959183673469388,
553
+ "grad_norm": 5.628185749053955,
554
+ "learning_rate": 7.857142857142858e-06,
555
+ "loss": 0.0323,
556
+ "step": 78
557
+ },
558
+ {
559
+ "epoch": 0.8061224489795918,
560
+ "grad_norm": 264.5353698730469,
561
+ "learning_rate": 7.959183673469388e-06,
562
+ "loss": 1.7425,
563
+ "step": 79
564
+ },
565
+ {
566
+ "epoch": 0.8163265306122449,
567
+ "grad_norm": 1.5278851985931396,
568
+ "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.0035,
570
+ "step": 80
571
+ },
572
+ {
573
+ "epoch": 0.826530612244898,
574
+ "grad_norm": 932.3336181640625,
575
+ "learning_rate": 8.16326530612245e-06,
576
+ "loss": 6.4849,
577
+ "step": 81
578
+ },
579
+ {
580
+ "epoch": 0.8367346938775511,
581
+ "grad_norm": 635.4749145507812,
582
+ "learning_rate": 8.26530612244898e-06,
583
+ "loss": 4.3767,
584
+ "step": 82
585
+ },
586
+ {
587
+ "epoch": 0.8469387755102041,
588
+ "grad_norm": 8.875201225280762,
589
+ "learning_rate": 8.36734693877551e-06,
590
+ "loss": 0.0186,
591
+ "step": 83
592
+ },
593
+ {
594
+ "epoch": 0.8571428571428571,
595
+ "grad_norm": 0.15500876307487488,
596
+ "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.0008,
598
+ "step": 84
599
+ },
600
+ {
601
+ "epoch": 0.8673469387755102,
602
+ "grad_norm": 269.5357666015625,
603
+ "learning_rate": 8.571428571428571e-06,
604
+ "loss": 0.8354,
605
+ "step": 85
606
+ },
607
+ {
608
+ "epoch": 0.8775510204081632,
609
+ "grad_norm": 5.054287910461426,
610
+ "learning_rate": 8.673469387755103e-06,
611
+ "loss": 0.0162,
612
+ "step": 86
613
+ },
614
+ {
615
+ "epoch": 0.8877551020408163,
616
+ "grad_norm": 84.90735626220703,
617
+ "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.1282,
619
+ "step": 87
620
+ },
621
+ {
622
+ "epoch": 0.8979591836734694,
623
+ "grad_norm": 81.53719329833984,
624
+ "learning_rate": 8.877551020408163e-06,
625
+ "loss": 0.4514,
626
+ "step": 88
627
+ },
628
+ {
629
+ "epoch": 0.9081632653061225,
630
+ "grad_norm": 547.4005126953125,
631
+ "learning_rate": 8.979591836734695e-06,
632
+ "loss": 4.9103,
633
+ "step": 89
634
+ },
635
+ {
636
+ "epoch": 0.9183673469387755,
637
+ "grad_norm": 25.792213439941406,
638
+ "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.0762,
640
+ "step": 90
641
+ },
642
+ {
643
+ "epoch": 0.9285714285714286,
644
+ "grad_norm": 10.455421447753906,
645
+ "learning_rate": 9.183673469387756e-06,
646
+ "loss": 0.0444,
647
+ "step": 91
648
+ },
649
+ {
650
+ "epoch": 0.9387755102040817,
651
+ "grad_norm": 472.54376220703125,
652
+ "learning_rate": 9.285714285714288e-06,
653
+ "loss": 1.8609,
654
+ "step": 92
655
+ },
656
+ {
657
+ "epoch": 0.9489795918367347,
658
+ "grad_norm": 31.092357635498047,
659
+ "learning_rate": 9.387755102040818e-06,
660
+ "loss": 0.1489,
661
+ "step": 93
662
+ },
663
+ {
664
+ "epoch": 0.9591836734693877,
665
+ "grad_norm": 231.94151306152344,
666
+ "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.5926,
668
+ "step": 94
669
+ },
670
+ {
671
+ "epoch": 0.9693877551020408,
672
+ "grad_norm": 211.05117797851562,
673
+ "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.5344,
675
+ "step": 95
676
+ },
677
+ {
678
+ "epoch": 0.9795918367346939,
679
+ "grad_norm": 217.01339721679688,
680
+ "learning_rate": 9.693877551020408e-06,
681
+ "loss": 0.4693,
682
+ "step": 96
683
+ },
684
+ {
685
+ "epoch": 0.9897959183673469,
686
+ "grad_norm": 1123.96484375,
687
+ "learning_rate": 9.795918367346939e-06,
688
+ "loss": 9.2282,
689
+ "step": 97
690
+ },
691
+ {
692
+ "epoch": 1.0,
693
+ "grad_norm": 741.597412109375,
694
+ "learning_rate": 9.89795918367347e-06,
695
+ "loss": 4.6238,
696
+ "step": 98
697
+ },
698
+ {
699
+ "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
704
+ "eval_dim_1024_cosine_map@100": 0.45394800707643057,
705
+ "eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
707
+ "eval_dim_1024_cosine_precision@1": 0.36235595390524966,
708
+ "eval_dim_1024_cosine_precision@10": 0.3176696542893726,
709
+ "eval_dim_1024_cosine_precision@3": 0.36192915066154496,
710
+ "eval_dim_1024_cosine_precision@5": 0.35172855313700385,
711
+ "eval_dim_1024_cosine_recall@1": 0.04346309464734114,
712
+ "eval_dim_1024_cosine_recall@10": 0.28096984500258326,
713
+ "eval_dim_1024_cosine_recall@3": 0.12757812796185336,
714
+ "eval_dim_1024_cosine_recall@5": 0.19200836801442767,
715
+ "eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
716
+ "eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
717
+ "eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
718
+ "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
+ "eval_dim_128_cosine_map@100": 0.3963095303049961,
720
+ "eval_dim_128_cosine_mrr@10": 0.3199812511432227,
721
+ "eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
722
+ "eval_dim_128_cosine_precision@1": 0.3085787451984635,
723
+ "eval_dim_128_cosine_precision@10": 0.2752880921895006,
724
+ "eval_dim_128_cosine_precision@3": 0.3079385403329065,
725
+ "eval_dim_128_cosine_precision@5": 0.29961587708066584,
726
+ "eval_dim_128_cosine_recall@1": 0.036297623853982414,
727
+ "eval_dim_128_cosine_recall@10": 0.24000960695821508,
728
+ "eval_dim_128_cosine_recall@3": 0.10638786483158841,
729
+ "eval_dim_128_cosine_recall@5": 0.16032639984514846,
730
+ "eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
731
+ "eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
732
+ "eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
733
+ "eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
734
+ "eval_dim_256_cosine_map@100": 0.4298669852983799,
735
+ "eval_dim_256_cosine_mrr@10": 0.3551361197487955,
736
+ "eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
737
+ "eval_dim_256_cosine_precision@1": 0.3437900128040973,
738
+ "eval_dim_256_cosine_precision@10": 0.3040973111395647,
739
+ "eval_dim_256_cosine_precision@3": 0.342936406316688,
740
+ "eval_dim_256_cosine_precision@5": 0.33457106274007686,
741
+ "eval_dim_256_cosine_recall@1": 0.04013102608834382,
742
+ "eval_dim_256_cosine_recall@10": 0.2648598688529433,
743
+ "eval_dim_256_cosine_recall@3": 0.11771735023719074,
744
+ "eval_dim_256_cosine_recall@5": 0.17837935755014916,
745
+ "eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
746
+ "eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
747
+ "eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
748
+ "eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
749
+ "eval_dim_512_cosine_map@100": 0.4476805587612892,
750
+ "eval_dim_512_cosine_mrr@10": 0.37212542934373866,
751
+ "eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
752
+ "eval_dim_512_cosine_precision@1": 0.35979513444302175,
753
+ "eval_dim_512_cosine_precision@10": 0.3173495518565941,
754
+ "eval_dim_512_cosine_precision@3": 0.35936833119931705,
755
+ "eval_dim_512_cosine_precision@5": 0.34967989756722156,
756
+ "eval_dim_512_cosine_recall@1": 0.04265405128130224,
757
+ "eval_dim_512_cosine_recall@10": 0.2781876565001863,
758
+ "eval_dim_512_cosine_recall@3": 0.12523102347193127,
759
+ "eval_dim_512_cosine_recall@5": 0.18912519336740205,
760
+ "eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
761
+ "eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
762
+ "eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
763
+ "eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
764
+ "eval_dim_64_cosine_map@100": 0.3539045084602349,
765
+ "eval_dim_64_cosine_mrr@10": 0.28429414873076814,
766
+ "eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
767
+ "eval_dim_64_cosine_precision@1": 0.2740076824583867,
768
+ "eval_dim_64_cosine_precision@10": 0.24571062740076827,
769
+ "eval_dim_64_cosine_precision@3": 0.27315407597097735,
770
+ "eval_dim_64_cosine_precision@5": 0.2670934699103713,
771
+ "eval_dim_64_cosine_recall@1": 0.03167890172057568,
772
+ "eval_dim_64_cosine_recall@10": 0.21092883720941633,
773
+ "eval_dim_64_cosine_recall@3": 0.09267023360511464,
774
+ "eval_dim_64_cosine_recall@5": 0.14048625468314752,
775
+ "eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
776
+ "eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
777
+ "eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
778
+ "eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
779
+ "eval_dim_768_cosine_map@100": 0.4493001842217619,
780
+ "eval_dim_768_cosine_mrr@10": 0.37149335406377615,
781
+ "eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
782
+ "eval_dim_768_cosine_precision@1": 0.3591549295774648,
783
+ "eval_dim_768_cosine_precision@10": 0.31670934699103714,
784
+ "eval_dim_768_cosine_precision@3": 0.3587281263337601,
785
+ "eval_dim_768_cosine_precision@5": 0.34852752880921894,
786
+ "eval_dim_768_cosine_recall@1": 0.04250079684114586,
787
+ "eval_dim_768_cosine_recall@10": 0.27695909667507057,
788
+ "eval_dim_768_cosine_recall@3": 0.12462187901616553,
789
+ "eval_dim_768_cosine_recall@5": 0.1875478484365334,
790
+ "eval_runtime": 99.0843,
791
+ "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.29402896525927075,
793
+ "eval_steps_per_second": 0.0,
794
+ "step": 98
795
+ },
796
+ {
797
+ "epoch": 1.010204081632653,
798
+ "grad_norm": 342.861328125,
799
+ "learning_rate": 1e-05,
800
+ "loss": 1.9644,
801
+ "step": 99
802
+ },
803
+ {
804
+ "epoch": 1.0204081632653061,
805
+ "grad_norm": 761.8235473632812,
806
+ "learning_rate": 1.0102040816326531e-05,
807
+ "loss": 7.4242,
808
+ "step": 100
809
+ },
810
+ {
811
+ "epoch": 1.030612244897959,
812
+ "grad_norm": 146.39175415039062,
813
+ "learning_rate": 1.0204081632653063e-05,
814
+ "loss": 0.9592,
815
+ "step": 101
816
+ },
817
+ {
818
+ "epoch": 1.0408163265306123,
819
+ "grad_norm": 69.37447357177734,
820
+ "learning_rate": 1.0306122448979591e-05,
821
+ "loss": 0.3051,
822
+ "step": 102
823
+ },
824
+ {
825
+ "epoch": 1.0510204081632653,
826
+ "grad_norm": 241.93687438964844,
827
+ "learning_rate": 1.0408163265306123e-05,
828
+ "loss": 0.926,
829
+ "step": 103
830
+ },
831
+ {
832
+ "epoch": 1.0612244897959184,
833
+ "grad_norm": 13.75313949584961,
834
+ "learning_rate": 1.0510204081632654e-05,
835
+ "loss": 0.0751,
836
+ "step": 104
837
+ },
838
+ {
839
+ "epoch": 1.0714285714285714,
840
+ "grad_norm": 1.861573576927185,
841
+ "learning_rate": 1.0612244897959186e-05,
842
+ "loss": 0.0111,
843
+ "step": 105
844
+ },
845
+ {
846
+ "epoch": 1.0816326530612246,
847
+ "grad_norm": 1.4446377754211426,
848
+ "learning_rate": 1.0714285714285714e-05,
849
+ "loss": 0.0072,
850
+ "step": 106
851
+ },
852
+ {
853
+ "epoch": 1.0918367346938775,
854
+ "grad_norm": 2.217988967895508,
855
+ "learning_rate": 1.0816326530612246e-05,
856
+ "loss": 0.0107,
857
+ "step": 107
858
+ },
859
+ {
860
+ "epoch": 1.1020408163265305,
861
+ "grad_norm": 620.331787109375,
862
+ "learning_rate": 1.0918367346938776e-05,
863
+ "loss": 3.4505,
864
+ "step": 108
865
+ },
866
+ {
867
+ "epoch": 1.1122448979591837,
868
+ "grad_norm": 1.4038218259811401,
869
+ "learning_rate": 1.1020408163265306e-05,
870
+ "loss": 0.005,
871
+ "step": 109
872
+ },
873
+ {
874
+ "epoch": 1.1224489795918366,
875
+ "grad_norm": 46.48203659057617,
876
+ "learning_rate": 1.1122448979591838e-05,
877
+ "loss": 0.1701,
878
+ "step": 110
879
+ },
880
+ {
881
+ "epoch": 1.1326530612244898,
882
+ "grad_norm": 6.003911972045898,
883
+ "learning_rate": 1.1224489795918367e-05,
884
+ "loss": 0.027,
885
+ "step": 111
886
+ },
887
+ {
888
+ "epoch": 1.1428571428571428,
889
+ "grad_norm": 379.09527587890625,
890
+ "learning_rate": 1.1326530612244899e-05,
891
+ "loss": 1.3824,
892
+ "step": 112
893
+ },
894
+ {
895
+ "epoch": 1.153061224489796,
896
+ "grad_norm": 1103.1077880859375,
897
+ "learning_rate": 1.1428571428571429e-05,
898
+ "loss": 8.1459,
899
+ "step": 113
900
+ },
901
+ {
902
+ "epoch": 1.163265306122449,
903
+ "grad_norm": 29.499439239501953,
904
+ "learning_rate": 1.1530612244897961e-05,
905
+ "loss": 0.0917,
906
+ "step": 114
907
+ },
908
+ {
909
+ "epoch": 1.1734693877551021,
910
+ "grad_norm": 0.06352390348911285,
911
+ "learning_rate": 1.1632653061224491e-05,
912
+ "loss": 0.0003,
913
+ "step": 115
914
+ },
915
+ {
916
+ "epoch": 1.183673469387755,
917
+ "grad_norm": 111.55418395996094,
918
+ "learning_rate": 1.1734693877551021e-05,
919
+ "loss": 0.3716,
920
+ "step": 116
921
+ },
922
+ {
923
+ "epoch": 1.193877551020408,
924
+ "grad_norm": 32.166500091552734,
925
+ "learning_rate": 1.1836734693877552e-05,
926
+ "loss": 0.1704,
927
+ "step": 117
928
+ },
929
+ {
930
+ "epoch": 1.2040816326530612,
931
+ "grad_norm": 870.0745239257812,
932
+ "learning_rate": 1.1938775510204084e-05,
933
+ "loss": 9.8059,
934
+ "step": 118
935
+ },
936
+ {
937
+ "epoch": 1.2142857142857142,
938
+ "grad_norm": 139.17662048339844,
939
+ "learning_rate": 1.2040816326530614e-05,
940
+ "loss": 0.5882,
941
+ "step": 119
942
+ },
943
+ {
944
+ "epoch": 1.2244897959183674,
945
+ "grad_norm": 28.489713668823242,
946
+ "learning_rate": 1.2142857142857142e-05,
947
+ "loss": 0.0531,
948
+ "step": 120
949
+ },
950
+ {
951
+ "epoch": 1.2346938775510203,
952
+ "grad_norm": 0.18062859773635864,
953
+ "learning_rate": 1.2244897959183674e-05,
954
+ "loss": 0.0005,
955
+ "step": 121
956
+ },
957
+ {
958
+ "epoch": 1.2448979591836735,
959
+ "grad_norm": 8.26645565032959,
960
+ "learning_rate": 1.2346938775510204e-05,
961
+ "loss": 0.0314,
962
+ "step": 122
963
+ },
964
+ {
965
+ "epoch": 1.2551020408163265,
966
+ "grad_norm": 64.67955017089844,
967
+ "learning_rate": 1.2448979591836736e-05,
968
+ "loss": 0.1811,
969
+ "step": 123
970
+ },
971
+ {
972
+ "epoch": 1.2653061224489797,
973
+ "grad_norm": 420.44439697265625,
974
+ "learning_rate": 1.2551020408163267e-05,
975
+ "loss": 2.6136,
976
+ "step": 124
977
+ },
978
+ {
979
+ "epoch": 1.2755102040816326,
980
+ "grad_norm": 3.5323660373687744,
981
+ "learning_rate": 1.2653061224489798e-05,
982
+ "loss": 0.0087,
983
+ "step": 125
984
+ },
985
+ {
986
+ "epoch": 1.2857142857142856,
987
+ "grad_norm": 52.854801177978516,
988
+ "learning_rate": 1.2755102040816327e-05,
989
+ "loss": 0.1269,
990
+ "step": 126
991
+ },
992
+ {
993
+ "epoch": 1.2959183673469388,
994
+ "grad_norm": 4.583413124084473,
995
+ "learning_rate": 1.2857142857142859e-05,
996
+ "loss": 0.0091,
997
+ "step": 127
998
+ },
999
+ {
1000
+ "epoch": 1.306122448979592,
1001
+ "grad_norm": 17.20958137512207,
1002
+ "learning_rate": 1.2959183673469389e-05,
1003
+ "loss": 0.0467,
1004
+ "step": 128
1005
+ },
1006
+ {
1007
+ "epoch": 1.316326530612245,
1008
+ "grad_norm": 8.821357727050781,
1009
+ "learning_rate": 1.3061224489795918e-05,
1010
+ "loss": 0.0282,
1011
+ "step": 129
1012
+ },
1013
+ {
1014
+ "epoch": 1.3265306122448979,
1015
+ "grad_norm": 0.3024923503398895,
1016
+ "learning_rate": 1.316326530612245e-05,
1017
+ "loss": 0.0012,
1018
+ "step": 130
1019
+ },
1020
+ {
1021
+ "epoch": 1.336734693877551,
1022
+ "grad_norm": 1110.76513671875,
1023
+ "learning_rate": 1.326530612244898e-05,
1024
+ "loss": 3.5135,
1025
+ "step": 131
1026
+ },
1027
+ {
1028
+ "epoch": 1.346938775510204,
1029
+ "grad_norm": 4.655632495880127,
1030
+ "learning_rate": 1.3367346938775512e-05,
1031
+ "loss": 0.0186,
1032
+ "step": 132
1033
+ },
1034
+ {
1035
+ "epoch": 1.3571428571428572,
1036
+ "grad_norm": 641.764404296875,
1037
+ "learning_rate": 1.3469387755102042e-05,
1038
+ "loss": 3.2599,
1039
+ "step": 133
1040
+ },
1041
+ {
1042
+ "epoch": 1.3673469387755102,
1043
+ "grad_norm": 1076.8260498046875,
1044
+ "learning_rate": 1.3571428571428574e-05,
1045
+ "loss": 5.5417,
1046
+ "step": 134
1047
+ },
1048
+ {
1049
+ "epoch": 1.3775510204081631,
1050
+ "grad_norm": 0.5416738390922546,
1051
+ "learning_rate": 1.3673469387755102e-05,
1052
+ "loss": 0.0019,
1053
+ "step": 135
1054
+ },
1055
+ {
1056
+ "epoch": 1.3877551020408163,
1057
+ "grad_norm": 200.03311157226562,
1058
+ "learning_rate": 1.3775510204081634e-05,
1059
+ "loss": 0.5649,
1060
+ "step": 136
1061
+ },
1062
+ {
1063
+ "epoch": 1.3979591836734695,
1064
+ "grad_norm": 35.22038650512695,
1065
+ "learning_rate": 1.3877551020408165e-05,
1066
+ "loss": 0.084,
1067
+ "step": 137
1068
+ },
1069
+ {
1070
+ "epoch": 1.4081632653061225,
1071
+ "grad_norm": 141.9106903076172,
1072
+ "learning_rate": 1.3979591836734696e-05,
1073
+ "loss": 0.6062,
1074
+ "step": 138
1075
+ },
1076
+ {
1077
+ "epoch": 1.4183673469387754,
1078
+ "grad_norm": 15.920783996582031,
1079
+ "learning_rate": 1.4081632653061225e-05,
1080
+ "loss": 0.0639,
1081
+ "step": 139
1082
+ },
1083
+ {
1084
+ "epoch": 1.4285714285714286,
1085
+ "grad_norm": 206.33274841308594,
1086
+ "learning_rate": 1.4183673469387755e-05,
1087
+ "loss": 0.4069,
1088
+ "step": 140
1089
+ },
1090
+ {
1091
+ "epoch": 1.4387755102040816,
1092
+ "grad_norm": 51.149173736572266,
1093
+ "learning_rate": 1.4285714285714287e-05,
1094
+ "loss": 0.2462,
1095
+ "step": 141
1096
+ },
1097
+ {
1098
+ "epoch": 1.4489795918367347,
1099
+ "grad_norm": 658.653564453125,
1100
+ "learning_rate": 1.4387755102040817e-05,
1101
+ "loss": 4.9288,
1102
+ "step": 142
1103
+ },
1104
+ {
1105
+ "epoch": 1.4591836734693877,
1106
+ "grad_norm": 63.49065399169922,
1107
+ "learning_rate": 1.448979591836735e-05,
1108
+ "loss": 0.1852,
1109
+ "step": 143
1110
+ },
1111
+ {
1112
+ "epoch": 1.469387755102041,
1113
+ "grad_norm": 1453.699462890625,
1114
+ "learning_rate": 1.4591836734693878e-05,
1115
+ "loss": 3.0971,
1116
+ "step": 144
1117
+ },
1118
+ {
1119
+ "epoch": 1.4795918367346939,
1120
+ "grad_norm": 499.0628662109375,
1121
+ "learning_rate": 1.469387755102041e-05,
1122
+ "loss": 3.787,
1123
+ "step": 145
1124
+ },
1125
+ {
1126
+ "epoch": 1.489795918367347,
1127
+ "grad_norm": 253.33152770996094,
1128
+ "learning_rate": 1.479591836734694e-05,
1129
+ "loss": 0.8474,
1130
+ "step": 146
1131
+ },
1132
+ {
1133
+ "epoch": 1.5,
1134
+ "grad_norm": 0.8343175649642944,
1135
+ "learning_rate": 1.4897959183673472e-05,
1136
+ "loss": 0.0028,
1137
+ "step": 147
1138
+ },
1139
+ {
1140
+ "epoch": 1.510204081632653,
1141
+ "grad_norm": 38.5785026550293,
1142
+ "learning_rate": 1.5000000000000002e-05,
1143
+ "loss": 0.0931,
1144
+ "step": 148
1145
+ },
1146
+ {
1147
+ "epoch": 1.5204081632653061,
1148
+ "grad_norm": 563.4974365234375,
1149
+ "learning_rate": 1.510204081632653e-05,
1150
+ "loss": 1.8378,
1151
+ "step": 149
1152
+ },
1153
+ {
1154
+ "epoch": 1.5306122448979593,
1155
+ "grad_norm": 749.0945434570312,
1156
+ "learning_rate": 1.5204081632653063e-05,
1157
+ "loss": 2.6074,
1158
+ "step": 150
1159
+ },
1160
+ {
1161
+ "epoch": 1.5408163265306123,
1162
+ "grad_norm": 62.52786636352539,
1163
+ "learning_rate": 1.530612244897959e-05,
1164
+ "loss": 0.1441,
1165
+ "step": 151
1166
+ },
1167
+ {
1168
+ "epoch": 1.5510204081632653,
1169
+ "grad_norm": 281.54400634765625,
1170
+ "learning_rate": 1.5408163265306123e-05,
1171
+ "loss": 0.5622,
1172
+ "step": 152
1173
+ },
1174
+ {
1175
+ "epoch": 1.5612244897959182,
1176
+ "grad_norm": 1.1233166456222534,
1177
+ "learning_rate": 1.5510204081632655e-05,
1178
+ "loss": 0.0049,
1179
+ "step": 153
1180
+ },
1181
+ {
1182
+ "epoch": 1.5714285714285714,
1183
+ "grad_norm": 9.458003044128418,
1184
+ "learning_rate": 1.5612244897959187e-05,
1185
+ "loss": 0.0268,
1186
+ "step": 154
1187
+ },
1188
+ {
1189
+ "epoch": 1.5816326530612246,
1190
+ "grad_norm": 7.9042439460754395,
1191
+ "learning_rate": 1.5714285714285715e-05,
1192
+ "loss": 0.0281,
1193
+ "step": 155
1194
+ },
1195
+ {
1196
+ "epoch": 1.5918367346938775,
1197
+ "grad_norm": 402.8667907714844,
1198
+ "learning_rate": 1.5816326530612247e-05,
1199
+ "loss": 2.9755,
1200
+ "step": 156
1201
+ },
1202
+ {
1203
+ "epoch": 1.6020408163265305,
1204
+ "grad_norm": 359.3101806640625,
1205
+ "learning_rate": 1.5918367346938776e-05,
1206
+ "loss": 1.0982,
1207
+ "step": 157
1208
+ },
1209
+ {
1210
+ "epoch": 1.6122448979591837,
1211
+ "grad_norm": 26.466707229614258,
1212
+ "learning_rate": 1.6020408163265308e-05,
1213
+ "loss": 0.0621,
1214
+ "step": 158
1215
+ },
1216
+ {
1217
+ "epoch": 1.6224489795918369,
1218
+ "grad_norm": 472.1581726074219,
1219
+ "learning_rate": 1.612244897959184e-05,
1220
+ "loss": 6.9631,
1221
+ "step": 159
1222
+ },
1223
+ {
1224
+ "epoch": 1.6326530612244898,
1225
+ "grad_norm": 812.54638671875,
1226
+ "learning_rate": 1.6224489795918368e-05,
1227
+ "loss": 4.7216,
1228
+ "step": 160
1229
+ },
1230
+ {
1231
+ "epoch": 1.6428571428571428,
1232
+ "grad_norm": 252.12796020507812,
1233
+ "learning_rate": 1.63265306122449e-05,
1234
+ "loss": 0.848,
1235
+ "step": 161
1236
+ },
1237
+ {
1238
+ "epoch": 1.6530612244897958,
1239
+ "grad_norm": 1087.48828125,
1240
+ "learning_rate": 1.642857142857143e-05,
1241
+ "loss": 5.6006,
1242
+ "step": 162
1243
+ },
1244
+ {
1245
+ "epoch": 1.663265306122449,
1246
+ "grad_norm": 280.405517578125,
1247
+ "learning_rate": 1.653061224489796e-05,
1248
+ "loss": 4.299,
1249
+ "step": 163
1250
+ },
1251
+ {
1252
+ "epoch": 1.6734693877551021,
1253
+ "grad_norm": 457.81494140625,
1254
+ "learning_rate": 1.6632653061224492e-05,
1255
+ "loss": 2.042,
1256
+ "step": 164
1257
+ },
1258
+ {
1259
+ "epoch": 1.683673469387755,
1260
+ "grad_norm": 511.0380859375,
1261
+ "learning_rate": 1.673469387755102e-05,
1262
+ "loss": 2.4823,
1263
+ "step": 165
1264
+ },
1265
+ {
1266
+ "epoch": 1.693877551020408,
1267
+ "grad_norm": 7.505221366882324,
1268
+ "learning_rate": 1.6836734693877553e-05,
1269
+ "loss": 0.0189,
1270
+ "step": 166
1271
+ },
1272
+ {
1273
+ "epoch": 1.7040816326530612,
1274
+ "grad_norm": 1.01173734664917,
1275
+ "learning_rate": 1.6938775510204085e-05,
1276
+ "loss": 0.0039,
1277
+ "step": 167
1278
+ },
1279
+ {
1280
+ "epoch": 1.7142857142857144,
1281
+ "grad_norm": 0.5971992015838623,
1282
+ "learning_rate": 1.7040816326530613e-05,
1283
+ "loss": 0.0024,
1284
+ "step": 168
1285
+ },
1286
+ {
1287
+ "epoch": 1.7244897959183674,
1288
+ "grad_norm": 505.6401672363281,
1289
+ "learning_rate": 1.7142857142857142e-05,
1290
+ "loss": 2.0453,
1291
+ "step": 169
1292
+ },
1293
+ {
1294
+ "epoch": 1.7346938775510203,
1295
+ "grad_norm": 4.466002464294434,
1296
+ "learning_rate": 1.7244897959183674e-05,
1297
+ "loss": 0.0092,
1298
+ "step": 170
1299
+ },
1300
+ {
1301
+ "epoch": 1.7448979591836735,
1302
+ "grad_norm": 1.1195125579833984,
1303
+ "learning_rate": 1.7346938775510206e-05,
1304
+ "loss": 0.0029,
1305
+ "step": 171
1306
+ },
1307
+ {
1308
+ "epoch": 1.7551020408163265,
1309
+ "grad_norm": 104.82202911376953,
1310
+ "learning_rate": 1.7448979591836738e-05,
1311
+ "loss": 0.3271,
1312
+ "step": 172
1313
+ },
1314
+ {
1315
+ "epoch": 1.7653061224489797,
1316
+ "grad_norm": 1.860406756401062,
1317
+ "learning_rate": 1.7551020408163266e-05,
1318
+ "loss": 0.0054,
1319
+ "step": 173
1320
+ },
1321
+ {
1322
+ "epoch": 1.7755102040816326,
1323
+ "grad_norm": 0.044311508536338806,
1324
+ "learning_rate": 1.7653061224489798e-05,
1325
+ "loss": 0.0002,
1326
+ "step": 174
1327
+ },
1328
+ {
1329
+ "epoch": 1.7857142857142856,
1330
+ "grad_norm": 40.70656204223633,
1331
+ "learning_rate": 1.7755102040816327e-05,
1332
+ "loss": 0.0685,
1333
+ "step": 175
1334
+ },
1335
+ {
1336
+ "epoch": 1.7959183673469388,
1337
+ "grad_norm": 395.348388671875,
1338
+ "learning_rate": 1.785714285714286e-05,
1339
+ "loss": 1.3097,
1340
+ "step": 176
1341
+ },
1342
+ {
1343
+ "epoch": 1.806122448979592,
1344
+ "grad_norm": 326.2778015136719,
1345
+ "learning_rate": 1.795918367346939e-05,
1346
+ "loss": 1.8817,
1347
+ "step": 177
1348
+ },
1349
+ {
1350
+ "epoch": 1.816326530612245,
1351
+ "grad_norm": 41.05072784423828,
1352
+ "learning_rate": 1.806122448979592e-05,
1353
+ "loss": 0.2497,
1354
+ "step": 178
1355
+ },
1356
+ {
1357
+ "epoch": 1.8265306122448979,
1358
+ "grad_norm": 121.29589080810547,
1359
+ "learning_rate": 1.816326530612245e-05,
1360
+ "loss": 0.5822,
1361
+ "step": 179
1362
+ },
1363
+ {
1364
+ "epoch": 1.836734693877551,
1365
+ "grad_norm": 711.2618408203125,
1366
+ "learning_rate": 1.826530612244898e-05,
1367
+ "loss": 1.8103,
1368
+ "step": 180
1369
+ },
1370
+ {
1371
+ "epoch": 1.8469387755102042,
1372
+ "grad_norm": 500.7347106933594,
1373
+ "learning_rate": 1.836734693877551e-05,
1374
+ "loss": 1.5506,
1375
+ "step": 181
1376
+ },
1377
+ {
1378
+ "epoch": 1.8571428571428572,
1379
+ "grad_norm": 252.05322265625,
1380
+ "learning_rate": 1.8469387755102043e-05,
1381
+ "loss": 1.281,
1382
+ "step": 182
1383
+ },
1384
+ {
1385
+ "epoch": 1.8673469387755102,
1386
+ "grad_norm": 370.9935302734375,
1387
+ "learning_rate": 1.8571428571428575e-05,
1388
+ "loss": 2.8616,
1389
+ "step": 183
1390
+ },
1391
+ {
1392
+ "epoch": 1.8775510204081631,
1393
+ "grad_norm": 4.682647705078125,
1394
+ "learning_rate": 1.8673469387755104e-05,
1395
+ "loss": 0.0118,
1396
+ "step": 184
1397
+ },
1398
+ {
1399
+ "epoch": 1.8877551020408163,
1400
+ "grad_norm": 2.143557548522949,
1401
+ "learning_rate": 1.8775510204081636e-05,
1402
+ "loss": 0.0038,
1403
+ "step": 185
1404
+ },
1405
+ {
1406
+ "epoch": 1.8979591836734695,
1407
+ "grad_norm": 6.499508857727051,
1408
+ "learning_rate": 1.8877551020408164e-05,
1409
+ "loss": 0.0331,
1410
+ "step": 186
1411
+ },
1412
+ {
1413
+ "epoch": 1.9081632653061225,
1414
+ "grad_norm": 7.2162089347839355,
1415
+ "learning_rate": 1.8979591836734696e-05,
1416
+ "loss": 0.0273,
1417
+ "step": 187
1418
+ },
1419
+ {
1420
+ "epoch": 1.9183673469387754,
1421
+ "grad_norm": 23.073841094970703,
1422
+ "learning_rate": 1.9081632653061225e-05,
1423
+ "loss": 0.1026,
1424
+ "step": 188
1425
+ },
1426
+ {
1427
+ "epoch": 1.9285714285714286,
1428
+ "grad_norm": 48.74525833129883,
1429
+ "learning_rate": 1.9183673469387756e-05,
1430
+ "loss": 0.1942,
1431
+ "step": 189
1432
+ },
1433
+ {
1434
+ "epoch": 1.9387755102040818,
1435
+ "grad_norm": 384.64678955078125,
1436
+ "learning_rate": 1.928571428571429e-05,
1437
+ "loss": 3.4886,
1438
+ "step": 190
1439
+ },
1440
+ {
1441
+ "epoch": 1.9489795918367347,
1442
+ "grad_norm": 103.53422546386719,
1443
+ "learning_rate": 1.9387755102040817e-05,
1444
+ "loss": 0.628,
1445
+ "step": 191
1446
+ },
1447
+ {
1448
+ "epoch": 1.9591836734693877,
1449
+ "grad_norm": 42.5008544921875,
1450
+ "learning_rate": 1.948979591836735e-05,
1451
+ "loss": 0.1967,
1452
+ "step": 192
1453
+ },
1454
+ {
1455
+ "epoch": 1.9693877551020407,
1456
+ "grad_norm": 145.1553955078125,
1457
+ "learning_rate": 1.9591836734693877e-05,
1458
+ "loss": 3.9822,
1459
+ "step": 193
1460
+ },
1461
+ {
1462
+ "epoch": 1.9795918367346939,
1463
+ "grad_norm": 0.07428821176290512,
1464
+ "learning_rate": 1.969387755102041e-05,
1465
+ "loss": 0.0003,
1466
+ "step": 194
1467
+ },
1468
+ {
1469
+ "epoch": 1.989795918367347,
1470
+ "grad_norm": 545.6088256835938,
1471
+ "learning_rate": 1.979591836734694e-05,
1472
+ "loss": 3.7309,
1473
+ "step": 195
1474
+ },
1475
+ {
1476
+ "epoch": 2.0,
1477
+ "grad_norm": 0.5490627288818359,
1478
+ "learning_rate": 1.9897959183673473e-05,
1479
+ "loss": 0.0024,
1480
+ "step": 196
1481
+ },
1482
+ {
1483
+ "epoch": 2.0,
1484
+ "eval_dim_1024_cosine_accuracy@1": 0.32522407170294493,
1485
+ "eval_dim_1024_cosine_accuracy@10": 0.3969270166453265,
1486
+ "eval_dim_1024_cosine_accuracy@3": 0.33290653008962867,
1487
+ "eval_dim_1024_cosine_accuracy@5": 0.36043533930857874,
1488
+ "eval_dim_1024_cosine_map@100": 0.4164888021641558,
1489
+ "eval_dim_1024_cosine_mrr@10": 0.33769460195516493,
1490
+ "eval_dim_1024_cosine_ndcg@10": 0.34986350069216465,
1491
+ "eval_dim_1024_cosine_precision@1": 0.32522407170294493,
1492
+ "eval_dim_1024_cosine_precision@10": 0.28361075544174136,
1493
+ "eval_dim_1024_cosine_precision@3": 0.3254374733247973,
1494
+ "eval_dim_1024_cosine_precision@5": 0.31626120358514725,
1495
+ "eval_dim_1024_cosine_recall@1": 0.04113491331982186,
1496
+ "eval_dim_1024_cosine_recall@10": 0.2664549051060991,
1497
+ "eval_dim_1024_cosine_recall@3": 0.12080229545561262,
1498
+ "eval_dim_1024_cosine_recall@5": 0.18183789253196145,
1499
+ "eval_dim_128_cosine_accuracy@1": 0.30217669654289375,
1500
+ "eval_dim_128_cosine_accuracy@10": 0.3546734955185659,
1501
+ "eval_dim_128_cosine_accuracy@3": 0.3072983354673495,
1502
+ "eval_dim_128_cosine_accuracy@5": 0.3265044814340589,
1503
+ "eval_dim_128_cosine_map@100": 0.38014172959059034,
1504
+ "eval_dim_128_cosine_mrr@10": 0.3112729406743488,
1505
+ "eval_dim_128_cosine_ndcg@10": 0.32071443787836906,
1506
+ "eval_dim_128_cosine_precision@1": 0.30217669654289375,
1507
+ "eval_dim_128_cosine_precision@10": 0.26312419974391804,
1508
+ "eval_dim_128_cosine_precision@3": 0.30239009816474605,
1509
+ "eval_dim_128_cosine_precision@5": 0.29359795134443023,
1510
+ "eval_dim_128_cosine_recall@1": 0.03603846894598867,
1511
+ "eval_dim_128_cosine_recall@10": 0.23664446759855584,
1512
+ "eval_dim_128_cosine_recall@3": 0.10607255532328354,
1513
+ "eval_dim_128_cosine_recall@5": 0.15998840334482403,
1514
+ "eval_dim_256_cosine_accuracy@1": 0.31049935979513443,
1515
+ "eval_dim_256_cosine_accuracy@10": 0.3725992317541613,
1516
+ "eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
1517
+ "eval_dim_256_cosine_accuracy@5": 0.34571062740076824,
1518
+ "eval_dim_256_cosine_map@100": 0.3940538127924734,
1519
+ "eval_dim_256_cosine_mrr@10": 0.3219094872263883,
1520
+ "eval_dim_256_cosine_ndcg@10": 0.33365785011470184,
1521
+ "eval_dim_256_cosine_precision@1": 0.31049935979513443,
1522
+ "eval_dim_256_cosine_precision@10": 0.2727272727272727,
1523
+ "eval_dim_256_cosine_precision@3": 0.3109261630388391,
1524
+ "eval_dim_256_cosine_precision@5": 0.3035851472471191,
1525
+ "eval_dim_256_cosine_recall@1": 0.0379038673811849,
1526
+ "eval_dim_256_cosine_recall@10": 0.25061548215235363,
1527
+ "eval_dim_256_cosine_recall@3": 0.11184662439829526,
1528
+ "eval_dim_256_cosine_recall@5": 0.16972372403865282,
1529
+ "eval_dim_512_cosine_accuracy@1": 0.32842509603072984,
1530
+ "eval_dim_512_cosine_accuracy@10": 0.39564660691421255,
1531
+ "eval_dim_512_cosine_accuracy@3": 0.33418693982074266,
1532
+ "eval_dim_512_cosine_accuracy@5": 0.36555697823303457,
1533
+ "eval_dim_512_cosine_map@100": 0.4125328284000196,
1534
+ "eval_dim_512_cosine_mrr@10": 0.34027168058858154,
1535
+ "eval_dim_512_cosine_ndcg@10": 0.3525488928748249,
1536
+ "eval_dim_512_cosine_precision@1": 0.32842509603072984,
1537
+ "eval_dim_512_cosine_precision@10": 0.28693982074263763,
1538
+ "eval_dim_512_cosine_precision@3": 0.3282116944088775,
1539
+ "eval_dim_512_cosine_precision@5": 0.31997439180537773,
1540
+ "eval_dim_512_cosine_recall@1": 0.04071091183465321,
1541
+ "eval_dim_512_cosine_recall@10": 0.2638449444559509,
1542
+ "eval_dim_512_cosine_recall@3": 0.11970757850133786,
1543
+ "eval_dim_512_cosine_recall@5": 0.1806811237454132,
1544
+ "eval_dim_64_cosine_accuracy@1": 0.28040973111395645,
1545
+ "eval_dim_64_cosine_accuracy@10": 0.3348271446862996,
1546
+ "eval_dim_64_cosine_accuracy@3": 0.28297055057618437,
1547
+ "eval_dim_64_cosine_accuracy@5": 0.3072983354673495,
1548
+ "eval_dim_64_cosine_map@100": 0.35085623648833997,
1549
+ "eval_dim_64_cosine_mrr@10": 0.28944678170030247,
1550
+ "eval_dim_64_cosine_ndcg@10": 0.2991224720529457,
1551
+ "eval_dim_64_cosine_precision@1": 0.28040973111395645,
1552
+ "eval_dim_64_cosine_precision@10": 0.24878361075544175,
1553
+ "eval_dim_64_cosine_precision@3": 0.27955612462654716,
1554
+ "eval_dim_64_cosine_precision@5": 0.27247119078105,
1555
+ "eval_dim_64_cosine_recall@1": 0.03187808455878807,
1556
+ "eval_dim_64_cosine_recall@10": 0.2128007008801171,
1557
+ "eval_dim_64_cosine_recall@3": 0.09363361347149868,
1558
+ "eval_dim_64_cosine_recall@5": 0.14192536615474802,
1559
+ "eval_dim_768_cosine_accuracy@1": 0.32970550576184376,
1560
+ "eval_dim_768_cosine_accuracy@10": 0.3994878361075544,
1561
+ "eval_dim_768_cosine_accuracy@3": 0.33418693982074266,
1562
+ "eval_dim_768_cosine_accuracy@5": 0.36427656850192064,
1563
+ "eval_dim_768_cosine_map@100": 0.4160652625925415,
1564
+ "eval_dim_768_cosine_mrr@10": 0.3415124585899229,
1565
+ "eval_dim_768_cosine_ndcg@10": 0.35370573856938964,
1566
+ "eval_dim_768_cosine_precision@1": 0.32970550576184376,
1567
+ "eval_dim_768_cosine_precision@10": 0.2877720870678617,
1568
+ "eval_dim_768_cosine_precision@3": 0.3288518992744345,
1569
+ "eval_dim_768_cosine_precision@5": 0.31997439180537773,
1570
+ "eval_dim_768_cosine_recall@1": 0.040955758827011135,
1571
+ "eval_dim_768_cosine_recall@10": 0.26685683005601735,
1572
+ "eval_dim_768_cosine_recall@3": 0.12009305539695316,
1573
+ "eval_dim_768_cosine_recall@5": 0.18142212378067016,
1574
+ "eval_runtime": 99.167,
1575
+ "eval_samples_per_second": 0.0,
1576
+ "eval_sequential_score": 0.2991224720529457,
1577
+ "eval_steps_per_second": 0.0,
1578
+ "step": 196
1579
+ },
1580
+ {
1581
+ "epoch": 2.010204081632653,
1582
+ "grad_norm": 231.35763549804688,
1583
+ "learning_rate": 2e-05,
1584
+ "loss": 4.7353,
1585
+ "step": 197
1586
+ },
1587
+ {
1588
+ "epoch": 2.020408163265306,
1589
+ "grad_norm": 26.110666275024414,
1590
+ "learning_rate": 1.9999984141121447e-05,
1591
+ "loss": 0.0998,
1592
+ "step": 198
1593
+ },
1594
+ {
1595
+ "epoch": 2.0306122448979593,
1596
+ "grad_norm": 17.7508544921875,
1597
+ "learning_rate": 1.9999936564536085e-05,
1598
+ "loss": 0.0474,
1599
+ "step": 199
1600
+ },
1601
+ {
1602
+ "epoch": 2.0408163265306123,
1603
+ "grad_norm": 0.015349287539720535,
1604
+ "learning_rate": 1.9999857270394818e-05,
1605
+ "loss": 0.0,
1606
+ "step": 200
1607
+ },
1608
+ {
1609
+ "epoch": 2.0510204081632653,
1610
+ "grad_norm": 11.307738304138184,
1611
+ "learning_rate": 1.9999746258949146e-05,
1612
+ "loss": 0.0592,
1613
+ "step": 201
1614
+ },
1615
+ {
1616
+ "epoch": 2.061224489795918,
1617
+ "grad_norm": 0.015763908624649048,
1618
+ "learning_rate": 1.9999603530551178e-05,
1619
+ "loss": 0.0001,
1620
+ "step": 202
1621
+ },
1622
+ {
1623
+ "epoch": 2.0714285714285716,
1624
+ "grad_norm": 104.52203369140625,
1625
+ "learning_rate": 1.999942908565361e-05,
1626
+ "loss": 0.5587,
1627
+ "step": 203
1628
+ },
1629
+ {
1630
+ "epoch": 2.0816326530612246,
1631
+ "grad_norm": 149.8668212890625,
1632
+ "learning_rate": 1.999922292480975e-05,
1633
+ "loss": 1.9037,
1634
+ "step": 204
1635
+ },
1636
+ {
1637
+ "epoch": 2.0918367346938775,
1638
+ "grad_norm": 28.32903480529785,
1639
+ "learning_rate": 1.9998985048673486e-05,
1640
+ "loss": 0.1247,
1641
+ "step": 205
1642
+ },
1643
+ {
1644
+ "epoch": 2.1020408163265305,
1645
+ "grad_norm": 361.1968688964844,
1646
+ "learning_rate": 1.9998715457999313e-05,
1647
+ "loss": 2.3233,
1648
+ "step": 206
1649
+ },
1650
+ {
1651
+ "epoch": 2.1122448979591835,
1652
+ "grad_norm": 96.5677719116211,
1653
+ "learning_rate": 1.999841415364231e-05,
1654
+ "loss": 0.255,
1655
+ "step": 207
1656
+ },
1657
+ {
1658
+ "epoch": 2.122448979591837,
1659
+ "grad_norm": 80.80358123779297,
1660
+ "learning_rate": 1.999808113655815e-05,
1661
+ "loss": 0.3498,
1662
+ "step": 208
1663
+ },
1664
+ {
1665
+ "epoch": 2.13265306122449,
1666
+ "grad_norm": 0.5918006300926208,
1667
+ "learning_rate": 1.999771640780308e-05,
1668
+ "loss": 0.003,
1669
+ "step": 209
1670
+ },
1671
+ {
1672
+ "epoch": 2.142857142857143,
1673
+ "grad_norm": 1223.066650390625,
1674
+ "learning_rate": 1.999731996853395e-05,
1675
+ "loss": 9.2851,
1676
+ "step": 210
1677
+ },
1678
+ {
1679
+ "epoch": 2.1530612244897958,
1680
+ "grad_norm": 310.9404602050781,
1681
+ "learning_rate": 1.9996891820008165e-05,
1682
+ "loss": 1.0812,
1683
+ "step": 211
1684
+ },
1685
+ {
1686
+ "epoch": 2.163265306122449,
1687
+ "grad_norm": 72.62071990966797,
1688
+ "learning_rate": 1.9996431963583724e-05,
1689
+ "loss": 0.3192,
1690
+ "step": 212
1691
+ },
1692
+ {
1693
+ "epoch": 2.173469387755102,
1694
+ "grad_norm": 3.9059784412384033,
1695
+ "learning_rate": 1.9995940400719184e-05,
1696
+ "loss": 0.0121,
1697
+ "step": 213
1698
+ },
1699
+ {
1700
+ "epoch": 2.183673469387755,
1701
+ "grad_norm": 753.7849731445312,
1702
+ "learning_rate": 1.9995417132973674e-05,
1703
+ "loss": 5.7421,
1704
+ "step": 214
1705
+ },
1706
+ {
1707
+ "epoch": 2.193877551020408,
1708
+ "grad_norm": 63.62609100341797,
1709
+ "learning_rate": 1.999486216200688e-05,
1710
+ "loss": 0.2867,
1711
+ "step": 215
1712
+ },
1713
+ {
1714
+ "epoch": 2.204081632653061,
1715
+ "grad_norm": 47.21674346923828,
1716
+ "learning_rate": 1.999427548957905e-05,
1717
+ "loss": 0.2971,
1718
+ "step": 216
1719
+ },
1720
+ {
1721
+ "epoch": 2.2142857142857144,
1722
+ "grad_norm": 274.9565734863281,
1723
+ "learning_rate": 1.9993657117550972e-05,
1724
+ "loss": 1.616,
1725
+ "step": 217
1726
+ },
1727
+ {
1728
+ "epoch": 2.2244897959183674,
1729
+ "grad_norm": 151.56639099121094,
1730
+ "learning_rate": 1.9993007047883988e-05,
1731
+ "loss": 0.8724,
1732
+ "step": 218
1733
+ },
1734
+ {
1735
+ "epoch": 2.2346938775510203,
1736
+ "grad_norm": 2.2050163745880127,
1737
+ "learning_rate": 1.999232528263997e-05,
1738
+ "loss": 0.0049,
1739
+ "step": 219
1740
+ },
1741
+ {
1742
+ "epoch": 2.2448979591836733,
1743
+ "grad_norm": 93.08734130859375,
1744
+ "learning_rate": 1.9991611823981322e-05,
1745
+ "loss": 0.285,
1746
+ "step": 220
1747
+ },
1748
+ {
1749
+ "epoch": 2.2551020408163267,
1750
+ "grad_norm": 1049.092529296875,
1751
+ "learning_rate": 1.9990866674170984e-05,
1752
+ "loss": 4.5212,
1753
+ "step": 221
1754
+ },
1755
+ {
1756
+ "epoch": 2.2653061224489797,
1757
+ "grad_norm": 532.4985961914062,
1758
+ "learning_rate": 1.99900898355724e-05,
1759
+ "loss": 1.0961,
1760
+ "step": 222
1761
+ },
1762
+ {
1763
+ "epoch": 2.2755102040816326,
1764
+ "grad_norm": 13.340104103088379,
1765
+ "learning_rate": 1.9989281310649516e-05,
1766
+ "loss": 0.0426,
1767
+ "step": 223
1768
+ },
1769
+ {
1770
+ "epoch": 2.2857142857142856,
1771
+ "grad_norm": 894.0902099609375,
1772
+ "learning_rate": 1.9988441101966807e-05,
1773
+ "loss": 6.8518,
1774
+ "step": 224
1775
+ },
1776
+ {
1777
+ "epoch": 2.295918367346939,
1778
+ "grad_norm": 1.1868412494659424,
1779
+ "learning_rate": 1.9987569212189224e-05,
1780
+ "loss": 0.0034,
1781
+ "step": 225
1782
+ },
1783
+ {
1784
+ "epoch": 2.306122448979592,
1785
+ "grad_norm": 13.72503662109375,
1786
+ "learning_rate": 1.9986665644082204e-05,
1787
+ "loss": 0.0195,
1788
+ "step": 226
1789
+ },
1790
+ {
1791
+ "epoch": 2.316326530612245,
1792
+ "grad_norm": 14.843038558959961,
1793
+ "learning_rate": 1.9985730400511658e-05,
1794
+ "loss": 0.0502,
1795
+ "step": 227
1796
+ },
1797
+ {
1798
+ "epoch": 2.326530612244898,
1799
+ "grad_norm": 226.0292205810547,
1800
+ "learning_rate": 1.998476348444397e-05,
1801
+ "loss": 0.4465,
1802
+ "step": 228
1803
+ },
1804
+ {
1805
+ "epoch": 2.336734693877551,
1806
+ "grad_norm": 0.6770716309547424,
1807
+ "learning_rate": 1.998376489894599e-05,
1808
+ "loss": 0.0024,
1809
+ "step": 229
1810
+ },
1811
+ {
1812
+ "epoch": 2.3469387755102042,
1813
+ "grad_norm": 101.99034881591797,
1814
+ "learning_rate": 1.9982734647184997e-05,
1815
+ "loss": 0.4306,
1816
+ "step": 230
1817
+ },
1818
+ {
1819
+ "epoch": 2.357142857142857,
1820
+ "grad_norm": 402.2799377441406,
1821
+ "learning_rate": 1.998167273242872e-05,
1822
+ "loss": 1.4035,
1823
+ "step": 231
1824
+ },
1825
+ {
1826
+ "epoch": 2.36734693877551,
1827
+ "grad_norm": 1080.5897216796875,
1828
+ "learning_rate": 1.9980579158045322e-05,
1829
+ "loss": 10.1881,
1830
+ "step": 232
1831
+ },
1832
+ {
1833
+ "epoch": 2.377551020408163,
1834
+ "grad_norm": 701.9442138671875,
1835
+ "learning_rate": 1.9979453927503366e-05,
1836
+ "loss": 3.6306,
1837
+ "step": 233
1838
+ },
1839
+ {
1840
+ "epoch": 2.387755102040816,
1841
+ "grad_norm": 526.434326171875,
1842
+ "learning_rate": 1.9978297044371834e-05,
1843
+ "loss": 1.3337,
1844
+ "step": 234
1845
+ },
1846
+ {
1847
+ "epoch": 2.3979591836734695,
1848
+ "grad_norm": 111.15966033935547,
1849
+ "learning_rate": 1.9977108512320103e-05,
1850
+ "loss": 0.6753,
1851
+ "step": 235
1852
+ },
1853
+ {
1854
+ "epoch": 2.4081632653061225,
1855
+ "grad_norm": 138.61196899414062,
1856
+ "learning_rate": 1.9975888335117927e-05,
1857
+ "loss": 0.6526,
1858
+ "step": 236
1859
+ },
1860
+ {
1861
+ "epoch": 2.4183673469387754,
1862
+ "grad_norm": 421.8229675292969,
1863
+ "learning_rate": 1.9974636516635436e-05,
1864
+ "loss": 2.3458,
1865
+ "step": 237
1866
+ },
1867
+ {
1868
+ "epoch": 2.4285714285714284,
1869
+ "grad_norm": 70.16128540039062,
1870
+ "learning_rate": 1.9973353060843118e-05,
1871
+ "loss": 0.2163,
1872
+ "step": 238
1873
+ },
1874
+ {
1875
+ "epoch": 2.438775510204082,
1876
+ "grad_norm": 750.59619140625,
1877
+ "learning_rate": 1.9972037971811802e-05,
1878
+ "loss": 10.2189,
1879
+ "step": 239
1880
+ },
1881
+ {
1882
+ "epoch": 2.4489795918367347,
1883
+ "grad_norm": 73.94668579101562,
1884
+ "learning_rate": 1.9970691253712663e-05,
1885
+ "loss": 0.3347,
1886
+ "step": 240
1887
+ },
1888
+ {
1889
+ "epoch": 2.4591836734693877,
1890
+ "grad_norm": 431.5457763671875,
1891
+ "learning_rate": 1.9969312910817183e-05,
1892
+ "loss": 2.5343,
1893
+ "step": 241
1894
+ },
1895
+ {
1896
+ "epoch": 2.4693877551020407,
1897
+ "grad_norm": 2.1232171058654785,
1898
+ "learning_rate": 1.9967902947497158e-05,
1899
+ "loss": 0.0063,
1900
+ "step": 242
1901
+ },
1902
+ {
1903
+ "epoch": 2.479591836734694,
1904
+ "grad_norm": 0.7545721530914307,
1905
+ "learning_rate": 1.9966461368224676e-05,
1906
+ "loss": 0.0025,
1907
+ "step": 243
1908
+ },
1909
+ {
1910
+ "epoch": 2.489795918367347,
1911
+ "grad_norm": 26.93790626525879,
1912
+ "learning_rate": 1.9964988177572106e-05,
1913
+ "loss": 0.1384,
1914
+ "step": 244
1915
+ },
1916
+ {
1917
+ "epoch": 2.5,
1918
+ "grad_norm": 2.8282601833343506,
1919
+ "learning_rate": 1.996348338021207e-05,
1920
+ "loss": 0.0052,
1921
+ "step": 245
1922
+ },
1923
+ {
1924
+ "epoch": 2.510204081632653,
1925
+ "grad_norm": 584.0491333007812,
1926
+ "learning_rate": 1.9961946980917457e-05,
1927
+ "loss": 12.8801,
1928
+ "step": 246
1929
+ },
1930
+ {
1931
+ "epoch": 2.520408163265306,
1932
+ "grad_norm": 720.6881103515625,
1933
+ "learning_rate": 1.9960378984561377e-05,
1934
+ "loss": 8.5862,
1935
+ "step": 247
1936
+ },
1937
+ {
1938
+ "epoch": 2.5306122448979593,
1939
+ "grad_norm": 834.9110717773438,
1940
+ "learning_rate": 1.9958779396117162e-05,
1941
+ "loss": 7.4971,
1942
+ "step": 248
1943
+ },
1944
+ {
1945
+ "epoch": 2.5408163265306123,
1946
+ "grad_norm": 217.15977478027344,
1947
+ "learning_rate": 1.9957148220658348e-05,
1948
+ "loss": 0.9741,
1949
+ "step": 249
1950
+ },
1951
+ {
1952
+ "epoch": 2.5510204081632653,
1953
+ "grad_norm": 486.4735107421875,
1954
+ "learning_rate": 1.9955485463358655e-05,
1955
+ "loss": 4.6348,
1956
+ "step": 250
1957
+ },
1958
+ {
1959
+ "epoch": 2.561224489795918,
1960
+ "grad_norm": 8.122939109802246,
1961
+ "learning_rate": 1.9953791129491985e-05,
1962
+ "loss": 0.0336,
1963
+ "step": 251
1964
+ },
1965
+ {
1966
+ "epoch": 2.571428571428571,
1967
+ "grad_norm": 110.34471893310547,
1968
+ "learning_rate": 1.9952065224432376e-05,
1969
+ "loss": 0.5127,
1970
+ "step": 252
1971
+ },
1972
+ {
1973
+ "epoch": 2.5816326530612246,
1974
+ "grad_norm": 465.07745361328125,
1975
+ "learning_rate": 1.9950307753654016e-05,
1976
+ "loss": 4.2685,
1977
+ "step": 253
1978
+ },
1979
+ {
1980
+ "epoch": 2.5918367346938775,
1981
+ "grad_norm": 177.638671875,
1982
+ "learning_rate": 1.9948518722731208e-05,
1983
+ "loss": 1.1622,
1984
+ "step": 254
1985
+ },
1986
+ {
1987
+ "epoch": 2.6020408163265305,
1988
+ "grad_norm": 1.3182055950164795,
1989
+ "learning_rate": 1.9946698137338357e-05,
1990
+ "loss": 0.0067,
1991
+ "step": 255
1992
+ },
1993
+ {
1994
+ "epoch": 2.612244897959184,
1995
+ "grad_norm": 158.78639221191406,
1996
+ "learning_rate": 1.994484600324995e-05,
1997
+ "loss": 0.443,
1998
+ "step": 256
1999
+ },
2000
+ {
2001
+ "epoch": 2.622448979591837,
2002
+ "grad_norm": 1005.9036865234375,
2003
+ "learning_rate": 1.994296232634054e-05,
2004
+ "loss": 15.6073,
2005
+ "step": 257
2006
+ },
2007
+ {
2008
+ "epoch": 2.63265306122449,
2009
+ "grad_norm": 3.7752788066864014,
2010
+ "learning_rate": 1.994104711258473e-05,
2011
+ "loss": 0.0127,
2012
+ "step": 258
2013
+ },
2014
+ {
2015
+ "epoch": 2.642857142857143,
2016
+ "grad_norm": 48.576602935791016,
2017
+ "learning_rate": 1.9939100368057144e-05,
2018
+ "loss": 0.1056,
2019
+ "step": 259
2020
+ },
2021
+ {
2022
+ "epoch": 2.6530612244897958,
2023
+ "grad_norm": 340.2049560546875,
2024
+ "learning_rate": 1.9937122098932428e-05,
2025
+ "loss": 1.0591,
2026
+ "step": 260
2027
+ },
2028
+ {
2029
+ "epoch": 2.663265306122449,
2030
+ "grad_norm": 472.6616516113281,
2031
+ "learning_rate": 1.99351123114852e-05,
2032
+ "loss": 2.0244,
2033
+ "step": 261
2034
+ },
2035
+ {
2036
+ "epoch": 2.673469387755102,
2037
+ "grad_norm": 1.43545663356781,
2038
+ "learning_rate": 1.993307101209006e-05,
2039
+ "loss": 0.0047,
2040
+ "step": 262
2041
+ },
2042
+ {
2043
+ "epoch": 2.683673469387755,
2044
+ "grad_norm": 23.00522804260254,
2045
+ "learning_rate": 1.993099820722155e-05,
2046
+ "loss": 0.0402,
2047
+ "step": 263
2048
+ },
2049
+ {
2050
+ "epoch": 2.693877551020408,
2051
+ "grad_norm": 144.068115234375,
2052
+ "learning_rate": 1.992889390345414e-05,
2053
+ "loss": 2.0309,
2054
+ "step": 264
2055
+ },
2056
+ {
2057
+ "epoch": 2.704081632653061,
2058
+ "grad_norm": 26.34888458251953,
2059
+ "learning_rate": 1.9926758107462208e-05,
2060
+ "loss": 0.0599,
2061
+ "step": 265
2062
+ },
2063
+ {
2064
+ "epoch": 2.7142857142857144,
2065
+ "grad_norm": 1333.631591796875,
2066
+ "learning_rate": 1.9924590826020027e-05,
2067
+ "loss": 6.106,
2068
+ "step": 266
2069
+ },
2070
+ {
2071
+ "epoch": 2.7244897959183674,
2072
+ "grad_norm": 4.13116455078125,
2073
+ "learning_rate": 1.9922392066001724e-05,
2074
+ "loss": 0.007,
2075
+ "step": 267
2076
+ },
2077
+ {
2078
+ "epoch": 2.7346938775510203,
2079
+ "grad_norm": 569.2820434570312,
2080
+ "learning_rate": 1.992016183438127e-05,
2081
+ "loss": 4.5277,
2082
+ "step": 268
2083
+ },
2084
+ {
2085
+ "epoch": 2.7448979591836737,
2086
+ "grad_norm": 9.351508140563965,
2087
+ "learning_rate": 1.991790013823246e-05,
2088
+ "loss": 0.0202,
2089
+ "step": 269
2090
+ },
2091
+ {
2092
+ "epoch": 2.7551020408163263,
2093
+ "grad_norm": 0.10505271703004837,
2094
+ "learning_rate": 1.9915606984728896e-05,
2095
+ "loss": 0.0004,
2096
+ "step": 270
2097
+ },
2098
+ {
2099
+ "epoch": 2.7653061224489797,
2100
+ "grad_norm": 16.56380271911621,
2101
+ "learning_rate": 1.9913282381143934e-05,
2102
+ "loss": 0.052,
2103
+ "step": 271
2104
+ },
2105
+ {
2106
+ "epoch": 2.7755102040816326,
2107
+ "grad_norm": 15.175297737121582,
2108
+ "learning_rate": 1.99109263348507e-05,
2109
+ "loss": 0.0429,
2110
+ "step": 272
2111
+ },
2112
+ {
2113
+ "epoch": 2.7857142857142856,
2114
+ "grad_norm": 27.31505012512207,
2115
+ "learning_rate": 1.9908538853322046e-05,
2116
+ "loss": 0.0423,
2117
+ "step": 273
2118
+ },
2119
+ {
2120
+ "epoch": 2.795918367346939,
2121
+ "grad_norm": 87.07357788085938,
2122
+ "learning_rate": 1.9906119944130527e-05,
2123
+ "loss": 0.2729,
2124
+ "step": 274
2125
+ },
2126
+ {
2127
+ "epoch": 2.806122448979592,
2128
+ "grad_norm": 1.1642284393310547,
2129
+ "learning_rate": 1.9903669614948382e-05,
2130
+ "loss": 0.0025,
2131
+ "step": 275
2132
+ },
2133
+ {
2134
+ "epoch": 2.816326530612245,
2135
+ "grad_norm": 12.991185188293457,
2136
+ "learning_rate": 1.9901187873547504e-05,
2137
+ "loss": 0.0278,
2138
+ "step": 276
2139
+ },
2140
+ {
2141
+ "epoch": 2.826530612244898,
2142
+ "grad_norm": 86.49649810791016,
2143
+ "learning_rate": 1.9898674727799418e-05,
2144
+ "loss": 0.4171,
2145
+ "step": 277
2146
+ },
2147
+ {
2148
+ "epoch": 2.836734693877551,
2149
+ "grad_norm": 14.625425338745117,
2150
+ "learning_rate": 1.9896130185675263e-05,
2151
+ "loss": 0.0553,
2152
+ "step": 278
2153
+ },
2154
+ {
2155
+ "epoch": 2.8469387755102042,
2156
+ "grad_norm": 339.1553649902344,
2157
+ "learning_rate": 1.9893554255245748e-05,
2158
+ "loss": 3.4933,
2159
+ "step": 279
2160
+ },
2161
+ {
2162
+ "epoch": 2.857142857142857,
2163
+ "grad_norm": 9.26375961303711,
2164
+ "learning_rate": 1.9890946944681157e-05,
2165
+ "loss": 0.0454,
2166
+ "step": 280
2167
+ },
2168
+ {
2169
+ "epoch": 2.86734693877551,
2170
+ "grad_norm": 66.49364471435547,
2171
+ "learning_rate": 1.9888308262251286e-05,
2172
+ "loss": 0.2936,
2173
+ "step": 281
2174
+ },
2175
+ {
2176
+ "epoch": 2.877551020408163,
2177
+ "grad_norm": 0.08851417154073715,
2178
+ "learning_rate": 1.988563821632545e-05,
2179
+ "loss": 0.0003,
2180
+ "step": 282
2181
+ },
2182
+ {
2183
+ "epoch": 2.887755102040816,
2184
+ "grad_norm": 694.4219360351562,
2185
+ "learning_rate": 1.9882936815372432e-05,
2186
+ "loss": 5.218,
2187
+ "step": 283
2188
+ },
2189
+ {
2190
+ "epoch": 2.8979591836734695,
2191
+ "grad_norm": 932.3369750976562,
2192
+ "learning_rate": 1.9880204067960473e-05,
2193
+ "loss": 12.6645,
2194
+ "step": 284
2195
+ },
2196
+ {
2197
+ "epoch": 2.9081632653061225,
2198
+ "grad_norm": 99.3906021118164,
2199
+ "learning_rate": 1.9877439982757228e-05,
2200
+ "loss": 0.6918,
2201
+ "step": 285
2202
+ },
2203
+ {
2204
+ "epoch": 2.9183673469387754,
2205
+ "grad_norm": 354.6759338378906,
2206
+ "learning_rate": 1.9874644568529763e-05,
2207
+ "loss": 1.913,
2208
+ "step": 286
2209
+ },
2210
+ {
2211
+ "epoch": 2.928571428571429,
2212
+ "grad_norm": 54.5423698425293,
2213
+ "learning_rate": 1.9871817834144506e-05,
2214
+ "loss": 0.2328,
2215
+ "step": 287
2216
+ },
2217
+ {
2218
+ "epoch": 2.938775510204082,
2219
+ "grad_norm": 13.632817268371582,
2220
+ "learning_rate": 1.9868959788567213e-05,
2221
+ "loss": 0.0461,
2222
+ "step": 288
2223
+ },
2224
+ {
2225
+ "epoch": 2.9489795918367347,
2226
+ "grad_norm": 0.13646447658538818,
2227
+ "learning_rate": 1.9866070440862977e-05,
2228
+ "loss": 0.0004,
2229
+ "step": 289
2230
+ },
2231
+ {
2232
+ "epoch": 2.9591836734693877,
2233
+ "grad_norm": 0.027559075504541397,
2234
+ "learning_rate": 1.9863149800196152e-05,
2235
+ "loss": 0.0001,
2236
+ "step": 290
2237
+ },
2238
+ {
2239
+ "epoch": 2.9693877551020407,
2240
+ "grad_norm": 3.3276469707489014,
2241
+ "learning_rate": 1.9860197875830355e-05,
2242
+ "loss": 0.0137,
2243
+ "step": 291
2244
+ },
2245
+ {
2246
+ "epoch": 2.979591836734694,
2247
+ "grad_norm": 29.392108917236328,
2248
+ "learning_rate": 1.9857214677128436e-05,
2249
+ "loss": 0.078,
2250
+ "step": 292
2251
+ },
2252
+ {
2253
+ "epoch": 2.989795918367347,
2254
+ "grad_norm": 156.80345153808594,
2255
+ "learning_rate": 1.9854200213552426e-05,
2256
+ "loss": 0.3899,
2257
+ "step": 293
2258
+ },
2259
+ {
2260
+ "epoch": 3.0,
2261
+ "grad_norm": 545.2831420898438,
2262
+ "learning_rate": 1.985115449466353e-05,
2263
+ "loss": 2.8353,
2264
+ "step": 294
2265
+ },
2266
+ {
2267
+ "epoch": 3.0,
2268
+ "eval_dim_1024_cosine_accuracy@1": 0.30985915492957744,
2269
+ "eval_dim_1024_cosine_accuracy@10": 0.37836107554417414,
2270
+ "eval_dim_1024_cosine_accuracy@3": 0.31498079385403327,
2271
+ "eval_dim_1024_cosine_accuracy@5": 0.34699103713188223,
2272
+ "eval_dim_1024_cosine_map@100": 0.3968421394024028,
2273
+ "eval_dim_1024_cosine_mrr@10": 0.32174181452350414,
2274
+ "eval_dim_1024_cosine_ndcg@10": 0.33364818903542787,
2275
+ "eval_dim_1024_cosine_precision@1": 0.30985915492957744,
2276
+ "eval_dim_1024_cosine_precision@10": 0.2694622279129321,
2277
+ "eval_dim_1024_cosine_precision@3": 0.30900554844216815,
2278
+ "eval_dim_1024_cosine_precision@5": 0.30115236875800255,
2279
+ "eval_dim_1024_cosine_recall@1": 0.03936027574360421,
2280
+ "eval_dim_1024_cosine_recall@10": 0.2548212686119806,
2281
+ "eval_dim_1024_cosine_recall@3": 0.11544349976954149,
2282
+ "eval_dim_1024_cosine_recall@5": 0.17456487753074904,
2283
+ "eval_dim_128_cosine_accuracy@1": 0.2765685019206146,
2284
+ "eval_dim_128_cosine_accuracy@10": 0.3501920614596671,
2285
+ "eval_dim_128_cosine_accuracy@3": 0.2887323943661972,
2286
+ "eval_dim_128_cosine_accuracy@5": 0.31882202304737517,
2287
+ "eval_dim_128_cosine_map@100": 0.36208318391000843,
2288
+ "eval_dim_128_cosine_mrr@10": 0.29051937889966023,
2289
+ "eval_dim_128_cosine_ndcg@10": 0.3054184027921396,
2290
+ "eval_dim_128_cosine_precision@1": 0.2765685019206146,
2291
+ "eval_dim_128_cosine_precision@10": 0.24916773367477596,
2292
+ "eval_dim_128_cosine_precision@3": 0.2787025181391378,
2293
+ "eval_dim_128_cosine_precision@5": 0.27413572343149806,
2294
+ "eval_dim_128_cosine_recall@1": 0.03462627857091171,
2295
+ "eval_dim_128_cosine_recall@10": 0.23219859983003413,
2296
+ "eval_dim_128_cosine_recall@3": 0.10222485929387912,
2297
+ "eval_dim_128_cosine_recall@5": 0.15567435868523452,
2298
+ "eval_dim_256_cosine_accuracy@1": 0.29577464788732394,
2299
+ "eval_dim_256_cosine_accuracy@10": 0.3585147247119078,
2300
+ "eval_dim_256_cosine_accuracy@3": 0.3047375160051216,
2301
+ "eval_dim_256_cosine_accuracy@5": 0.33098591549295775,
2302
+ "eval_dim_256_cosine_map@100": 0.377358622211706,
2303
+ "eval_dim_256_cosine_mrr@10": 0.3073963985935813,
2304
+ "eval_dim_256_cosine_ndcg@10": 0.31951819898251643,
2305
+ "eval_dim_256_cosine_precision@1": 0.29577464788732394,
2306
+ "eval_dim_256_cosine_precision@10": 0.25845070422535216,
2307
+ "eval_dim_256_cosine_precision@3": 0.2968416559965856,
2308
+ "eval_dim_256_cosine_precision@5": 0.2898847631241997,
2309
+ "eval_dim_256_cosine_recall@1": 0.03692836080135826,
2310
+ "eval_dim_256_cosine_recall@10": 0.24162273030445708,
2311
+ "eval_dim_256_cosine_recall@3": 0.1089192018057998,
2312
+ "eval_dim_256_cosine_recall@5": 0.16530160845995479,
2313
+ "eval_dim_512_cosine_accuracy@1": 0.3047375160051216,
2314
+ "eval_dim_512_cosine_accuracy@10": 0.3719590268886043,
2315
+ "eval_dim_512_cosine_accuracy@3": 0.31049935979513443,
2316
+ "eval_dim_512_cosine_accuracy@5": 0.34507042253521125,
2317
+ "eval_dim_512_cosine_map@100": 0.39281877553256617,
2318
+ "eval_dim_512_cosine_mrr@10": 0.31690623539215046,
2319
+ "eval_dim_512_cosine_ndcg@10": 0.3300149893720946,
2320
+ "eval_dim_512_cosine_precision@1": 0.3047375160051216,
2321
+ "eval_dim_512_cosine_precision@10": 0.2661331626120359,
2322
+ "eval_dim_512_cosine_precision@3": 0.30431071276141697,
2323
+ "eval_dim_512_cosine_precision@5": 0.29756722151088344,
2324
+ "eval_dim_512_cosine_recall@1": 0.03902184942619328,
2325
+ "eval_dim_512_cosine_recall@10": 0.2526764166009778,
2326
+ "eval_dim_512_cosine_recall@3": 0.11440062517351587,
2327
+ "eval_dim_512_cosine_recall@5": 0.17317031567103489,
2328
+ "eval_dim_64_cosine_accuracy@1": 0.25480153649167736,
2329
+ "eval_dim_64_cosine_accuracy@10": 0.323303457106274,
2330
+ "eval_dim_64_cosine_accuracy@3": 0.2605633802816901,
2331
+ "eval_dim_64_cosine_accuracy@5": 0.2906530089628681,
2332
+ "eval_dim_64_cosine_map@100": 0.3332689262079475,
2333
+ "eval_dim_64_cosine_mrr@10": 0.266841960856045,
2334
+ "eval_dim_64_cosine_ndcg@10": 0.28022682237950125,
2335
+ "eval_dim_64_cosine_precision@1": 0.25480153649167736,
2336
+ "eval_dim_64_cosine_precision@10": 0.23079385403329064,
2337
+ "eval_dim_64_cosine_precision@3": 0.25480153649167736,
2338
+ "eval_dim_64_cosine_precision@5": 0.25006402048655574,
2339
+ "eval_dim_64_cosine_recall@1": 0.031011767980561305,
2340
+ "eval_dim_64_cosine_recall@10": 0.21011380307216662,
2341
+ "eval_dim_64_cosine_recall@3": 0.09100224310580617,
2342
+ "eval_dim_64_cosine_recall@5": 0.13823759538062028,
2343
+ "eval_dim_768_cosine_accuracy@1": 0.3072983354673495,
2344
+ "eval_dim_768_cosine_accuracy@10": 0.37451984635083224,
2345
+ "eval_dim_768_cosine_accuracy@3": 0.31049935979513443,
2346
+ "eval_dim_768_cosine_accuracy@5": 0.3444302176696543,
2347
+ "eval_dim_768_cosine_map@100": 0.3944113472988561,
2348
+ "eval_dim_768_cosine_mrr@10": 0.3188075422230347,
2349
+ "eval_dim_768_cosine_ndcg@10": 0.3310954692046881,
2350
+ "eval_dim_768_cosine_precision@1": 0.3072983354673495,
2351
+ "eval_dim_768_cosine_precision@10": 0.26677336747759284,
2352
+ "eval_dim_768_cosine_precision@3": 0.3060179257362356,
2353
+ "eval_dim_768_cosine_precision@5": 0.29795134443021765,
2354
+ "eval_dim_768_cosine_recall@1": 0.03940235994624546,
2355
+ "eval_dim_768_cosine_recall@10": 0.2544826642178083,
2356
+ "eval_dim_768_cosine_recall@3": 0.11527075559959522,
2357
+ "eval_dim_768_cosine_recall@5": 0.17393586357387436,
2358
+ "eval_runtime": 99.0959,
2359
+ "eval_samples_per_second": 0.0,
2360
+ "eval_sequential_score": 0.28022682237950125,
2361
+ "eval_steps_per_second": 0.0,
2362
+ "step": 294
2363
+ }
2364
+ ],
2365
+ "logging_steps": 1,
2366
+ "max_steps": 1960,
2367
+ "num_input_tokens_seen": 0,
2368
+ "num_train_epochs": 20,
2369
+ "save_steps": 500,
2370
+ "stateful_callbacks": {
2371
+ "EarlyStoppingCallback": {
2372
+ "args": {
2373
+ "early_stopping_patience": 2,
2374
+ "early_stopping_threshold": 0.0
2375
+ },
2376
+ "attributes": {
2377
+ "early_stopping_patience_counter": 2
2378
+ }
2379
+ },
2380
+ "TrainerControl": {
2381
+ "args": {
2382
+ "should_epoch_stop": false,
2383
+ "should_evaluate": false,
2384
+ "should_log": false,
2385
+ "should_save": true,
2386
+ "should_training_stop": true
2387
+ },
2388
+ "attributes": {}
2389
+ }
2390
+ },
2391
+ "total_flos": 0.0,
2392
+ "train_batch_size": 2,
2393
+ "trial_name": null,
2394
+ "trial_params": null
2395
+ }
checkpoint-294/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
3
+ size 6097
checkpoint-98/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-98/README.md ADDED
@@ -0,0 +1,1621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - dense
10
+ - generated_from_trainer
11
+ - dataset_size:391
12
+ - loss:MatryoshkaLoss
13
+ - loss:MultipleNegativesRankingLoss
14
+ base_model: intfloat/multilingual-e5-large
15
+ widget:
16
+ - source_sentence: What does 'personal data breach' entail?
17
+ sentences:
18
+ - '1.Processing of personal data revealing racial or ethnic origin, political opinions,
19
+ religious or philosophical beliefs, or trade union membership, and the processing
20
+ of genetic data, biometric data for the purpose of uniquely identifying a natural
21
+ person, data concerning health or data concerning a natural person''s sex life
22
+ or sexual orientation shall be prohibited.
23
+
24
+ 2.Paragraph 1 shall not apply if one of the following applies: (a) the data subject
25
+ has given explicit consent to the processing of those personal data for one or
26
+ more specified purposes, except where Union or Member State law provide that the
27
+ prohibition referred to in paragraph 1 may not be lifted by the data subject;
28
+ (b) processing is necessary for the purposes of carrying out the obligations
29
+ and exercising specific rights of the controller or of the data subject in the
30
+ field of employment and social security and social protection law in so far as
31
+ it is authorised by Union or Member State law or a collective agreement pursuant
32
+ to Member State law providing for appropriate safeguards for the fundamental rights
33
+ and the interests of the data subject; (c) processing is necessary to protect
34
+ the vital interests of the data subject or of another natural person where the
35
+ data subject is physically or legally incapable of giving consent; (d) processing
36
+ is carried out in the course of its legitimate activities with appropriate safeguards
37
+ by a foundation, association or any other not-for-profit body with a political,
38
+ philosophical, religious or trade union aim and on condition that the processing
39
+ relates solely to the members or to former members of the body or to persons who
40
+ have regular contact with it in connection with its purposes and that the personal
41
+ data are not disclosed outside that body without the consent of the data subjects;
42
+ (e) processing relates to personal data which are manifestly made public by the
43
+ data subject; (f) processing is necessary for the establishment, exercise or
44
+ defence of legal claims or whenever courts are acting in their judicial capacity;
45
+ (g) processing is necessary for reasons of substantial public interest, on the
46
+ basis of Union or Member State law which shall be proportionate to the aim pursued,
47
+ respect the essence of the right to data protection and provide for suitable and
48
+ specific measures to safeguard the fundamental rights and the interests of the
49
+ data subject; (h) processing is necessary for the purposes of preventive or occupational
50
+ medicine, for the assessment of the working capacity of the employee, medical
51
+ diagnosis, the provision of health or social care or treatment or the management
52
+ of health or social care systems and services on the basis of Union or Member
53
+ State law or pursuant to contract with a health professional and subject to the
54
+ conditions and safeguards referred to in paragraph 3; (i) processing is necessary
55
+ for reasons of public interest in the area of public health, such as protecting
56
+ against serious cross-border threats to health or ensuring high standards of quality
57
+ and safety of health care and of medicinal products or medical devices, on the
58
+ basis of Union or Member State law which provides for suitable and specific measures
59
+ to safeguard the rights and freedoms of the data subject, in particular professional
60
+ secrecy; 4.5.2016 L 119/38 (j) processing is necessary for archiving purposes
61
+ in the public interest, scientific or historical research purposes or statistical
62
+ purposes in accordance with Article 89(1) based on Union or Member State law which
63
+ shall be proportionate to the aim pursued, respect the essence of the right to
64
+ data protection and provide for suitable and specific measures to safeguard the
65
+ fundamental rights and the interests of the data subject.
66
+
67
+ 3.Personal data referred to in paragraph 1 may be processed for the purposes referred
68
+ to in point (h) of paragraph 2 when those data are processed by or under the responsibility
69
+ of a professional subject to the obligation of professional secrecy under Union
70
+ or Member State law or rules established by national competent bodies or by another
71
+ person also subject to an obligation of secrecy under Union or Member State law
72
+ or rules established by national competent bodies.
73
+
74
+ 4.Member States may maintain or introduce further conditions, including limitations,
75
+ with regard to the processing of genetic data, biometric data or data concerning
76
+ health.'
77
+ - '1) ''personal data'' means any information relating to an identified or identifiable
78
+ natural person (''data subject''); an identifiable natural person is one who can
79
+ be identified, directly or indirectly, in particular by reference to an identifier
80
+ such as a name, an identification number, location data, an online identifier
81
+ or to one or more factors specific to the physical, physiological, genetic, mental,
82
+ economic, cultural or social identity of that natural person;
83
+
84
+ (2) ‘processing’ means any operation or set of operations which is performed on
85
+ personal data or on sets of personal data, whether or not by automated means,
86
+ such as collection, recording, organisation, structuring, storage, adaptation
87
+ or alteration, retrieval, consultation, use, disclosure by transmission, dissemination
88
+ or otherwise making available, alignment or combination, restriction, erasure
89
+ or destruction;
90
+
91
+ (3) ‘restriction of processing’ means the marking of stored personal data with
92
+ the aim of limiting their processing in the future;
93
+
94
+ (4) ‘profiling’ means any form of automated processing of personal data consisting
95
+ of the use of personal data to evaluate certain personal aspects relating to a
96
+ natural person, in particular to analyse or predict aspects concerning that natural
97
+ person''s performance at work, economic situation, health, personal preferences,
98
+ interests, reliability, behaviour, location or movements;
99
+
100
+ (5) ‘pseudonymisation’ means the processing of personal data in such a manner
101
+ that the personal data can no longer be attributed to a specific data subject
102
+ without the use of additional information, provided that such additional information
103
+ is kept separately and is subject to technical and organisational measures to
104
+ ensure that the personal data are not attributed to an identified or identifiable
105
+ natural person;
106
+
107
+ (6) ‘filing system’ means any structured set of personal data which are accessible
108
+ according to specific criteria, whether centralised, decentralised or dispersed
109
+ on a functional or geographical basis;
110
+
111
+ (7) ‘controller’ means the natural or legal person, public authority, agency or
112
+ other body which, alone or jointly with others, determines the purposes and means
113
+ of the processing of personal data; where the purposes and means of such processing
114
+ are determined by Union or Member State law, the controller or the specific criteria
115
+ for its nomination may be provided for by Union or Member State law;
116
+
117
+ (8) ‘processor’ means a natural or legal person, public authority, agency or other
118
+ body which processes personal data on behalf of the controller;
119
+
120
+ (9) ‘recipient’ means a natural or legal person, public authority, agency or another
121
+ body, to which the personal data are disclosed, whether a third party or not.
122
+ However, public authorities which may receive personal data in the framework of
123
+ a particular inquiry in accordance with Union or Member State law shall not be
124
+ regarded as recipients; the processing of those data by those public authorities
125
+ shall be in compliance with the applicable data protection rules according to
126
+ the purposes of the processing;
127
+
128
+ (10) ‘third party’ means a natural or legal person, public authority, agency or
129
+ body other than the data subject, controller, processor and persons who, under
130
+ the direct authority of the controller or processor, are authorised to process
131
+ personal data;
132
+
133
+ (11) ‘consent’ of the data subject means any freely given, specific, informed
134
+ and unambiguous indication of the data subject''s wishes by which he or she, by
135
+ a statement or by a clear affirmative action, signifies agreement to the processing
136
+ of personal data relating to him or her;
137
+
138
+ (12) ‘personal data breach’ means a breach of security leading to the accidental
139
+ or unlawful destruction, loss, alteration, unauthorised disclosure of, or access
140
+ to, personal data transmitted, stored or otherwise processed;
141
+
142
+ (13) ‘genetic data’ means personal data relating to the inherited or acquired
143
+ genetic characteristics of a natural person which give unique information about
144
+ the physiology or the health of that natural person and which result, in particular,
145
+ from an analysis of a biological sample from the natural person in question;
146
+
147
+ (14) ‘biometric data’ means personal data resulting from specific technical processing
148
+ relating to the physical, physiological or behavioural characteristics of a natural
149
+ person, which allow or confirm the unique identification of that natural person,
150
+ such as facial images or dactyloscopic data;
151
+
152
+ (15) ‘data concerning health’ means personal data related to the physical or mental
153
+ health of a natural person, including the provision of health care services, which
154
+ reveal information about his or her health status;
155
+
156
+ (16) ‘main establishment’ means: (a) as regards a controller with establishments
157
+ in more than one Member State, the place of its central administration in the
158
+ Union, unless the decisions on the purposes and means of the processing of personal
159
+ data are taken in another establishment of the controller in the Union and the
160
+ latter establishment has the power to have such decisions implemented, in which
161
+ case the establishment having taken such decisions is to be considered to be the
162
+ main establishment; (b) as regards a processor with establishments in more than
163
+ one Member State, the place of its central administration in the Union, or, if
164
+ the processor has no central administration in the Union, the establishment of
165
+ the processor in the Union where the main processing activities in the context
166
+ of the activities of an establishment of the processor take place to the extent
167
+ that the processor is subject to specific obligations under this Regulation;
168
+
169
+ (17) ‘representative’ means a natural or legal person established in the Union
170
+ who, designated by the controller or processor in writing pursuant to Article
171
+ 27, represents the controller or processor with regard to their respective obligations
172
+ under this Regulation;
173
+
174
+ (18) ‘enterprise’ means a natural or legal person engaged in an economic activity,
175
+ irrespective of its legal form, including partnerships or associations regularly
176
+ engaged in an economic activity;
177
+
178
+ (19) ‘group of undertakings’ means a controlling undertaking and its controlled
179
+ undertakings;
180
+
181
+ (20) ‘binding corporate rules’ means personal data protection policies which are
182
+ adhered to by a controller or processor established on the territory of a Member
183
+ State for transfers or a set of transfers of personal data to a controller or
184
+ processor in one or more third countries within a group of undertakings, or group
185
+ of enterprises engaged in a joint economic activity;
186
+
187
+ (21) ‘supervisory authority’ means an independent public authority which is established
188
+ by a Member State pursuant to Article 51;
189
+
190
+ (22) ‘supervisory authority concerned’ means a supervisory authority which is
191
+ concerned by the processing of personal data because: (a) the controller or processor
192
+ is established on the territory of the Member State of that supervisory authority;
193
+ (b) data subjects residing in the Member State of that supervisory authority are
194
+ substantially affected or likely to be substantially affected by the processing;
195
+ or (c) a complaint has been lodged with that supervisory authority;
196
+
197
+ (23) ‘cross-border processing’ means either: (a) processing of personal data which
198
+ takes place in the context of the activities of establishments in more than one
199
+ Member State of a controller or processor in the Union where the controller or
200
+ processor is established in more than one Member State; or (b) processing of personal
201
+ data which takes place in the context of the activities of a single establishment
202
+ of a controller or processor in the Union but which substantially affects or is
203
+ likely to substantially affect data subjects in more than one Member State.
204
+
205
+ (24) ‘relevant and reasoned objection’ means an objection to a draft decision
206
+ as to whether there is an infringement of this Regulation, or whether envisaged
207
+ action in relation to the controller or processor complies with this Regulation,
208
+ which clearly demonstrates the significance of the risks posed by the draft decision
209
+ as regards the fundamental rights and freedoms of data subjects and, where applicable,
210
+ the free flow of personal data within the Union;
211
+
212
+ (25) ‘information society service’ means a service as defined in point (b) of
213
+ Article 1(1) of Directive (EU) 2015/1535 of the European Parliament and of the
214
+ Council (1);
215
+
216
+ (26) ‘international organisation’ means an organisation and its subordinate bodies
217
+ governed by public international law, or any other body which is set up by, or
218
+ on the basis of, an agreement between two or more countries.'
219
+ - Any processing of personal data should be lawful and fair. It should be transparent
220
+ to natural persons that personal data concerning them are collected, used, consulted
221
+ or otherwise processed and to what extent the personal data are or will be processed.
222
+ The principle of transparency requires that any information and communication
223
+ relating to the processing of those personal data be easily accessible and easy
224
+ to understand, and that clear and plain language be used. That principle concerns,
225
+ in particular, information to the data subjects on the identity of the controller
226
+ and the purposes of the processing and further information to ensure fair and
227
+ transparent processing in respect of the natural persons concerned and their right
228
+ to obtain confirmation and communication of personal data concerning them which
229
+ are being processed. Natural persons should be made aware of risks, rules, safeguards
230
+ and rights in relation to the processing of personal data and how to exercise
231
+ their rights in relation to such processing. In particular, the specific purposes
232
+ for which personal data are processed should be explicit and legitimate and determined
233
+ at the time of the collection of the personal data. The personal data should be
234
+ adequate, relevant and limited to what is necessary for the purposes for which
235
+ they are processed. This requires, in particular, ensuring that the period for
236
+ which the personal data are stored is limited to a strict minimum. Personal data
237
+ should be processed only if the purpose of the processing could not reasonably
238
+ be fulfilled by other means. In order to ensure that the personal data are not
239
+ kept longer than necessary, time limits should be established by the controller
240
+ for erasure or for a periodic review. Every reasonable step should be taken to
241
+ ensure that personal data which are inaccurate are rectified or deleted. Personal
242
+ data should be processed in a manner that ensures appropriate security and confidentiality
243
+ of the personal data, including for preventing unauthorised access to or use of
244
+ personal data and the equipment used for the processing.
245
+ - source_sentence: In what situations could providing information to the data subject
246
+ be considered impossible or involve a disproportionate effort?
247
+ sentences:
248
+ - '1.The controller shall consult the supervisory authority prior to processing
249
+ where a data protection impact assessment under Article 35 indicates that the
250
+ processing would result in a high risk in the absence of measures taken by the
251
+ controller to mitigate the risk.
252
+
253
+ 2.Where the supervisory authority is of the opinion that the intended processing
254
+ referred to in paragraph 1 would infringe this Regulation, in particular where
255
+ the controller has insufficiently identified or mitigated the risk, the supervisory
256
+ authority shall, within a period of up to eight weeks of receipt of the request
257
+ for consultation, provide written advice to the controller and, where applicable
258
+ to the processor, and may use any of its powers referred to in Article 58. That
259
+ period may be extended by six weeks, taking into account the complexity of the
260
+ intended processing. The supervisory authority shall inform the controller and,
261
+ where applicable, the processor, of any such extension within one month of receipt
262
+ of the request for consultation together with the reasons for the delay. Those
263
+ periods may be suspended until the supervisory authority has obtained information
264
+ it has requested for the purposes of the consultation.
265
+
266
+ 3.When consulting the supervisory authority pursuant to paragraph 1, the controller
267
+ shall provide the supervisory authority with: (a) where applicable, the respective
268
+ responsibilities of the controller, joint controllers and processors involved
269
+ in the processing, in particular for processing within a group of undertakings;
270
+ (b) the purposes and means of the intended processing; (c) the measures and
271
+ safeguards provided to protect the rights and freedoms of data subjects pursuant
272
+ to this Regulation; (d) where applicable, the contact details of the data protection
273
+ officer; 4.5.2016 L 119/54 (e) the data protection impact assessment provided
274
+ for in Article 35; and (f) any other information requested by the supervisory
275
+ authority.
276
+
277
+ 4.Member States shall consult the supervisory authority during the preparation
278
+ of a proposal for a legislative measure to be adopted by a national parliament,
279
+ or of a regulatory measure based on such a legislative measure, which relates
280
+ to processing.
281
+
282
+ 5.Notwithstanding paragraph 1, Member State law may require controllers to consult
283
+ with, and obtain prior authorisation from, the supervisory authority in relation
284
+ to processing by a controller for the performance of a task carried out by the
285
+ controller in the public interest, including processing in relation to social
286
+ protection and public health'
287
+ - "1.The Member States, the supervisory authorities, the Board and the Commission\
288
+ \ shall encourage, in particular at Union level, the establishment of data protection\
289
+ \ certification mechanisms and of data protection seals and marks, for the purpose\
290
+ \ of demonstrating compliance with this Regulation of processing operations by\
291
+ \ controllers and processors. The specific needs of micro, small and medium-sized\
292
+ \ enterprises shall be taken into account. 4.5.2016 L 119/58 \n2.In addition\
293
+ \ to adherence by controllers or processors subject to this Regulation, data protection\
294
+ \ certification mechanisms, seals or marks approved pursuant to paragraph 5 of\
295
+ \ this Article may be established for the purpose of demonstrating the existence\
296
+ \ of appropriate safeguards provided by controllers or processors that are not\
297
+ \ subject to this Regulation pursuant to Article 3 within the framework of personal\
298
+ \ data transfers to third countries or international organisations under the terms\
299
+ \ referred to in point (f) of Article 46(2). Such controllers or processors shall\
300
+ \ make binding and enforceable commitments, via contractual or other legally binding\
301
+ \ instruments, to apply those appropriate safeguards, including with regard to\
302
+ \ the rights of data subjects.\n3.The certification shall be voluntary and available\
303
+ \ via a process that is transparent.\n4.A certification pursuant to this Article\
304
+ \ does not reduce the responsibility of the controller or the processor for compliance\
305
+ \ with this Regulation and is without prejudice to the tasks and powers of the\
306
+ \ supervisory authorities which are competent pursuant to Article 55 or 56\n5.A\
307
+ \ certification pursuant to this Article shall be issued by the certification\
308
+ \ bodies referred to in Article 43 or by the competent supervisory authority,\
309
+ \ on the basis of criteria approved by that competent supervisory authority pursuant\
310
+ \ to Article 58(3) or by the Board pursuant to Article 63. Where the criteria\
311
+ \ are approved by the Board, this may result in a common certification, the European\
312
+ \ Data Protection Seal.\n6.The controller or processor which submits its processing\
313
+ \ to the certification mechanism shall provide the certification body referred\
314
+ \ to in Article 43, or where applicable, the competent supervisory authority,\
315
+ \ with all information and access to its processing activities which are necessary\
316
+ \ to conduct the certification procedure.\n7.Certification shall be issued to\
317
+ \ a controller or processor for a maximum period of three years and may be renewed,\
318
+ \ under the same conditions, provided that the relevant requirements continue\
319
+ \ to be met. Certification shall be withdrawn, as applicable, by the certification\
320
+ \ bodies referred to in Article 43 or by the competent supervisory authority where\
321
+ \ the requirements for the certification are not or are no longer met.\n8.The\
322
+ \ Board shall collate all certification mechanisms and data protection seals and\
323
+ \ marks in a register and shall make them publicly available by any appropriate\
324
+ \ means."
325
+ - However, it is not necessary to impose the obligation to provide information where
326
+ the data subject already possesses the information, where the recording or disclosure
327
+ of the personal data is expressly laid down by law or where the provision of information
328
+ to the data subject proves to be impossible or would involve a disproportionate
329
+ effort. The latter could in particular be the case where processing is carried
330
+ out for archiving purposes in the public interest, scientific or historical research
331
+ purposes or statistical purposes. In that regard, the number of data subjects,
332
+ the age of the data and any appropriate safeguards adopted should be taken into
333
+ consideration.
334
+ - source_sentence: What is the data subject provided with prior to further processing
335
+ of personal data?
336
+ sentences:
337
+ - '1.Where personal data relating to a data subject are collected from the data
338
+ subject, the controller shall, at the time when personal data are obtained, provide
339
+ the data subject with all of the following information: (a) the identity and
340
+ the contact details of the controller and, where applicable, of the controller''s
341
+ representative; (b) the contact details of the data protection officer, where
342
+ applicable; (c) the purposes of the processing for which the personal data are
343
+ intended as well as the legal basis for the processing; 4.5.2016 L 119/40 (d) where
344
+ the processing is based on point (f) of Article 6(1), the legitimate interests
345
+ pursued by the controller or by a third party; (e) the recipients or categories
346
+ of recipients of the personal data, if any; (f) where applicable, the fact that
347
+ the controller intends to transfer personal data to a third country or international
348
+ organisation and the existence or absence of an adequacy decision by the Commission,
349
+ or in the case of transfers referred to in Article 46 or 47, or the second subparagraph
350
+ of Article 49(1), reference to the appropriate or suitable safeguards and the
351
+ means by which to obtain a copy of them or where they have been made available.
352
+
353
+ 2.In addition to the information referred to in paragraph 1, the controller shall,
354
+ at the time when personal data are obtained, provide the data subject with the
355
+ following further information necessary to ensure fair and transparent processing:
356
+ (a) the period for which the personal data will be stored, or if that is not
357
+ possible, the criteria used to determine that period; (b) the existence of the
358
+ right to request from the controller access to and rectification or erasure of
359
+ personal data or restriction of processing concerning the data subject or to object
360
+ to processing as well as the right to data portability; (c) where the processing
361
+ is based on point (a) of Article 6(1) or point (a) of Article 9(2), the existence
362
+ of the right to withdraw consent at any time, without affecting the lawfulness
363
+ of processing based on consent before its withdrawal; (d) the right to lodge
364
+ a complaint with a supervisory authority; (e) whether the provision of personal
365
+ data is a statutory or contractual requirement, or a requirement necessary to
366
+ enter into a contract, as well as whether the data subject is obliged to provide
367
+ the personal data and of the possible consequences of failure to provide such
368
+ data; (f) the existence of automated decision-making, including profiling, referred
369
+ to in Article 22(1) and (4) and, at least in those cases, meaningful information
370
+ about the logic involved, as well as the significance and the envisaged consequences
371
+ of such processing for the data subject.
372
+
373
+ 3.Where the controller intends to further process the personal data for a purpose
374
+ other than that for which the personal data were collected, the controller shall
375
+ provide the data subject prior to that further processing with information on
376
+ that other purpose and with any relevant further information as referred to in
377
+ paragraph 2
378
+
379
+ 4.Paragraphs 1, 2 and 3 shall not apply where and insofar as the data subject
380
+ already has the information.'
381
+ - This Regulation respects and does not prejudice the status under existing constitutional
382
+ law of churches and religious associations or communities in the Member States,
383
+ as recognised in Article 17 TFEU.
384
+ - '1) ''personal data'' means any information relating to an identified or identifiable
385
+ natural person (''data subject''); an identifiable natural person is one who can
386
+ be identified, directly or indirectly, in particular by reference to an identifier
387
+ such as a name, an identification number, location data, an online identifier
388
+ or to one or more factors specific to the physical, physiological, genetic, mental,
389
+ economic, cultural or social identity of that natural person;
390
+
391
+ (2) ‘processing’ means any operation or set of operations which is performed on
392
+ personal data or on sets of personal data, whether or not by automated means,
393
+ such as collection, recording, organisation, structuring, storage, adaptation
394
+ or alteration, retrieval, consultation, use, disclosure by transmission, dissemination
395
+ or otherwise making available, alignment or combination, restriction, erasure
396
+ or destruction;
397
+
398
+ (3) ‘restriction of processing’ means the marking of stored personal data with
399
+ the aim of limiting their processing in the future;
400
+
401
+ (4) ‘profiling’ means any form of automated processing of personal data consisting
402
+ of the use of personal data to evaluate certain personal aspects relating to a
403
+ natural person, in particular to analyse or predict aspects concerning that natural
404
+ person''s performance at work, economic situation, health, personal preferences,
405
+ interests, reliability, behaviour, location or movements;
406
+
407
+ (5) ‘pseudonymisation’ means the processing of personal data in such a manner
408
+ that the personal data can no longer be attributed to a specific data subject
409
+ without the use of additional information, provided that such additional information
410
+ is kept separately and is subject to technical and organisational measures to
411
+ ensure that the personal data are not attributed to an identified or identifiable
412
+ natural person;
413
+
414
+ (6) ‘filing system’ means any structured set of personal data which are accessible
415
+ according to specific criteria, whether centralised, decentralised or dispersed
416
+ on a functional or geographical basis;
417
+
418
+ (7) ‘controller’ means the natural or legal person, public authority, agency or
419
+ other body which, alone or jointly with others, determines the purposes and means
420
+ of the processing of personal data; where the purposes and means of such processing
421
+ are determined by Union or Member State law, the controller or the specific criteria
422
+ for its nomination may be provided for by Union or Member State law;
423
+
424
+ (8) ‘processor’ means a natural or legal person, public authority, agency or other
425
+ body which processes personal data on behalf of the controller;
426
+
427
+ (9) ‘recipient’ means a natural or legal person, public authority, agency or another
428
+ body, to which the personal data are disclosed, whether a third party or not.
429
+ However, public authorities which may receive personal data in the framework of
430
+ a particular inquiry in accordance with Union or Member State law shall not be
431
+ regarded as recipients; the processing of those data by those public authorities
432
+ shall be in compliance with the applicable data protection rules according to
433
+ the purposes of the processing;
434
+
435
+ (10) ‘third party’ means a natural or legal person, public authority, agency or
436
+ body other than the data subject, controller, processor and persons who, under
437
+ the direct authority of the controller or processor, are authorised to process
438
+ personal data;
439
+
440
+ (11) ‘consent’ of the data subject means any freely given, specific, informed
441
+ and unambiguous indication of the data subject''s wishes by which he or she, by
442
+ a statement or by a clear affirmative action, signifies agreement to the processing
443
+ of personal data relating to him or her;
444
+
445
+ (12) ‘personal data breach’ means a breach of security leading to the accidental
446
+ or unlawful destruction, loss, alteration, unauthorised disclosure of, or access
447
+ to, personal data transmitted, stored or otherwise processed;
448
+
449
+ (13) ‘genetic data’ means personal data relating to the inherited or acquired
450
+ genetic characteristics of a natural person which give unique information about
451
+ the physiology or the health of that natural person and which result, in particular,
452
+ from an analysis of a biological sample from the natural person in question;
453
+
454
+ (14) ‘biometric data’ means personal data resulting from specific technical processing
455
+ relating to the physical, physiological or behavioural characteristics of a natural
456
+ person, which allow or confirm the unique identification of that natural person,
457
+ such as facial images or dactyloscopic data;
458
+
459
+ (15) ‘data concerning health’ means personal data related to the physical or mental
460
+ health of a natural person, including the provision of health care services, which
461
+ reveal information about his or her health status;
462
+
463
+ (16) ‘main establishment’ means: (a) as regards a controller with establishments
464
+ in more than one Member State, the place of its central administration in the
465
+ Union, unless the decisions on the purposes and means of the processing of personal
466
+ data are taken in another establishment of the controller in the Union and the
467
+ latter establishment has the power to have such decisions implemented, in which
468
+ case the establishment having taken such decisions is to be considered to be the
469
+ main establishment; (b) as regards a processor with establishments in more than
470
+ one Member State, the place of its central administration in the Union, or, if
471
+ the processor has no central administration in the Union, the establishment of
472
+ the processor in the Union where the main processing activities in the context
473
+ of the activities of an establishment of the processor take place to the extent
474
+ that the processor is subject to specific obligations under this Regulation;
475
+
476
+ (17) ‘representative’ means a natural or legal person established in the Union
477
+ who, designated by the controller or processor in writing pursuant to Article
478
+ 27, represents the controller or processor with regard to their respective obligations
479
+ under this Regulation;
480
+
481
+ (18) ‘enterprise’ means a natural or legal person engaged in an economic activity,
482
+ irrespective of its legal form, including partnerships or associations regularly
483
+ engaged in an economic activity;
484
+
485
+ (19) ‘group of undertakings’ means a controlling undertaking and its controlled
486
+ undertakings;
487
+
488
+ (20) ‘binding corporate rules’ means personal data protection policies which are
489
+ adhered to by a controller or processor established on the territory of a Member
490
+ State for transfers or a set of transfers of personal data to a controller or
491
+ processor in one or more third countries within a group of undertakings, or group
492
+ of enterprises engaged in a joint economic activity;
493
+
494
+ (21) ‘supervisory authority’ means an independent public authority which is established
495
+ by a Member State pursuant to Article 51;
496
+
497
+ (22) ‘supervisory authority concerned’ means a supervisory authority which is
498
+ concerned by the processing of personal data because: (a) the controller or processor
499
+ is established on the territory of the Member State of that supervisory authority;
500
+ (b) data subjects residing in the Member State of that supervisory authority are
501
+ substantially affected or likely to be substantially affected by the processing;
502
+ or (c) a complaint has been lodged with that supervisory authority;
503
+
504
+ (23) ‘cross-border processing’ means either: (a) processing of personal data which
505
+ takes place in the context of the activities of establishments in more than one
506
+ Member State of a controller or processor in the Union where the controller or
507
+ processor is established in more than one Member State; or (b) processing of personal
508
+ data which takes place in the context of the activities of a single establishment
509
+ of a controller or processor in the Union but which substantially affects or is
510
+ likely to substantially affect data subjects in more than one Member State.
511
+
512
+ (24) ‘relevant and reasoned objection’ means an objection to a draft decision
513
+ as to whether there is an infringement of this Regulation, or whether envisaged
514
+ action in relation to the controller or processor complies with this Regulation,
515
+ which clearly demonstrates the significance of the risks posed by the draft decision
516
+ as regards the fundamental rights and freedoms of data subjects and, where applicable,
517
+ the free flow of personal data within the Union;
518
+
519
+ (25) ‘information society service’ means a service as defined in point (b) of
520
+ Article 1(1) of Directive (EU) 2015/1535 of the European Parliament and of the
521
+ Council (1);
522
+
523
+ (26) ‘international organisation’ means an organisation and its subordinate bodies
524
+ governed by public international law, or any other body which is set up by, or
525
+ on the basis of, an agreement between two or more countries.'
526
+ - source_sentence: What type of data may be processed for purposes related to point
527
+ (h) of paragraph 2?
528
+ sentences:
529
+ - '1.Processing of personal data revealing racial or ethnic origin, political opinions,
530
+ religious or philosophical beliefs, or trade union membership, and the processing
531
+ of genetic data, biometric data for the purpose of uniquely identifying a natural
532
+ person, data concerning health or data concerning a natural person''s sex life
533
+ or sexual orientation shall be prohibited.
534
+
535
+ 2.Paragraph 1 shall not apply if one of the following applies: (a) the data subject
536
+ has given explicit consent to the processing of those personal data for one or
537
+ more specified purposes, except where Union or Member State law provide that the
538
+ prohibition referred to in paragraph 1 may not be lifted by the data subject;
539
+ (b) processing is necessary for the purposes of carrying out the obligations
540
+ and exercising specific rights of the controller or of the data subject in the
541
+ field of employment and social security and social protection law in so far as
542
+ it is authorised by Union or Member State law or a collective agreement pursuant
543
+ to Member State law providing for appropriate safeguards for the fundamental rights
544
+ and the interests of the data subject; (c) processing is necessary to protect
545
+ the vital interests of the data subject or of another natural person where the
546
+ data subject is physically or legally incapable of giving consent; (d) processing
547
+ is carried out in the course of its legitimate activities with appropriate safeguards
548
+ by a foundation, association or any other not-for-profit body with a political,
549
+ philosophical, religious or trade union aim and on condition that the processing
550
+ relates solely to the members or to former members of the body or to persons who
551
+ have regular contact with it in connection with its purposes and that the personal
552
+ data are not disclosed outside that body without the consent of the data subjects;
553
+ (e) processing relates to personal data which are manifestly made public by the
554
+ data subject; (f) processing is necessary for the establishment, exercise or
555
+ defence of legal claims or whenever courts are acting in their judicial capacity;
556
+ (g) processing is necessary for reasons of substantial public interest, on the
557
+ basis of Union or Member State law which shall be proportionate to the aim pursued,
558
+ respect the essence of the right to data protection and provide for suitable and
559
+ specific measures to safeguard the fundamental rights and the interests of the
560
+ data subject; (h) processing is necessary for the purposes of preventive or occupational
561
+ medicine, for the assessment of the working capacity of the employee, medical
562
+ diagnosis, the provision of health or social care or treatment or the management
563
+ of health or social care systems and services on the basis of Union or Member
564
+ State law or pursuant to contract with a health professional and subject to the
565
+ conditions and safeguards referred to in paragraph 3; (i) processing is necessary
566
+ for reasons of public interest in the area of public health, such as protecting
567
+ against serious cross-border threats to health or ensuring high standards of quality
568
+ and safety of health care and of medicinal products or medical devices, on the
569
+ basis of Union or Member State law which provides for suitable and specific measures
570
+ to safeguard the rights and freedoms of the data subject, in particular professional
571
+ secrecy; 4.5.2016 L 119/38 (j) processing is necessary for archiving purposes
572
+ in the public interest, scientific or historical research purposes or statistical
573
+ purposes in accordance with Article 89(1) based on Union or Member State law which
574
+ shall be proportionate to the aim pursued, respect the essence of the right to
575
+ data protection and provide for suitable and specific measures to safeguard the
576
+ fundamental rights and the interests of the data subject.
577
+
578
+ 3.Personal data referred to in paragraph 1 may be processed for the purposes referred
579
+ to in point (h) of paragraph 2 when those data are processed by or under the responsibility
580
+ of a professional subject to the obligation of professional secrecy under Union
581
+ or Member State law or rules established by national competent bodies or by another
582
+ person also subject to an obligation of secrecy under Union or Member State law
583
+ or rules established by national competent bodies.
584
+
585
+ 4.Member States may maintain or introduce further conditions, including limitations,
586
+ with regard to the processing of genetic data, biometric data or data concerning
587
+ health.'
588
+ - '1.The data protection officer shall have at least the following tasks: (a) to
589
+ inform and advise the controller or the processor and the employees who carry
590
+ out processing of their obligations pursuant to this Regulation and to other Union
591
+ or Member State data protection provisions; (b) to monitor compliance with this
592
+ Regulation, with other Union or Member State data protection provisions and with
593
+ the policies of the controller or processor in relation to the protection of personal
594
+ data, including the assignment of responsibilities, awareness-raising and training
595
+ of staff involved in processing operations, and the related audits; (c) to provide
596
+ advice where requested as regards the data protection impact assessment and monitor
597
+ its performance pursuant to Article 35; (d) to cooperate with the supervisory
598
+ authority; (e) to act as the contact point for the supervisory authority on issues
599
+ relating to processing, including the prior consultation referred to in Article
600
+ 36, and to consult, where appropriate, with regard to any other matter.
601
+
602
+ 2.The data protection officer shall in the performance of his or her tasks have
603
+ due regard to the risk associated with processing operations, taking into account
604
+ the nature, scope, context and purposes of processing. Section 5 Codes of conduct
605
+ and certification'
606
+ - Processing should be lawful where it is necessary in the context of a contract
607
+ or the intention to enter into a contract.
608
+ - source_sentence: What may impede authorities in the discharge of their responsibilities
609
+ under Union law?
610
+ sentences:
611
+ - '1.The controller and the processor shall designate a data protection officer
612
+ in any case where: (a) the processing is carried out by a public authority or
613
+ body, except for courts acting in their judicial capacity; (b) the core activities
614
+ of the controller or the processor consist of processing operations which, by
615
+ virtue of their nature, their scope and/or their purposes, require regular and
616
+ systematic monitoring of data subjects on a large scale; or (c) the core activities
617
+ of the controller or the processor consist of processing on a large scale of special
618
+ categories of data pursuant to Article 9 and personal data relating to criminal
619
+ convictions and offences referred to in Article 10
620
+
621
+ 2.A group of undertakings may appoint a single data protection officer provided
622
+ that a data protection officer is easily accessible from each establishment.
623
+
624
+ 3.Where the controller or the processor is a public authority or body, a single
625
+ data protection officer may be designated for several such authorities or bodies,
626
+ taking account of their organisational structure and size.
627
+
628
+ 4.In cases other than those referred to in paragraph 1, the controller or processor
629
+ or associations and other bodies representing categories of controllers or processors
630
+ may or, where required by Union or Member State law shall, designate a data protection
631
+ officer. The data protection officer may act for such associations and other bodies
632
+ representing controllers or processors.
633
+
634
+ 5.The data protection officer shall be designated on the basis of professional
635
+ qualities and, in particular, expert knowledge of data protection law and practices
636
+ and the ability to fulfil the tasks referred to in Article 39
637
+
638
+ 6.The data protection officer may be a staff member of the controller or processor,
639
+ or fulfil the tasks on the basis of a service contract.
640
+
641
+ 7.The controller or the processor shall publish the contact details of the data
642
+ protection officer and communicate them to the supervisory authority.'
643
+ - This Regulation is without prejudice to international agreements concluded between
644
+ the Union and third countries regulating the transfer of personal data including
645
+ appropriate safeguards for the data subjects. Member States may conclude international
646
+ agreements which involve the transfer of personal data to third countries or international
647
+ organisations, as far as such agreements do not affect this Regulation or any
648
+ other provisions of Union law and include an appropriate level of protection for
649
+ the fundamental rights of the data subjects.
650
+ - The objectives and principles of Directive 95/46/EC remain sound, but it has not
651
+ prevented fragmentation in the implementation of data protection across the Union,
652
+ legal uncertainty or a widespread public perception that there are significant
653
+ risks to the protection of natural persons, in particular with regard to online
654
+ activity. Differences in the level of protection of the rights and freedoms of
655
+ natural persons, in particular the right to the protection of personal data, with
656
+ regard to the processing of personal data in the Member States may prevent the
657
+ free flow of personal data throughout the Union. Those differences may therefore
658
+ constitute an obstacle to the pursuit of economic activities at the level of the
659
+ Union, distort competition and impede authorities in the discharge of their responsibilities
660
+ under Union law. Such a difference in levels of protection is due to the existence
661
+ of differences in the implementation and application of Directive 95/46/EC.
662
+ pipeline_tag: sentence-similarity
663
+ library_name: sentence-transformers
664
+ metrics:
665
+ - cosine_accuracy@1
666
+ - cosine_accuracy@3
667
+ - cosine_accuracy@5
668
+ - cosine_accuracy@10
669
+ - cosine_precision@1
670
+ - cosine_precision@3
671
+ - cosine_precision@5
672
+ - cosine_precision@10
673
+ - cosine_recall@1
674
+ - cosine_recall@3
675
+ - cosine_recall@5
676
+ - cosine_recall@10
677
+ - cosine_ndcg@10
678
+ - cosine_mrr@10
679
+ - cosine_map@100
680
+ model-index:
681
+ - name: multilingual-e5-large
682
+ results:
683
+ - task:
684
+ type: information-retrieval
685
+ name: Information Retrieval
686
+ dataset:
687
+ name: dim 1024
688
+ type: dim_1024
689
+ metrics:
690
+ - type: cosine_accuracy@1
691
+ value: 0.36235595390524966
692
+ name: Cosine Accuracy@1
693
+ - type: cosine_accuracy@3
694
+ value: 0.3681177976952625
695
+ name: Cosine Accuracy@3
696
+ - type: cosine_accuracy@5
697
+ value: 0.39308578745198464
698
+ name: Cosine Accuracy@5
699
+ - type: cosine_accuracy@10
700
+ value: 0.4334186939820743
701
+ name: Cosine Accuracy@10
702
+ - type: cosine_precision@1
703
+ value: 0.36235595390524966
704
+ name: Cosine Precision@1
705
+ - type: cosine_precision@3
706
+ value: 0.36192915066154496
707
+ name: Cosine Precision@3
708
+ - type: cosine_precision@5
709
+ value: 0.35172855313700385
710
+ name: Cosine Precision@5
711
+ - type: cosine_precision@10
712
+ value: 0.3176696542893726
713
+ name: Cosine Precision@10
714
+ - type: cosine_recall@1
715
+ value: 0.04346309464734114
716
+ name: Cosine Recall@1
717
+ - type: cosine_recall@3
718
+ value: 0.12757812796185336
719
+ name: Cosine Recall@3
720
+ - type: cosine_recall@5
721
+ value: 0.19200836801442767
722
+ name: Cosine Recall@5
723
+ - type: cosine_recall@10
724
+ value: 0.28096984500258326
725
+ name: Cosine Recall@10
726
+ - type: cosine_ndcg@10
727
+ value: 0.3858809020056271
728
+ name: Cosine Ndcg@10
729
+ - type: cosine_mrr@10
730
+ value: 0.37430415828303115
731
+ name: Cosine Mrr@10
732
+ - type: cosine_map@100
733
+ value: 0.45394800707643057
734
+ name: Cosine Map@100
735
+ - task:
736
+ type: information-retrieval
737
+ name: Information Retrieval
738
+ dataset:
739
+ name: dim 768
740
+ type: dim_768
741
+ metrics:
742
+ - type: cosine_accuracy@1
743
+ value: 0.3591549295774648
744
+ name: Cosine Accuracy@1
745
+ - type: cosine_accuracy@3
746
+ value: 0.3649167733674776
747
+ name: Cosine Accuracy@3
748
+ - type: cosine_accuracy@5
749
+ value: 0.3892445582586428
750
+ name: Cosine Accuracy@5
751
+ - type: cosine_accuracy@10
752
+ value: 0.4334186939820743
753
+ name: Cosine Accuracy@10
754
+ - type: cosine_precision@1
755
+ value: 0.3591549295774648
756
+ name: Cosine Precision@1
757
+ - type: cosine_precision@3
758
+ value: 0.3587281263337601
759
+ name: Cosine Precision@3
760
+ - type: cosine_precision@5
761
+ value: 0.34852752880921894
762
+ name: Cosine Precision@5
763
+ - type: cosine_precision@10
764
+ value: 0.31670934699103714
765
+ name: Cosine Precision@10
766
+ - type: cosine_recall@1
767
+ value: 0.04250079684114586
768
+ name: Cosine Recall@1
769
+ - type: cosine_recall@3
770
+ value: 0.12462187901616553
771
+ name: Cosine Recall@3
772
+ - type: cosine_recall@5
773
+ value: 0.1875478484365334
774
+ name: Cosine Recall@5
775
+ - type: cosine_recall@10
776
+ value: 0.27695909667507057
777
+ name: Cosine Recall@10
778
+ - type: cosine_ndcg@10
779
+ value: 0.38308181752122755
780
+ name: Cosine Ndcg@10
781
+ - type: cosine_mrr@10
782
+ value: 0.37149335406377615
783
+ name: Cosine Mrr@10
784
+ - type: cosine_map@100
785
+ value: 0.4493001842217619
786
+ name: Cosine Map@100
787
+ - task:
788
+ type: information-retrieval
789
+ name: Information Retrieval
790
+ dataset:
791
+ name: dim 512
792
+ type: dim_512
793
+ metrics:
794
+ - type: cosine_accuracy@1
795
+ value: 0.35979513444302175
796
+ name: Cosine Accuracy@1
797
+ - type: cosine_accuracy@3
798
+ value: 0.36555697823303457
799
+ name: Cosine Accuracy@3
800
+ - type: cosine_accuracy@5
801
+ value: 0.3911651728553137
802
+ name: Cosine Accuracy@5
803
+ - type: cosine_accuracy@10
804
+ value: 0.4334186939820743
805
+ name: Cosine Accuracy@10
806
+ - type: cosine_precision@1
807
+ value: 0.35979513444302175
808
+ name: Cosine Precision@1
809
+ - type: cosine_precision@3
810
+ value: 0.35936833119931705
811
+ name: Cosine Precision@3
812
+ - type: cosine_precision@5
813
+ value: 0.34967989756722156
814
+ name: Cosine Precision@5
815
+ - type: cosine_precision@10
816
+ value: 0.3173495518565941
817
+ name: Cosine Precision@10
818
+ - type: cosine_recall@1
819
+ value: 0.04265405128130224
820
+ name: Cosine Recall@1
821
+ - type: cosine_recall@3
822
+ value: 0.12523102347193127
823
+ name: Cosine Recall@3
824
+ - type: cosine_recall@5
825
+ value: 0.18912519336740205
826
+ name: Cosine Recall@5
827
+ - type: cosine_recall@10
828
+ value: 0.2781876565001863
829
+ name: Cosine Recall@10
830
+ - type: cosine_ndcg@10
831
+ value: 0.3843750966464458
832
+ name: Cosine Ndcg@10
833
+ - type: cosine_mrr@10
834
+ value: 0.37212542934373866
835
+ name: Cosine Mrr@10
836
+ - type: cosine_map@100
837
+ value: 0.4476805587612892
838
+ name: Cosine Map@100
839
+ - task:
840
+ type: information-retrieval
841
+ name: Information Retrieval
842
+ dataset:
843
+ name: dim 256
844
+ type: dim_256
845
+ metrics:
846
+ - type: cosine_accuracy@1
847
+ value: 0.3437900128040973
848
+ name: Cosine Accuracy@1
849
+ - type: cosine_accuracy@3
850
+ value: 0.34763124199743917
851
+ name: Cosine Accuracy@3
852
+ - type: cosine_accuracy@5
853
+ value: 0.3764404609475032
854
+ name: Cosine Accuracy@5
855
+ - type: cosine_accuracy@10
856
+ value: 0.41101152368758004
857
+ name: Cosine Accuracy@10
858
+ - type: cosine_precision@1
859
+ value: 0.3437900128040973
860
+ name: Cosine Precision@1
861
+ - type: cosine_precision@3
862
+ value: 0.342936406316688
863
+ name: Cosine Precision@3
864
+ - type: cosine_precision@5
865
+ value: 0.33457106274007686
866
+ name: Cosine Precision@5
867
+ - type: cosine_precision@10
868
+ value: 0.3040973111395647
869
+ name: Cosine Precision@10
870
+ - type: cosine_recall@1
871
+ value: 0.04013102608834382
872
+ name: Cosine Recall@1
873
+ - type: cosine_recall@3
874
+ value: 0.11771735023719074
875
+ name: Cosine Recall@3
876
+ - type: cosine_recall@5
877
+ value: 0.17837935755014916
878
+ name: Cosine Recall@5
879
+ - type: cosine_recall@10
880
+ value: 0.2648598688529433
881
+ name: Cosine Recall@10
882
+ - type: cosine_ndcg@10
883
+ value: 0.3670052960875804
884
+ name: Cosine Ndcg@10
885
+ - type: cosine_mrr@10
886
+ value: 0.3551361197487955
887
+ name: Cosine Mrr@10
888
+ - type: cosine_map@100
889
+ value: 0.4298669852983799
890
+ name: Cosine Map@100
891
+ - task:
892
+ type: information-retrieval
893
+ name: Information Retrieval
894
+ dataset:
895
+ name: dim 128
896
+ type: dim_128
897
+ metrics:
898
+ - type: cosine_accuracy@1
899
+ value: 0.3085787451984635
900
+ name: Cosine Accuracy@1
901
+ - type: cosine_accuracy@3
902
+ value: 0.31241997439180536
903
+ name: Cosine Accuracy@3
904
+ - type: cosine_accuracy@5
905
+ value: 0.3361075544174136
906
+ name: Cosine Accuracy@5
907
+ - type: cosine_accuracy@10
908
+ value: 0.37964148527528807
909
+ name: Cosine Accuracy@10
910
+ - type: cosine_precision@1
911
+ value: 0.3085787451984635
912
+ name: Cosine Precision@1
913
+ - type: cosine_precision@3
914
+ value: 0.3079385403329065
915
+ name: Cosine Precision@3
916
+ - type: cosine_precision@5
917
+ value: 0.29961587708066584
918
+ name: Cosine Precision@5
919
+ - type: cosine_precision@10
920
+ value: 0.2752880921895006
921
+ name: Cosine Precision@10
922
+ - type: cosine_recall@1
923
+ value: 0.036297623853982414
924
+ name: Cosine Recall@1
925
+ - type: cosine_recall@3
926
+ value: 0.10638786483158841
927
+ name: Cosine Recall@3
928
+ - type: cosine_recall@5
929
+ value: 0.16032639984514846
930
+ name: Cosine Recall@5
931
+ - type: cosine_recall@10
932
+ value: 0.24000960695821508
933
+ name: Cosine Recall@10
934
+ - type: cosine_ndcg@10
935
+ value: 0.3312285498294292
936
+ name: Cosine Ndcg@10
937
+ - type: cosine_mrr@10
938
+ value: 0.3199812511432227
939
+ name: Cosine Mrr@10
940
+ - type: cosine_map@100
941
+ value: 0.3963095303049961
942
+ name: Cosine Map@100
943
+ - task:
944
+ type: information-retrieval
945
+ name: Information Retrieval
946
+ dataset:
947
+ name: dim 64
948
+ type: dim_64
949
+ metrics:
950
+ - type: cosine_accuracy@1
951
+ value: 0.2740076824583867
952
+ name: Cosine Accuracy@1
953
+ - type: cosine_accuracy@3
954
+ value: 0.27848911651728553
955
+ name: Cosine Accuracy@3
956
+ - type: cosine_accuracy@5
957
+ value: 0.30153649167733676
958
+ name: Cosine Accuracy@5
959
+ - type: cosine_accuracy@10
960
+ value: 0.3354673495518566
961
+ name: Cosine Accuracy@10
962
+ - type: cosine_precision@1
963
+ value: 0.2740076824583867
964
+ name: Cosine Precision@1
965
+ - type: cosine_precision@3
966
+ value: 0.27315407597097735
967
+ name: Cosine Precision@3
968
+ - type: cosine_precision@5
969
+ value: 0.2670934699103713
970
+ name: Cosine Precision@5
971
+ - type: cosine_precision@10
972
+ value: 0.24571062740076827
973
+ name: Cosine Precision@10
974
+ - type: cosine_recall@1
975
+ value: 0.03167890172057568
976
+ name: Cosine Recall@1
977
+ - type: cosine_recall@3
978
+ value: 0.09267023360511464
979
+ name: Cosine Recall@3
980
+ - type: cosine_recall@5
981
+ value: 0.14048625468314752
982
+ name: Cosine Recall@5
983
+ - type: cosine_recall@10
984
+ value: 0.21092883720941633
985
+ name: Cosine Recall@10
986
+ - type: cosine_ndcg@10
987
+ value: 0.29402896525927075
988
+ name: Cosine Ndcg@10
989
+ - type: cosine_mrr@10
990
+ value: 0.28429414873076814
991
+ name: Cosine Mrr@10
992
+ - type: cosine_map@100
993
+ value: 0.3539045084602349
994
+ name: Cosine Map@100
995
+ ---
996
+
997
+ # multilingual-e5-large
998
+
999
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
1000
+
1001
+ ## Model Details
1002
+
1003
+ ### Model Description
1004
+ - **Model Type:** Sentence Transformer
1005
+ - **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) <!-- at revision 0dc5580a448e4284468b8909bae50fa925907bc5 -->
1006
+ - **Maximum Sequence Length:** 512 tokens
1007
+ - **Output Dimensionality:** 1024 dimensions
1008
+ - **Similarity Function:** Cosine Similarity
1009
+ <!-- - **Training Dataset:** Unknown -->
1010
+ - **Language:** en
1011
+ - **License:** apache-2.0
1012
+
1013
+ ### Model Sources
1014
+
1015
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
1016
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
1017
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
1018
+
1019
+ ### Full Model Architecture
1020
+
1021
+ ```
1022
+ SentenceTransformer(
1023
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
1024
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
1025
+ (2): Normalize()
1026
+ )
1027
+ ```
1028
+
1029
+ ## Usage
1030
+
1031
+ ### Direct Usage (Sentence Transformers)
1032
+
1033
+ First install the Sentence Transformers library:
1034
+
1035
+ ```bash
1036
+ pip install -U sentence-transformers
1037
+ ```
1038
+
1039
+ Then you can load this model and run inference.
1040
+ ```python
1041
+ from sentence_transformers import SentenceTransformer
1042
+
1043
+ # Download from the 🤗 Hub
1044
+ model = SentenceTransformer("sentence_transformers_model_id")  # TODO: replace with this model's Hub repo id
1045
+ # Run inference
1046
+ sentences = [
1047
+ 'What may impede authorities in the discharge of their responsibilities under Union law?',
1048
+ 'The objectives and principles of Directive 95/46/EC remain sound, but it has not prevented fragmentation in the implementation of data protection across the Union, legal uncertainty or a widespread public perception that there are significant risks to the protection of natural persons, in particular with regard to online activity. Differences in the level of protection of the rights and freedoms of natural persons, in particular the right to the protection of personal data, with regard to the processing of personal data in the Member States may prevent the free flow of personal data throughout the Union. Those differences may therefore constitute an obstacle to the pursuit of economic activities at the level of the Union, distort competition and impede authorities in the discharge of their responsibilities under Union law. Such a difference in levels of protection is due to the existence of differences in the implementation and application of Directive 95/46/EC.',
1049
+ 'This Regulation is without prejudice to international agreements concluded between the Union and third countries regulating the transfer of personal data including appropriate safeguards for the data subjects. Member States may conclude international agreements which involve the transfer of personal data to third countries or international organisations, as far as such agreements do not affect this Regulation or any other provisions of Union law and include an appropriate level of protection for the fundamental rights of the data subjects.',
1050
+ ]
1051
+ embeddings = model.encode(sentences)
1052
+ print(embeddings.shape)
1053
+ # [3, 1024]
1054
+
1055
+ # Get the similarity scores for the embeddings
1056
+ similarities = model.similarity(embeddings, embeddings)
1057
+ print(similarities)
1058
+ # tensor([[1.0000, 0.5388, 0.3874],
1059
+ # [0.5388, 1.0000, 0.6300],
1060
+ # [0.3874, 0.6300, 1.0000]])
1061
+ ```
1062
+
1063
+ <!--
1064
+ ### Direct Usage (Transformers)
1065
+
1066
+ <details><summary>Click to see the direct usage in Transformers</summary>
1067
+
1068
+ </details>
1069
+ -->
1070
+
1071
+ <!--
1072
+ ### Downstream Usage (Sentence Transformers)
1073
+
1074
+ You can finetune this model on your own dataset.
1075
+
1076
+ <details><summary>Click to expand</summary>
1077
+
1078
+ </details>
1079
+ -->
1080
+
1081
+ <!--
1082
+ ### Out-of-Scope Use
1083
+
1084
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
1085
+ -->
1086
+
1087
+ ## Evaluation
1088
+
1089
+ ### Metrics
1090
+
1091
+ #### Information Retrieval
1092
+
1093
+ * Dataset: `dim_1024`
1094
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
1095
+ ```json
1096
+ {
1097
+ "truncate_dim": 1024
1098
+ }
1099
+ ```
1100
+
1101
+ | Metric | Value |
1102
+ |:--------------------|:-----------|
1103
+ | cosine_accuracy@1 | 0.3624 |
1104
+ | cosine_accuracy@3 | 0.3681 |
1105
+ | cosine_accuracy@5 | 0.3931 |
1106
+ | cosine_accuracy@10 | 0.4334 |
1107
+ | cosine_precision@1 | 0.3624 |
1108
+ | cosine_precision@3 | 0.3619 |
1109
+ | cosine_precision@5 | 0.3517 |
1110
+ | cosine_precision@10 | 0.3177 |
1111
+ | cosine_recall@1 | 0.0435 |
1112
+ | cosine_recall@3 | 0.1276 |
1113
+ | cosine_recall@5 | 0.192 |
1114
+ | cosine_recall@10 | 0.281 |
1115
+ | **cosine_ndcg@10** | **0.3859** |
1116
+ | cosine_mrr@10 | 0.3743 |
1117
+ | cosine_map@100 | 0.4539 |
1118
+
1119
+ #### Information Retrieval
1120
+
1121
+ * Dataset: `dim_768`
1122
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
1123
+ ```json
1124
+ {
1125
+ "truncate_dim": 768
1126
+ }
1127
+ ```
1128
+
1129
+ | Metric | Value |
1130
+ |:--------------------|:-----------|
1131
+ | cosine_accuracy@1 | 0.3592 |
1132
+ | cosine_accuracy@3 | 0.3649 |
1133
+ | cosine_accuracy@5 | 0.3892 |
1134
+ | cosine_accuracy@10 | 0.4334 |
1135
+ | cosine_precision@1 | 0.3592 |
1136
+ | cosine_precision@3 | 0.3587 |
1137
+ | cosine_precision@5 | 0.3485 |
1138
+ | cosine_precision@10 | 0.3167 |
1139
+ | cosine_recall@1 | 0.0425 |
1140
+ | cosine_recall@3 | 0.1246 |
1141
+ | cosine_recall@5 | 0.1875 |
1142
+ | cosine_recall@10 | 0.277 |
1143
+ | **cosine_ndcg@10** | **0.3831** |
1144
+ | cosine_mrr@10 | 0.3715 |
1145
+ | cosine_map@100 | 0.4493 |
1146
+
1147
+ #### Information Retrieval
1148
+
1149
+ * Dataset: `dim_512`
1150
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
1151
+ ```json
1152
+ {
1153
+ "truncate_dim": 512
1154
+ }
1155
+ ```
1156
+
1157
+ | Metric | Value |
1158
+ |:--------------------|:-----------|
1159
+ | cosine_accuracy@1 | 0.3598 |
1160
+ | cosine_accuracy@3 | 0.3656 |
1161
+ | cosine_accuracy@5 | 0.3912 |
1162
+ | cosine_accuracy@10 | 0.4334 |
1163
+ | cosine_precision@1 | 0.3598 |
1164
+ | cosine_precision@3 | 0.3594 |
1165
+ | cosine_precision@5 | 0.3497 |
1166
+ | cosine_precision@10 | 0.3173 |
1167
+ | cosine_recall@1 | 0.0427 |
1168
+ | cosine_recall@3 | 0.1252 |
1169
+ | cosine_recall@5 | 0.1891 |
1170
+ | cosine_recall@10 | 0.2782 |
1171
+ | **cosine_ndcg@10** | **0.3844** |
1172
+ | cosine_mrr@10 | 0.3721 |
1173
+ | cosine_map@100 | 0.4477 |
1174
+
1175
+ #### Information Retrieval
1176
+
1177
+ * Dataset: `dim_256`
1178
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
1179
+ ```json
1180
+ {
1181
+ "truncate_dim": 256
1182
+ }
1183
+ ```
1184
+
1185
+ | Metric | Value |
1186
+ |:--------------------|:----------|
1187
+ | cosine_accuracy@1 | 0.3438 |
1188
+ | cosine_accuracy@3 | 0.3476 |
1189
+ | cosine_accuracy@5 | 0.3764 |
1190
+ | cosine_accuracy@10 | 0.411 |
1191
+ | cosine_precision@1 | 0.3438 |
1192
+ | cosine_precision@3 | 0.3429 |
1193
+ | cosine_precision@5 | 0.3346 |
1194
+ | cosine_precision@10 | 0.3041 |
1195
+ | cosine_recall@1 | 0.0401 |
1196
+ | cosine_recall@3 | 0.1177 |
1197
+ | cosine_recall@5 | 0.1784 |
1198
+ | cosine_recall@10 | 0.2649 |
1199
+ | **cosine_ndcg@10** | **0.367** |
1200
+ | cosine_mrr@10 | 0.3551 |
1201
+ | cosine_map@100 | 0.4299 |
1202
+
1203
+ #### Information Retrieval
1204
+
1205
+ * Dataset: `dim_128`
1206
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
1207
+ ```json
1208
+ {
1209
+ "truncate_dim": 128
1210
+ }
1211
+ ```
1212
+
1213
+ | Metric | Value |
1214
+ |:--------------------|:-----------|
1215
+ | cosine_accuracy@1 | 0.3086 |
1216
+ | cosine_accuracy@3 | 0.3124 |
1217
+ | cosine_accuracy@5 | 0.3361 |
1218
+ | cosine_accuracy@10 | 0.3796 |
1219
+ | cosine_precision@1 | 0.3086 |
1220
+ | cosine_precision@3 | 0.3079 |
1221
+ | cosine_precision@5 | 0.2996 |
1222
+ | cosine_precision@10 | 0.2753 |
1223
+ | cosine_recall@1 | 0.0363 |
1224
+ | cosine_recall@3 | 0.1064 |
1225
+ | cosine_recall@5 | 0.1603 |
1226
+ | cosine_recall@10 | 0.24 |
1227
+ | **cosine_ndcg@10** | **0.3312** |
1228
+ | cosine_mrr@10 | 0.32 |
1229
+ | cosine_map@100 | 0.3963 |
1230
+
1231
+ #### Information Retrieval
1232
+
1233
+ * Dataset: `dim_64`
1234
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
1235
+ ```json
1236
+ {
1237
+ "truncate_dim": 64
1238
+ }
1239
+ ```
1240
+
1241
+ | Metric | Value |
1242
+ |:--------------------|:----------|
1243
+ | cosine_accuracy@1 | 0.274 |
1244
+ | cosine_accuracy@3 | 0.2785 |
1245
+ | cosine_accuracy@5 | 0.3015 |
1246
+ | cosine_accuracy@10 | 0.3355 |
1247
+ | cosine_precision@1 | 0.274 |
1248
+ | cosine_precision@3 | 0.2732 |
1249
+ | cosine_precision@5 | 0.2671 |
1250
+ | cosine_precision@10 | 0.2457 |
1251
+ | cosine_recall@1 | 0.0317 |
1252
+ | cosine_recall@3 | 0.0927 |
1253
+ | cosine_recall@5 | 0.1405 |
1254
+ | cosine_recall@10 | 0.2109 |
1255
+ | **cosine_ndcg@10** | **0.294** |
1256
+ | cosine_mrr@10 | 0.2843 |
1257
+ | cosine_map@100 | 0.3539 |
1258
+
1259
+ <!--
1260
+ ## Bias, Risks and Limitations
1261
+
1262
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
1263
+ -->
1264
+
1265
+ <!--
1266
+ ### Recommendations
1267
+
1268
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
1269
+ -->
1270
+
1271
+ ## Training Details
1272
+
1273
+ ### Training Dataset
1274
+
1275
+ #### Unnamed Dataset
1276
+
1277
+ * Size: 391 training samples
1278
+ * Columns: <code>anchor</code> and <code>positive</code>
1279
+ * Approximate statistics based on the first 391 samples:
1280
+ | | anchor | positive |
1281
+ |:--------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
1282
+ | type | string | string |
1283
+ | details | <ul><li>min: 8 tokens</li><li>mean: 16.9 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 27 tokens</li><li>mean: 372.91 tokens</li><li>max: 512 tokens</li></ul> |
1284
+ * Samples:
1285
+ | anchor | positive |
1286
+ |:-----------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
1287
+ | <code>On what date did the act occur?</code> | <code>Court (Civil/Criminal): Civil <br>Provisions: Directive 2015/366, Law 4537/2018 <br>Time of the act: 31.08.2022 <br>Outcome (not guilty, guilty): Partially accepts the claim. <br>Reasoning: The Athens Peace Court ordered the bank to return the amount that was withdrawn from the plaintiffs' account and to pay additional compensation for the moral damage they suffered. <br>Facts: The case concerns plaintiffs who fell victim to electronic fraud via phishing, resulting in the withdrawal of money from their bank account. The plaintiffs claimed that the bank did not take the necessary security measures to protect their accounts and sought compensation for the financial loss and moral damage they suffered. The court determined that the bank is responsible for the loss of the money, as it did not prove that the transactions were authorized by the plaintiffs. Furthermore, the court recognized that the bank's refusal to return the funds constitutes an infringement of the plaintiffs' personal rights, as it...</code> |
1288
+ | <code>For what purposes can more specific rules be provided regarding the employment context?</code> | <code>1.Member States may, by law or by collective agreements, provide for more specific rules to ensure the protection of the rights and freedoms in respect of the processing of employees' personal data in the employment context, in particular for the purposes of the recruitment, the performance of the contract of employment, including discharge of obligations laid down by law or by collective agreements, management, planning and organisation of work, equality and diversity in the workplace, health and safety at work, protection of employer's or customer's property and for the purposes of the exercise and enjoyment, on an individual or collective basis, of rights and benefits related to employment, and for the purpose of the termination of the employment relationship.<br>2.Those rules shall include suitable and specific measures to safeguard the data subject's human dignity, legitimate interests and fundamental rights, with particular regard to the transparency of processing, the transfer of p...</code> |
1289
+ | <code>On which date were transactions detailed in the provided text conducted?</code> | <code>**Court (Civil/Criminal): Civil**<br><br>**Provisions:**<br><br>**Time of commission of the act:**<br><br>**Outcome (not guilty, guilty):**<br><br>**Rationale:**<br><br>**Facts:**<br>The plaintiff holds credit card number ............ with the defendant banking corporation. Based on the application for alternative networks dated 19/7/2015 with number ......... submitted at a branch of the defendant, he was granted access to the electronic banking service (e-banking) to conduct banking transactions (debit, credit, updates, payments) remotely. On 30/11/2020, the plaintiff fell victim to electronic fraud through the "phishing" method, whereby an unknown perpetrator managed to withdraw a total amount of €3,121.75 from the aforementioned credit card. Specifically, the plaintiff received an email at 1:35 PM on 29/11/2020 from sender ...... with address ........, informing him that due to an impending system change, he needed to verify the mobile phone number linked to the credit card, urging him to complete the verification...</code> |
1290
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
1291
+ ```json
1292
+ {
1293
+ "loss": "MultipleNegativesRankingLoss",
1294
+ "matryoshka_dims": [
1295
+ 1024,
1296
+ 768,
1297
+ 512,
1298
+ 256,
1299
+ 128,
1300
+ 64
1301
+ ],
1302
+ "matryoshka_weights": [
1303
+ 1,
1304
+ 1,
1305
+ 1,
1306
+ 1,
1307
+ 1,
1308
+ 1
1309
+ ],
1310
+ "n_dims_per_step": -1
1311
+ }
1312
+ ```
1313
+
1314
+ ### Training Hyperparameters
1315
+ #### Non-Default Hyperparameters
1316
+
1317
+ - `eval_strategy`: epoch
1318
+ - `per_device_train_batch_size`: 2
1319
+ - `per_device_eval_batch_size`: 2
1320
+ - `gradient_accumulation_steps`: 2
1321
+ - `learning_rate`: 2e-05
1322
+ - `num_train_epochs`: 20
1323
+ - `lr_scheduler_type`: cosine
1324
+ - `warmup_ratio`: 0.1
1325
+ - `bf16`: True
1326
+ - `load_best_model_at_end`: True
1327
+ - `optim`: adamw_torch_fused
1328
+ - `batch_sampler`: no_duplicates
1329
+
1330
+ #### All Hyperparameters
1331
+ <details><summary>Click to expand</summary>
1332
+
1333
+ - `overwrite_output_dir`: False
1334
+ - `do_predict`: False
1335
+ - `eval_strategy`: epoch
1336
+ - `prediction_loss_only`: True
1337
+ - `per_device_train_batch_size`: 2
1338
+ - `per_device_eval_batch_size`: 2
1339
+ - `per_gpu_train_batch_size`: None
1340
+ - `per_gpu_eval_batch_size`: None
1341
+ - `gradient_accumulation_steps`: 2
1342
+ - `eval_accumulation_steps`: None
1343
+ - `torch_empty_cache_steps`: None
1344
+ - `learning_rate`: 2e-05
1345
+ - `weight_decay`: 0.0
1346
+ - `adam_beta1`: 0.9
1347
+ - `adam_beta2`: 0.999
1348
+ - `adam_epsilon`: 1e-08
1349
+ - `max_grad_norm`: 1.0
1350
+ - `num_train_epochs`: 20
1351
+ - `max_steps`: -1
1352
+ - `lr_scheduler_type`: cosine
1353
+ - `lr_scheduler_kwargs`: {}
1354
+ - `warmup_ratio`: 0.1
1355
+ - `warmup_steps`: 0
1356
+ - `log_level`: passive
1357
+ - `log_level_replica`: warning
1358
+ - `log_on_each_node`: True
1359
+ - `logging_nan_inf_filter`: True
1360
+ - `save_safetensors`: True
1361
+ - `save_on_each_node`: False
1362
+ - `save_only_model`: False
1363
+ - `restore_callback_states_from_checkpoint`: False
1364
+ - `no_cuda`: False
1365
+ - `use_cpu`: False
1366
+ - `use_mps_device`: False
1367
+ - `seed`: 42
1368
+ - `data_seed`: None
1369
+ - `jit_mode_eval`: False
1370
+ - `use_ipex`: False
1371
+ - `bf16`: True
1372
+ - `fp16`: False
1373
+ - `fp16_opt_level`: O1
1374
+ - `half_precision_backend`: auto
1375
+ - `bf16_full_eval`: False
1376
+ - `fp16_full_eval`: False
1377
+ - `tf32`: None
1378
+ - `local_rank`: 0
1379
+ - `ddp_backend`: None
1380
+ - `tpu_num_cores`: None
1381
+ - `tpu_metrics_debug`: False
1382
+ - `debug`: []
1383
+ - `dataloader_drop_last`: False
1384
+ - `dataloader_num_workers`: 0
1385
+ - `dataloader_prefetch_factor`: None
1386
+ - `past_index`: -1
1387
+ - `disable_tqdm`: False
1388
+ - `remove_unused_columns`: True
1389
+ - `label_names`: None
1390
+ - `load_best_model_at_end`: True
1391
+ - `ignore_data_skip`: False
1392
+ - `fsdp`: []
1393
+ - `fsdp_min_num_params`: 0
1394
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
1395
+ - `tp_size`: 0
1396
+ - `fsdp_transformer_layer_cls_to_wrap`: None
1397
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
1398
+ - `deepspeed`: None
1399
+ - `label_smoothing_factor`: 0.0
1400
+ - `optim`: adamw_torch_fused
1401
+ - `optim_args`: None
1402
+ - `adafactor`: False
1403
+ - `group_by_length`: False
1404
+ - `length_column_name`: length
1405
+ - `ddp_find_unused_parameters`: None
1406
+ - `ddp_bucket_cap_mb`: None
1407
+ - `ddp_broadcast_buffers`: False
1408
+ - `dataloader_pin_memory`: True
1409
+ - `dataloader_persistent_workers`: False
1410
+ - `skip_memory_metrics`: True
1411
+ - `use_legacy_prediction_loop`: False
1412
+ - `push_to_hub`: False
1413
+ - `resume_from_checkpoint`: None
1414
+ - `hub_model_id`: None
1415
+ - `hub_strategy`: every_save
1416
+ - `hub_private_repo`: None
1417
+ - `hub_always_push`: False
1418
+ - `gradient_checkpointing`: False
1419
+ - `gradient_checkpointing_kwargs`: None
1420
+ - `include_inputs_for_metrics`: False
1421
+ - `include_for_metrics`: []
1422
+ - `eval_do_concat_batches`: True
1423
+ - `fp16_backend`: auto
1424
+ - `push_to_hub_model_id`: None
1425
+ - `push_to_hub_organization`: None
1426
+ - `mp_parameters`:
1427
+ - `auto_find_batch_size`: False
1428
+ - `full_determinism`: False
1429
+ - `torchdynamo`: None
1430
+ - `ray_scope`: last
1431
+ - `ddp_timeout`: 1800
1432
+ - `torch_compile`: False
1433
+ - `torch_compile_backend`: None
1434
+ - `torch_compile_mode`: None
1435
+ - `include_tokens_per_second`: False
1436
+ - `include_num_input_tokens_seen`: False
1437
+ - `neftune_noise_alpha`: None
1438
+ - `optim_target_modules`: None
1439
+ - `batch_eval_metrics`: False
1440
+ - `eval_on_start`: False
1441
+ - `use_liger_kernel`: False
1442
+ - `eval_use_gather_object`: False
1443
+ - `average_tokens_across_devices`: False
1444
+ - `prompts`: None
1445
+ - `batch_sampler`: no_duplicates
1446
+ - `multi_dataset_batch_sampler`: proportional
1447
+ - `router_mapping`: {}
1448
+ - `learning_rate_mapping`: {}
1449
+
1450
+ </details>
1451
+
1452
+ ### Training Logs
1453
+ | Epoch | Step | Training Loss | dim_1024_cosine_ndcg@10 | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
1454
+ |:------:|:----:|:-------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
1455
+ | 0.0102 | 1 | 15.8588 | - | - | - | - | - | - |
1456
+ | 0.0204 | 2 | 10.7411 | - | - | - | - | - | - |
1457
+ | 0.0306 | 3 | 1.3873 | - | - | - | - | - | - |
1458
+ | 0.0408 | 4 | 0.9088 | - | - | - | - | - | - |
1459
+ | 0.0510 | 5 | 0.0077 | - | - | - | - | - | - |
1460
+ | 0.0612 | 6 | 0.6016 | - | - | - | - | - | - |
1461
+ | 0.0714 | 7 | 1.6714 | - | - | - | - | - | - |
1462
+ | 0.0816 | 8 | 0.4211 | - | - | - | - | - | - |
1463
+ | 0.0918 | 9 | 0.1996 | - | - | - | - | - | - |
1464
+ | 0.1020 | 10 | 0.1895 | - | - | - | - | - | - |
1465
+ | 0.1122 | 11 | 0.1358 | - | - | - | - | - | - |
1466
+ | 0.1224 | 12 | 0.5552 | - | - | - | - | - | - |
1467
+ | 0.1327 | 13 | 0.5141 | - | - | - | - | - | - |
1468
+ | 0.1429 | 14 | 0.1955 | - | - | - | - | - | - |
1469
+ | 0.1531 | 15 | 1.9114 | - | - | - | - | - | - |
1470
+ | 0.1633 | 16 | 0.2645 | - | - | - | - | - | - |
1471
+ | 0.1735 | 17 | 7.5545 | - | - | - | - | - | - |
1472
+ | 0.1837 | 18 | 0.4297 | - | - | - | - | - | - |
1473
+ | 0.1939 | 19 | 0.678 | - | - | - | - | - | - |
1474
+ | 0.2041 | 20 | 0.4634 | - | - | - | - | - | - |
1475
+ | 0.2143 | 21 | 4.2252 | - | - | - | - | - | - |
1476
+ | 0.2245 | 22 | 3.9985 | - | - | - | - | - | - |
1477
+ | 0.2347 | 23 | 1.9242 | - | - | - | - | - | - |
1478
+ | 0.2449 | 24 | 3.2716 | - | - | - | - | - | - |
1479
+ | 0.2551 | 25 | 0.123 | - | - | - | - | - | - |
1480
+ | 0.2653 | 26 | 1.0011 | - | - | - | - | - | - |
1481
+ | 0.2755 | 27 | 3.5846 | - | - | - | - | - | - |
1482
+ | 0.2857 | 28 | 1.1365 | - | - | - | - | - | - |
1483
+ | 0.2959 | 29 | 0.7149 | - | - | - | - | - | - |
1484
+ | 0.3061 | 30 | 1.2629 | - | - | - | - | - | - |
1485
+ | 0.3163 | 31 | 0.6459 | - | - | - | - | - | - |
1486
+ | 0.3265 | 32 | 0.1934 | - | - | - | - | - | - |
1487
+ | 0.3367 | 33 | 1.4897 | - | - | - | - | - | - |
1488
+ | 0.3469 | 34 | 0.8561 | - | - | - | - | - | - |
1489
+ | 0.3571 | 35 | 0.0128 | - | - | - | - | - | - |
1490
+ | 0.3673 | 36 | 1.4952 | - | - | - | - | - | - |
1491
+ | 0.3776 | 37 | 0.3181 | - | - | - | - | - | - |
1492
+ | 0.3878 | 38 | 6.3681 | - | - | - | - | - | - |
1493
+ | 0.3980 | 39 | 1.4487 | - | - | - | - | - | - |
1494
+ | 0.4082 | 40 | 0.1702 | - | - | - | - | - | - |
1495
+ | 0.4184 | 41 | 0.2513 | - | - | - | - | - | - |
1496
+ | 0.4286 | 42 | 4.1595 | - | - | - | - | - | - |
1497
+ | 0.4388 | 43 | 2.7347 | - | - | - | - | - | - |
1498
+ | 0.4490 | 44 | 2.3182 | - | - | - | - | - | - |
1499
+ | 0.4592 | 45 | 1.3285 | - | - | - | - | - | - |
1500
+ | 0.4694 | 46 | 2.1155 | - | - | - | - | - | - |
1501
+ | 0.4796 | 47 | 0.0645 | - | - | - | - | - | - |
1502
+ | 0.4898 | 48 | 7.1283 | - | - | - | - | - | - |
1503
+ | 0.5 | 49 | 0.711 | - | - | - | - | - | - |
1504
+ | 0.5102 | 50 | 0.4716 | - | - | - | - | - | - |
1505
+ | 0.5204 | 51 | 2.2895 | - | - | - | - | - | - |
1506
+ | 0.5306 | 52 | 1.9235 | - | - | - | - | - | - |
1507
+ | 0.5408 | 53 | 0.8777 | - | - | - | - | - | - |
1508
+ | 0.5510 | 54 | 0.0038 | - | - | - | - | - | - |
1509
+ | 0.5612 | 55 | 1.5598 | - | - | - | - | - | - |
1510
+ | 0.5714 | 56 | 0.0177 | - | - | - | - | - | - |
1511
+ | 0.5816 | 57 | 0.0837 | - | - | - | - | - | - |
1512
+ | 0.5918 | 58 | 0.0429 | - | - | - | - | - | - |
1513
+ | 0.6020 | 59 | 0.0071 | - | - | - | - | - | - |
1514
+ | 0.6122 | 60 | 2.7217 | - | - | - | - | - | - |
1515
+ | 0.6224 | 61 | 3.9013 | - | - | - | - | - | - |
1516
+ | 0.6327 | 62 | 1.417 | - | - | - | - | - | - |
1517
+ | 0.6429 | 63 | 3.5854 | - | - | - | - | - | - |
1518
+ | 0.6531 | 64 | 12.918 | - | - | - | - | - | - |
1519
+ | 0.6633 | 65 | 7.1566 | - | - | - | - | - | - |
1520
+ | 0.6735 | 66 | 3.9897 | - | - | - | - | - | - |
1521
+ | 0.6837 | 67 | 8.1139 | - | - | - | - | - | - |
1522
+ | 0.6939 | 68 | 5.7005 | - | - | - | - | - | - |
1523
+ | 0.7041 | 69 | 0.1219 | - | - | - | - | - | - |
1524
+ | 0.7143 | 70 | 5.7849 | - | - | - | - | - | - |
1525
+ | 0.7245 | 71 | 1.0726 | - | - | - | - | - | - |
1526
+ | 0.7347 | 72 | 1.2599 | - | - | - | - | - | - |
1527
+ | 0.7449 | 73 | 0.6473 | - | - | - | - | - | - |
1528
+ | 0.7551 | 74 | 1.0397 | - | - | - | - | - | - |
1529
+ | 0.7653 | 75 | 1.5555 | - | - | - | - | - | - |
1530
+ | 0.7755 | 76 | 0.0078 | - | - | - | - | - | - |
1531
+ | 0.7857 | 77 | 0.0048 | - | - | - | - | - | - |
1532
+ | 0.7959 | 78 | 0.0323 | - | - | - | - | - | - |
1533
+ | 0.8061 | 79 | 1.7425 | - | - | - | - | - | - |
1534
+ | 0.8163 | 80 | 0.0035 | - | - | - | - | - | - |
1535
+ | 0.8265 | 81 | 6.4849 | - | - | - | - | - | - |
1536
+ | 0.8367 | 82 | 4.3767 | - | - | - | - | - | - |
1537
+ | 0.8469 | 83 | 0.0186 | - | - | - | - | - | - |
1538
+ | 0.8571 | 84 | 0.0008 | - | - | - | - | - | - |
1539
+ | 0.8673 | 85 | 0.8354 | - | - | - | - | - | - |
1540
+ | 0.8776 | 86 | 0.0162 | - | - | - | - | - | - |
1541
+ | 0.8878 | 87 | 0.1282 | - | - | - | - | - | - |
1542
+ | 0.8980 | 88 | 0.4514 | - | - | - | - | - | - |
1543
+ | 0.9082 | 89 | 4.9103 | - | - | - | - | - | - |
1544
+ | 0.9184 | 90 | 0.0762 | - | - | - | - | - | - |
1545
+ | 0.9286 | 91 | 0.0444 | - | - | - | - | - | - |
1546
+ | 0.9388 | 92 | 1.8609 | - | - | - | - | - | - |
1547
+ | 0.9490 | 93 | 0.1489 | - | - | - | - | - | - |
1548
+ | 0.9592 | 94 | 0.5926 | - | - | - | - | - | - |
1549
+ | 0.9694 | 95 | 0.5344 | - | - | - | - | - | - |
1550
+ | 0.9796 | 96 | 0.4693 | - | - | - | - | - | - |
1551
+ | 0.9898 | 97 | 9.2282 | - | - | - | - | - | - |
1552
+ | 1.0 | 98 | 4.6238 | 0.3859 | 0.3831 | 0.3844 | 0.3670 | 0.3312 | 0.2940 |
1553
+
1554
+
1555
+ ### Framework Versions
1556
+ - Python: 3.12.11
1557
+ - Sentence Transformers: 5.1.0
1558
+ - Transformers: 4.51.3
1559
+ - PyTorch: 2.8.0+cu126
1560
+ - Accelerate: 1.10.1
1561
+ - Datasets: 4.0.0
1562
+ - Tokenizers: 0.21.4
1563
+
1564
+ ## Citation
1565
+
1566
+ ### BibTeX
1567
+
1568
+ #### Sentence Transformers
1569
+ ```bibtex
1570
+ @inproceedings{reimers-2019-sentence-bert,
1571
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
1572
+ author = "Reimers, Nils and Gurevych, Iryna",
1573
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
1574
+ month = "11",
1575
+ year = "2019",
1576
+ publisher = "Association for Computational Linguistics",
1577
+ url = "https://arxiv.org/abs/1908.10084",
1578
+ }
1579
+ ```
1580
+
1581
+ #### MatryoshkaLoss
1582
+ ```bibtex
1583
+ @misc{kusupati2024matryoshka,
1584
+ title={Matryoshka Representation Learning},
1585
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
1586
+ year={2024},
1587
+ eprint={2205.13147},
1588
+ archivePrefix={arXiv},
1589
+ primaryClass={cs.LG}
1590
+ }
1591
+ ```
1592
+
1593
+ #### MultipleNegativesRankingLoss
1594
+ ```bibtex
1595
+ @misc{henderson2017efficient,
1596
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
1597
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
1598
+ year={2017},
1599
+ eprint={1705.00652},
1600
+ archivePrefix={arXiv},
1601
+ primaryClass={cs.CL}
1602
+ }
1603
+ ```
1604
+
1605
+ <!--
1606
+ ## Glossary
1607
+
1608
+ *Clearly define terms in order to be accessible across audiences.*
1609
+ -->
1610
+
1611
+ <!--
1612
+ ## Model Card Authors
1613
+
1614
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
1615
+ -->
1616
+
1617
+ <!--
1618
+ ## Model Card Contact
1619
+
1620
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
1621
+ -->
checkpoint-98/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.51.3",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
checkpoint-98/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.51.3",
6
+ "pytorch": "2.8.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoint-98/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b8678a200e1ec3a97ec08f700f81cc6660e581d09862b47b576834736c0668
3
+ size 2239607176
checkpoint-98/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-98/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43981d3b1c8c7efd9d147726925594fec137b1d2137148a81f15c7a1d493486a
3
+ size 4471067142
checkpoint-98/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f05f697e2a026dbb8be0397c5f3215957e05bbf5897dea20c686e5f8917f13
3
+ size 14645
checkpoint-98/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8050407fb4fa517140d91f016be515b027290100821411e470b937a3a98f10c3
3
+ size 1465
checkpoint-98/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-98/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
checkpoint-98/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-98/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
checkpoint-98/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "max_length": 512,
51
+ "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "</s>",
57
+ "stride": 0,
58
+ "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "<unk>"
62
+ }
checkpoint-98/trainer_state.json ADDED
@@ -0,0 +1,827 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 98,
3
+ "best_metric": 0.3312285498294292,
4
+ "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 98,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.01020408163265306,
14
+ "grad_norm": 973.273681640625,
15
+ "learning_rate": 0.0,
16
+ "loss": 15.8588,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.02040816326530612,
21
+ "grad_norm": 1016.8517456054688,
22
+ "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 10.7411,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.030612244897959183,
28
+ "grad_norm": 166.88465881347656,
29
+ "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 1.3873,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.04081632653061224,
35
+ "grad_norm": 108.06741333007812,
36
+ "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 0.9088,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.05102040816326531,
42
+ "grad_norm": 1.1959134340286255,
43
+ "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 0.0077,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 0.061224489795918366,
49
+ "grad_norm": 130.83908081054688,
50
+ "learning_rate": 5.102040816326531e-07,
51
+ "loss": 0.6016,
52
+ "step": 6
53
+ },
54
+ {
55
+ "epoch": 0.07142857142857142,
56
+ "grad_norm": 318.3863525390625,
57
+ "learning_rate": 6.122448979591837e-07,
58
+ "loss": 1.6714,
59
+ "step": 7
60
+ },
61
+ {
62
+ "epoch": 0.08163265306122448,
63
+ "grad_norm": 74.26002502441406,
64
+ "learning_rate": 7.142857142857143e-07,
65
+ "loss": 0.4211,
66
+ "step": 8
67
+ },
68
+ {
69
+ "epoch": 0.09183673469387756,
70
+ "grad_norm": 32.4500846862793,
71
+ "learning_rate": 8.163265306122449e-07,
72
+ "loss": 0.1996,
73
+ "step": 9
74
+ },
75
+ {
76
+ "epoch": 0.10204081632653061,
77
+ "grad_norm": 41.27345275878906,
78
+ "learning_rate": 9.183673469387756e-07,
79
+ "loss": 0.1895,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.11224489795918367,
84
+ "grad_norm": 27.35291862487793,
85
+ "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 0.1358,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.12244897959183673,
91
+ "grad_norm": 103.75244903564453,
92
+ "learning_rate": 1.122448979591837e-06,
93
+ "loss": 0.5552,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.1326530612244898,
98
+ "grad_norm": 155.97923278808594,
99
+ "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 0.5141,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.14285714285714285,
105
+ "grad_norm": 53.757484436035156,
106
+ "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 0.1955,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.15306122448979592,
112
+ "grad_norm": 175.17491149902344,
113
+ "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 1.9114,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.16326530612244897,
119
+ "grad_norm": 49.02252197265625,
120
+ "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 0.2645,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.17346938775510204,
126
+ "grad_norm": 999.3756103515625,
127
+ "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 7.5545,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.1836734693877551,
133
+ "grad_norm": 149.2627410888672,
134
+ "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 0.4297,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.19387755102040816,
140
+ "grad_norm": 204.95181274414062,
141
+ "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 0.678,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.20408163265306123,
147
+ "grad_norm": 103.94851684570312,
148
+ "learning_rate": 1.938775510204082e-06,
149
+ "loss": 0.4634,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.21428571428571427,
154
+ "grad_norm": 536.7100219726562,
155
+ "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 4.2252,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.22448979591836735,
161
+ "grad_norm": 444.44805908203125,
162
+ "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 3.9985,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.23469387755102042,
168
+ "grad_norm": 170.50369262695312,
169
+ "learning_rate": 2.244897959183674e-06,
170
+ "loss": 1.9242,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.24489795918367346,
175
+ "grad_norm": 626.5487060546875,
176
+ "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 3.2716,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.25510204081632654,
182
+ "grad_norm": 51.353050231933594,
183
+ "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 0.123,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.2653061224489796,
189
+ "grad_norm": 108.25341796875,
190
+ "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 1.0011,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.2755102040816326,
196
+ "grad_norm": 322.83502197265625,
197
+ "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 3.5846,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.2857142857142857,
203
+ "grad_norm": 203.38458251953125,
204
+ "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 1.1365,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.29591836734693877,
210
+ "grad_norm": 127.78427124023438,
211
+ "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 0.7149,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.30612244897959184,
217
+ "grad_norm": 283.67645263671875,
218
+ "learning_rate": 2.959183673469388e-06,
219
+ "loss": 1.2629,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 0.3163265306122449,
224
+ "grad_norm": 82.65542602539062,
225
+ "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 0.6459,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 0.32653061224489793,
231
+ "grad_norm": 42.66185760498047,
232
+ "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 0.1934,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 0.336734693877551,
238
+ "grad_norm": 212.1294708251953,
239
+ "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 1.4897,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 0.3469387755102041,
245
+ "grad_norm": 188.0417022705078,
246
+ "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 0.8561,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 0.35714285714285715,
252
+ "grad_norm": 2.0467610359191895,
253
+ "learning_rate": 3.469387755102041e-06,
254
+ "loss": 0.0128,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 0.3673469387755102,
259
+ "grad_norm": 283.3966979980469,
260
+ "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.4952,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 0.37755102040816324,
266
+ "grad_norm": 60.74869155883789,
267
+ "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 0.3181,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 0.3877551020408163,
273
+ "grad_norm": 824.6165771484375,
274
+ "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 6.3681,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 0.3979591836734694,
280
+ "grad_norm": 231.1636962890625,
281
+ "learning_rate": 3.877551020408164e-06,
282
+ "loss": 1.4487,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 0.40816326530612246,
287
+ "grad_norm": 26.46611785888672,
288
+ "learning_rate": 3.979591836734694e-06,
289
+ "loss": 0.1702,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 0.41836734693877553,
294
+ "grad_norm": 75.88525390625,
295
+ "learning_rate": 4.081632653061225e-06,
296
+ "loss": 0.2513,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 0.42857142857142855,
301
+ "grad_norm": 465.83392333984375,
302
+ "learning_rate": 4.183673469387755e-06,
303
+ "loss": 4.1595,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 0.4387755102040816,
308
+ "grad_norm": 306.2772521972656,
309
+ "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 2.7347,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 0.4489795918367347,
315
+ "grad_norm": 488.9759521484375,
316
+ "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 2.3182,
318
+ "step": 44
319
+ },
320
+ {
321
+ "epoch": 0.45918367346938777,
322
+ "grad_norm": 355.1698913574219,
323
+ "learning_rate": 4.489795918367348e-06,
324
+ "loss": 1.3285,
325
+ "step": 45
326
+ },
327
+ {
328
+ "epoch": 0.46938775510204084,
329
+ "grad_norm": 263.558349609375,
330
+ "learning_rate": 4.591836734693878e-06,
331
+ "loss": 2.1155,
332
+ "step": 46
333
+ },
334
+ {
335
+ "epoch": 0.47959183673469385,
336
+ "grad_norm": 9.667963981628418,
337
+ "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.0645,
339
+ "step": 47
340
+ },
341
+ {
342
+ "epoch": 0.4897959183673469,
343
+ "grad_norm": 957.79345703125,
344
+ "learning_rate": 4.795918367346939e-06,
345
+ "loss": 7.1283,
346
+ "step": 48
347
+ },
348
+ {
349
+ "epoch": 0.5,
350
+ "grad_norm": 160.0965118408203,
351
+ "learning_rate": 4.897959183673469e-06,
352
+ "loss": 0.711,
353
+ "step": 49
354
+ },
355
+ {
356
+ "epoch": 0.5102040816326531,
357
+ "grad_norm": 93.697265625,
358
+ "learning_rate": 5e-06,
359
+ "loss": 0.4716,
360
+ "step": 50
361
+ },
362
+ {
363
+ "epoch": 0.5204081632653061,
364
+ "grad_norm": 292.9518737792969,
365
+ "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 2.2895,
367
+ "step": 51
368
+ },
369
+ {
370
+ "epoch": 0.5306122448979592,
371
+ "grad_norm": 335.4564514160156,
372
+ "learning_rate": 5.204081632653062e-06,
373
+ "loss": 1.9235,
374
+ "step": 52
375
+ },
376
+ {
377
+ "epoch": 0.5408163265306123,
378
+ "grad_norm": 138.63575744628906,
379
+ "learning_rate": 5.306122448979593e-06,
380
+ "loss": 0.8777,
381
+ "step": 53
382
+ },
383
+ {
384
+ "epoch": 0.5510204081632653,
385
+ "grad_norm": 1.011594533920288,
386
+ "learning_rate": 5.408163265306123e-06,
387
+ "loss": 0.0038,
388
+ "step": 54
389
+ },
390
+ {
391
+ "epoch": 0.5612244897959183,
392
+ "grad_norm": 506.25152587890625,
393
+ "learning_rate": 5.510204081632653e-06,
394
+ "loss": 1.5598,
395
+ "step": 55
396
+ },
397
+ {
398
+ "epoch": 0.5714285714285714,
399
+ "grad_norm": 2.2550530433654785,
400
+ "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.0177,
402
+ "step": 56
403
+ },
404
+ {
405
+ "epoch": 0.5816326530612245,
406
+ "grad_norm": 13.93323802947998,
407
+ "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 0.0837,
409
+ "step": 57
410
+ },
411
+ {
412
+ "epoch": 0.5918367346938775,
413
+ "grad_norm": 7.279649257659912,
414
+ "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.0429,
416
+ "step": 58
417
+ },
418
+ {
419
+ "epoch": 0.6020408163265306,
420
+ "grad_norm": 0.9923371076583862,
421
+ "learning_rate": 5.918367346938776e-06,
422
+ "loss": 0.0071,
423
+ "step": 59
424
+ },
425
+ {
426
+ "epoch": 0.6122448979591837,
427
+ "grad_norm": 743.8301391601562,
428
+ "learning_rate": 6.020408163265307e-06,
429
+ "loss": 2.7217,
430
+ "step": 60
431
+ },
432
+ {
433
+ "epoch": 0.6224489795918368,
434
+ "grad_norm": 227.04403686523438,
435
+ "learning_rate": 6.122448979591837e-06,
436
+ "loss": 3.9013,
437
+ "step": 61
438
+ },
439
+ {
440
+ "epoch": 0.6326530612244898,
441
+ "grad_norm": 193.12701416015625,
442
+ "learning_rate": 6.224489795918368e-06,
443
+ "loss": 1.417,
444
+ "step": 62
445
+ },
446
+ {
447
+ "epoch": 0.6428571428571429,
448
+ "grad_norm": 642.7814331054688,
449
+ "learning_rate": 6.326530612244899e-06,
450
+ "loss": 3.5854,
451
+ "step": 63
452
+ },
453
+ {
454
+ "epoch": 0.6530612244897959,
455
+ "grad_norm": 1007.544189453125,
456
+ "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 12.918,
458
+ "step": 64
459
+ },
460
+ {
461
+ "epoch": 0.6632653061224489,
462
+ "grad_norm": 1310.942138671875,
463
+ "learning_rate": 6.530612244897959e-06,
464
+ "loss": 7.1566,
465
+ "step": 65
466
+ },
467
+ {
468
+ "epoch": 0.673469387755102,
469
+ "grad_norm": 810.1301879882812,
470
+ "learning_rate": 6.63265306122449e-06,
471
+ "loss": 3.9897,
472
+ "step": 66
473
+ },
474
+ {
475
+ "epoch": 0.6836734693877551,
476
+ "grad_norm": 513.1759643554688,
477
+ "learning_rate": 6.734693877551021e-06,
478
+ "loss": 8.1139,
479
+ "step": 67
480
+ },
481
+ {
482
+ "epoch": 0.6938775510204082,
483
+ "grad_norm": 1414.8878173828125,
484
+ "learning_rate": 6.836734693877551e-06,
485
+ "loss": 5.7005,
486
+ "step": 68
487
+ },
488
+ {
489
+ "epoch": 0.7040816326530612,
490
+ "grad_norm": 31.607126235961914,
491
+ "learning_rate": 6.938775510204082e-06,
492
+ "loss": 0.1219,
493
+ "step": 69
494
+ },
495
+ {
496
+ "epoch": 0.7142857142857143,
497
+ "grad_norm": 799.9751586914062,
498
+ "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 5.7849,
500
+ "step": 70
501
+ },
502
+ {
503
+ "epoch": 0.7244897959183674,
504
+ "grad_norm": 132.71778869628906,
505
+ "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 1.0726,
507
+ "step": 71
508
+ },
509
+ {
510
+ "epoch": 0.7346938775510204,
511
+ "grad_norm": 256.61041259765625,
512
+ "learning_rate": 7.244897959183675e-06,
513
+ "loss": 1.2599,
514
+ "step": 72
515
+ },
516
+ {
517
+ "epoch": 0.7448979591836735,
518
+ "grad_norm": 192.0435333251953,
519
+ "learning_rate": 7.346938775510205e-06,
520
+ "loss": 0.6473,
521
+ "step": 73
522
+ },
523
+ {
524
+ "epoch": 0.7551020408163265,
525
+ "grad_norm": 293.7915954589844,
526
+ "learning_rate": 7.448979591836736e-06,
527
+ "loss": 1.0397,
528
+ "step": 74
529
+ },
530
+ {
531
+ "epoch": 0.7653061224489796,
532
+ "grad_norm": 312.2645263671875,
533
+ "learning_rate": 7.551020408163265e-06,
534
+ "loss": 1.5555,
535
+ "step": 75
536
+ },
537
+ {
538
+ "epoch": 0.7755102040816326,
539
+ "grad_norm": 1.417815923690796,
540
+ "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.0078,
542
+ "step": 76
543
+ },
544
+ {
545
+ "epoch": 0.7857142857142857,
546
+ "grad_norm": 1.4391653537750244,
547
+ "learning_rate": 7.755102040816327e-06,
548
+ "loss": 0.0048,
549
+ "step": 77
550
+ },
551
+ {
552
+ "epoch": 0.7959183673469388,
553
+ "grad_norm": 5.628185749053955,
554
+ "learning_rate": 7.857142857142858e-06,
555
+ "loss": 0.0323,
556
+ "step": 78
557
+ },
558
+ {
559
+ "epoch": 0.8061224489795918,
560
+ "grad_norm": 264.5353698730469,
561
+ "learning_rate": 7.959183673469388e-06,
562
+ "loss": 1.7425,
563
+ "step": 79
564
+ },
565
+ {
566
+ "epoch": 0.8163265306122449,
567
+ "grad_norm": 1.5278851985931396,
568
+ "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.0035,
570
+ "step": 80
571
+ },
572
+ {
573
+ "epoch": 0.826530612244898,
574
+ "grad_norm": 932.3336181640625,
575
+ "learning_rate": 8.16326530612245e-06,
576
+ "loss": 6.4849,
577
+ "step": 81
578
+ },
579
+ {
580
+ "epoch": 0.8367346938775511,
581
+ "grad_norm": 635.4749145507812,
582
+ "learning_rate": 8.26530612244898e-06,
583
+ "loss": 4.3767,
584
+ "step": 82
585
+ },
586
+ {
587
+ "epoch": 0.8469387755102041,
588
+ "grad_norm": 8.875201225280762,
589
+ "learning_rate": 8.36734693877551e-06,
590
+ "loss": 0.0186,
591
+ "step": 83
592
+ },
593
+ {
594
+ "epoch": 0.8571428571428571,
595
+ "grad_norm": 0.15500876307487488,
596
+ "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.0008,
598
+ "step": 84
599
+ },
600
+ {
601
+ "epoch": 0.8673469387755102,
602
+ "grad_norm": 269.5357666015625,
603
+ "learning_rate": 8.571428571428571e-06,
604
+ "loss": 0.8354,
605
+ "step": 85
606
+ },
607
+ {
608
+ "epoch": 0.8775510204081632,
609
+ "grad_norm": 5.054287910461426,
610
+ "learning_rate": 8.673469387755103e-06,
611
+ "loss": 0.0162,
612
+ "step": 86
613
+ },
614
+ {
615
+ "epoch": 0.8877551020408163,
616
+ "grad_norm": 84.90735626220703,
617
+ "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.1282,
619
+ "step": 87
620
+ },
621
+ {
622
+ "epoch": 0.8979591836734694,
623
+ "grad_norm": 81.53719329833984,
624
+ "learning_rate": 8.877551020408163e-06,
625
+ "loss": 0.4514,
626
+ "step": 88
627
+ },
628
+ {
629
+ "epoch": 0.9081632653061225,
630
+ "grad_norm": 547.4005126953125,
631
+ "learning_rate": 8.979591836734695e-06,
632
+ "loss": 4.9103,
633
+ "step": 89
634
+ },
635
+ {
636
+ "epoch": 0.9183673469387755,
637
+ "grad_norm": 25.792213439941406,
638
+ "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.0762,
640
+ "step": 90
641
+ },
642
+ {
643
+ "epoch": 0.9285714285714286,
644
+ "grad_norm": 10.455421447753906,
645
+ "learning_rate": 9.183673469387756e-06,
646
+ "loss": 0.0444,
647
+ "step": 91
648
+ },
649
+ {
650
+ "epoch": 0.9387755102040817,
651
+ "grad_norm": 472.54376220703125,
652
+ "learning_rate": 9.285714285714288e-06,
653
+ "loss": 1.8609,
654
+ "step": 92
655
+ },
656
+ {
657
+ "epoch": 0.9489795918367347,
658
+ "grad_norm": 31.092357635498047,
659
+ "learning_rate": 9.387755102040818e-06,
660
+ "loss": 0.1489,
661
+ "step": 93
662
+ },
663
+ {
664
+ "epoch": 0.9591836734693877,
665
+ "grad_norm": 231.94151306152344,
666
+ "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.5926,
668
+ "step": 94
669
+ },
670
+ {
671
+ "epoch": 0.9693877551020408,
672
+ "grad_norm": 211.05117797851562,
673
+ "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.5344,
675
+ "step": 95
676
+ },
677
+ {
678
+ "epoch": 0.9795918367346939,
679
+ "grad_norm": 217.01339721679688,
680
+ "learning_rate": 9.693877551020408e-06,
681
+ "loss": 0.4693,
682
+ "step": 96
683
+ },
684
+ {
685
+ "epoch": 0.9897959183673469,
686
+ "grad_norm": 1123.96484375,
687
+ "learning_rate": 9.795918367346939e-06,
688
+ "loss": 9.2282,
689
+ "step": 97
690
+ },
691
+ {
692
+ "epoch": 1.0,
693
+ "grad_norm": 741.597412109375,
694
+ "learning_rate": 9.89795918367347e-06,
695
+ "loss": 4.6238,
696
+ "step": 98
697
+ },
698
+ {
699
+ "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
704
+ "eval_dim_1024_cosine_map@100": 0.45394800707643057,
705
+ "eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
707
+ "eval_dim_1024_cosine_precision@1": 0.36235595390524966,
708
+ "eval_dim_1024_cosine_precision@10": 0.3176696542893726,
709
+ "eval_dim_1024_cosine_precision@3": 0.36192915066154496,
710
+ "eval_dim_1024_cosine_precision@5": 0.35172855313700385,
711
+ "eval_dim_1024_cosine_recall@1": 0.04346309464734114,
712
+ "eval_dim_1024_cosine_recall@10": 0.28096984500258326,
713
+ "eval_dim_1024_cosine_recall@3": 0.12757812796185336,
714
+ "eval_dim_1024_cosine_recall@5": 0.19200836801442767,
715
+ "eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
716
+ "eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
717
+ "eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
718
+ "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
+ "eval_dim_128_cosine_map@100": 0.3963095303049961,
720
+ "eval_dim_128_cosine_mrr@10": 0.3199812511432227,
721
+ "eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
722
+ "eval_dim_128_cosine_precision@1": 0.3085787451984635,
723
+ "eval_dim_128_cosine_precision@10": 0.2752880921895006,
724
+ "eval_dim_128_cosine_precision@3": 0.3079385403329065,
725
+ "eval_dim_128_cosine_precision@5": 0.29961587708066584,
726
+ "eval_dim_128_cosine_recall@1": 0.036297623853982414,
727
+ "eval_dim_128_cosine_recall@10": 0.24000960695821508,
728
+ "eval_dim_128_cosine_recall@3": 0.10638786483158841,
729
+ "eval_dim_128_cosine_recall@5": 0.16032639984514846,
730
+ "eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
731
+ "eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
732
+ "eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
733
+ "eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
734
+ "eval_dim_256_cosine_map@100": 0.4298669852983799,
735
+ "eval_dim_256_cosine_mrr@10": 0.3551361197487955,
736
+ "eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
737
+ "eval_dim_256_cosine_precision@1": 0.3437900128040973,
738
+ "eval_dim_256_cosine_precision@10": 0.3040973111395647,
739
+ "eval_dim_256_cosine_precision@3": 0.342936406316688,
740
+ "eval_dim_256_cosine_precision@5": 0.33457106274007686,
741
+ "eval_dim_256_cosine_recall@1": 0.04013102608834382,
742
+ "eval_dim_256_cosine_recall@10": 0.2648598688529433,
743
+ "eval_dim_256_cosine_recall@3": 0.11771735023719074,
744
+ "eval_dim_256_cosine_recall@5": 0.17837935755014916,
745
+ "eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
746
+ "eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
747
+ "eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
748
+ "eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
749
+ "eval_dim_512_cosine_map@100": 0.4476805587612892,
750
+ "eval_dim_512_cosine_mrr@10": 0.37212542934373866,
751
+ "eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
752
+ "eval_dim_512_cosine_precision@1": 0.35979513444302175,
753
+ "eval_dim_512_cosine_precision@10": 0.3173495518565941,
754
+ "eval_dim_512_cosine_precision@3": 0.35936833119931705,
755
+ "eval_dim_512_cosine_precision@5": 0.34967989756722156,
756
+ "eval_dim_512_cosine_recall@1": 0.04265405128130224,
757
+ "eval_dim_512_cosine_recall@10": 0.2781876565001863,
758
+ "eval_dim_512_cosine_recall@3": 0.12523102347193127,
759
+ "eval_dim_512_cosine_recall@5": 0.18912519336740205,
760
+ "eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
761
+ "eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
762
+ "eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
763
+ "eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
764
+ "eval_dim_64_cosine_map@100": 0.3539045084602349,
765
+ "eval_dim_64_cosine_mrr@10": 0.28429414873076814,
766
+ "eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
767
+ "eval_dim_64_cosine_precision@1": 0.2740076824583867,
768
+ "eval_dim_64_cosine_precision@10": 0.24571062740076827,
769
+ "eval_dim_64_cosine_precision@3": 0.27315407597097735,
770
+ "eval_dim_64_cosine_precision@5": 0.2670934699103713,
771
+ "eval_dim_64_cosine_recall@1": 0.03167890172057568,
772
+ "eval_dim_64_cosine_recall@10": 0.21092883720941633,
773
+ "eval_dim_64_cosine_recall@3": 0.09267023360511464,
774
+ "eval_dim_64_cosine_recall@5": 0.14048625468314752,
775
+ "eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
776
+ "eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
777
+ "eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
778
+ "eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
779
+ "eval_dim_768_cosine_map@100": 0.4493001842217619,
780
+ "eval_dim_768_cosine_mrr@10": 0.37149335406377615,
781
+ "eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
782
+ "eval_dim_768_cosine_precision@1": 0.3591549295774648,
783
+ "eval_dim_768_cosine_precision@10": 0.31670934699103714,
784
+ "eval_dim_768_cosine_precision@3": 0.3587281263337601,
785
+ "eval_dim_768_cosine_precision@5": 0.34852752880921894,
786
+ "eval_dim_768_cosine_recall@1": 0.04250079684114586,
787
+ "eval_dim_768_cosine_recall@10": 0.27695909667507057,
788
+ "eval_dim_768_cosine_recall@3": 0.12462187901616553,
789
+ "eval_dim_768_cosine_recall@5": 0.1875478484365334,
790
+ "eval_runtime": 99.0843,
791
+ "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.29402896525927075,
793
+ "eval_steps_per_second": 0.0,
794
+ "step": 98
795
+ }
796
+ ],
797
+ "logging_steps": 1,
798
+ "max_steps": 1960,
799
+ "num_input_tokens_seen": 0,
800
+ "num_train_epochs": 20,
801
+ "save_steps": 500,
802
+ "stateful_callbacks": {
803
+ "EarlyStoppingCallback": {
804
+ "args": {
805
+ "early_stopping_patience": 2,
806
+ "early_stopping_threshold": 0.0
807
+ },
808
+ "attributes": {
809
+ "early_stopping_patience_counter": 0
810
+ }
811
+ },
812
+ "TrainerControl": {
813
+ "args": {
814
+ "should_epoch_stop": false,
815
+ "should_evaluate": false,
816
+ "should_log": false,
817
+ "should_save": true,
818
+ "should_training_stop": false
819
+ },
820
+ "attributes": {}
821
+ }
822
+ },
823
+ "total_flos": 0.0,
824
+ "train_batch_size": 2,
825
+ "trial_name": null,
826
+ "trial_params": null
827
+ }