radoslavralev committed on
Commit
9a2cf68
·
verified ·
1 Parent(s): 2fe69a4

Training in progress, step 5000

Browse files
config.json CHANGED
@@ -5,6 +5,7 @@
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "dtype": "bfloat16",
 
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 384,
@@ -14,7 +15,7 @@
14
  "max_position_embeddings": 512,
15
  "model_type": "bert",
16
  "num_attention_heads": 12,
17
- "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
20
  "transformers_version": "4.57.0",
 
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "dtype": "bfloat16",
8
+ "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 384,
 
15
  "max_position_embeddings": 512,
16
  "model_type": "bert",
17
  "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "transformers_version": "4.57.0",
eval/Information-Retrieval_evaluation_test_results.csv CHANGED
@@ -60,3 +60,27 @@ epoch,steps,cosine-Accuracy@1,cosine-Precision@1,cosine-Recall@1,cosine-MRR@1,co
60
  0.49206544470414565,4000,0.5136291460832745,0.5136291460832745,0.49609913213697615,0.5136291460832745,0.7094132063717694,0.6539726048972176,{1: np.float64(0.3119784463901498)},{1: np.float64(0.21389626227951036)}
61
  0.5535736252921638,4500,0.5121736062103035,0.5121736062103035,0.49458110696642804,0.5121736062103035,0.7075051193793919,0.6522760525794585,{1: np.float64(0.31343196470110707)},{1: np.float64(0.21483213049895286)}
62
  0.615081805880182,5000,0.5073659139026111,0.5073659139026111,0.48995940030917096,0.5073659139026111,0.704125968555372,0.6486868844121669,{1: np.float64(0.30976968186914705)},{1: np.float64(0.21846349203066784)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  0.49206544470414565,4000,0.5136291460832745,0.5136291460832745,0.49609913213697615,0.5136291460832745,0.7094132063717694,0.6539726048972176,{1: np.float64(0.3119784463901498)},{1: np.float64(0.21389626227951036)}
61
  0.5535736252921638,4500,0.5121736062103035,0.5121736062103035,0.49458110696642804,0.5121736062103035,0.7075051193793919,0.6522760525794585,{1: np.float64(0.31343196470110707)},{1: np.float64(0.21483213049895286)}
62
  0.615081805880182,5000,0.5073659139026111,0.5073659139026111,0.48995940030917096,0.5073659139026111,0.704125968555372,0.6486868844121669,{1: np.float64(0.30976968186914705)},{1: np.float64(0.21846349203066784)}
63
+ 0.6765899864682002,5500,0.5051164431898377,0.5051164431898377,0.4877760904997144,0.5051164431898377,0.7022776742806202,0.6468460558896545,{1: np.float64(0.30797653909235867)},{1: np.float64(0.21939950112168932)}
64
+ 0.7380981670562184,6000,0.5052487649964714,0.5052487649964714,0.48791208791208796,0.5052487649964714,0.7018006758534852,0.6463104839757691,{1: np.float64(0.30939388097644616)},{1: np.float64(0.22077790819696838)}
65
+ 0.7996063476442367,6500,0.5031757233592096,0.5031757233592096,0.4858743320899284,0.5031757233592096,0.7018024941985244,0.6457940441222814,{1: np.float64(0.3071103744572787)},{1: np.float64(0.21924260973263582)}
66
+ 0.8611145282322549,7000,0.5014555398729711,0.5014555398729711,0.48431366989279834,0.5014555398729711,0.7003065535212539,0.644486083880378,{1: np.float64(0.3059839444807557)},{1: np.float64(0.22119951578936942)}
67
+ 0.9226227088202731,7500,0.5005733944954128,0.5005733944954128,0.48360060237927216,0.5005733944954128,0.6997240529681923,0.6437306889155522,{1: np.float64(0.3058524170445839)},{1: np.float64(0.2215523302463902)}
68
+ 0,0,0.5309191954834157,0.5309191954834157,0.5126434011325066,0.5309191954834157,0.7314305279871582,0.675229160358019,{1: np.float64(0.3015556447544318)},{1: np.float64(0.19690931872370762)}
69
+ 0.03075598203850649,500,0.5370059985885673,0.5370059985885673,0.5186901401350943,0.5370059985885673,0.737809170544929,0.6818464315190007,{1: np.float64(0.30481837104809123)},{1: np.float64(0.17553712520412681)}
70
+ 0.06151196407701298,1000,0.5384615384615384,0.5384615384615384,0.5200659193635111,0.5384615384615384,0.7358701729703853,0.6804762985676286,{1: np.float64(0.31569810249687086)},{1: np.float64(0.18389194101031053)}
71
+ 0,0,0.5309191954834157,0.5309191954834157,0.5126434011325066,0.5309191954834157,0.7314305279871582,0.675229160358019,{1: np.float64(0.3015556447544318)},{1: np.float64(0.19690931872370762)}
72
+ 0,0,0.5474153140437544,0.5474153140437544,0.5284808511442686,0.5474153140437544,0.7464206027541721,0.6905111169633417,{1: np.float64(0.3145837338283234)},{1: np.float64(0.16086033196070731)}
73
+ 0.03075598203850649,500,0.547106563161609,0.547106563161609,0.5281706300198272,0.547106563161609,0.7461821929919408,0.690293009969878,{1: np.float64(0.31459186837145553)},{1: np.float64(0.16280243465720218)}
74
+ 0.06151196407701298,1000,0.5404463655610444,0.5404463655610444,0.5219959799374937,0.5404463655610444,0.7407090011662882,0.6845898197371095,{1: np.float64(0.31411398761743997)},{1: np.float64(0.179996654024503)}
75
+ 0.09226794611551947,1500,0.531227946365561,0.531227946365561,0.5130385287495379,0.531227946365561,0.7319342349371158,0.6752496263522522,{1: np.float64(0.3114153905151002)},{1: np.float64(0.20177076753513354)}
76
+ 0,0,0.5474153140437544,0.5474153140437544,0.5284808511442686,0.5474153140437544,0.7464206027541721,0.6905111169633417,{1: np.float64(0.3145837338283234)},{1: np.float64(0.16086033196070731)}
77
+ 0.03075598203850649,500,0.547106563161609,0.547106563161609,0.5281706300198272,0.547106563161609,0.7461821929919408,0.690293009969878,{1: np.float64(0.31459186837145553)},{1: np.float64(0.16280243465720218)}
78
+ 0.06151196407701298,1000,0.5404463655610444,0.5404463655610444,0.5219959799374937,0.5404463655610444,0.7407090011662882,0.6845898197371095,{1: np.float64(0.31411398761743997)},{1: np.float64(0.179996654024503)}
79
+ 0.09226794611551947,1500,0.531227946365561,0.531227946365561,0.5130385287495379,0.531227946365561,0.7319342349371158,0.6752496263522522,{1: np.float64(0.3114153905151002)},{1: np.float64(0.20177076753513354)}
80
+ 0.12302392815402596,2000,0.5258468595624559,0.5258468595624559,0.5078706270793427,0.5258468595624559,0.7245864644994908,0.6685842514505764,{1: np.float64(0.3123872498414073)},{1: np.float64(0.20395092275330823)}
81
+ 0.15377991019253245,2500,0.5213920254057869,0.5213920254057869,0.5035958976039251,0.5213920254057869,0.719590786147121,0.6638421006585614,{1: np.float64(0.31129992235883797)},{1: np.float64(0.2047724041817989)}
82
+ 0.18453589223103894,3000,0.5178193366266761,0.5178193366266761,0.5000673160936923,0.5178193366266761,0.7163131766086044,0.6603664447787778,{1: np.float64(0.3085206311438812)},{1: np.float64(0.20528798172967666)}
83
+ 0.21529187426954544,3500,0.5159668313338038,0.5159668313338038,0.49833243018449447,0.5159668313338038,0.7145314571439964,0.6586537491588038,{1: np.float64(0.30641447356250734)},{1: np.float64(0.20732705885352593)}
84
+ 0.24604785630805193,4000,0.5132762879322512,0.5132762879322512,0.4957499495916927,0.5132762879322512,0.7123072279482698,0.6561196211679712,{1: np.float64(0.3058214968650276)},{1: np.float64(0.21048968308185473)}
85
+ 0.2768038383465584,4500,0.5087332392378264,0.5087332392378264,0.4910709034848943,0.5087332392378264,0.709310692558922,0.652656638617702,{1: np.float64(0.30174652682850833)},{1: np.float64(0.21226196776001288)}
86
+ 0.3075598203850649,5000,0.5047194777699365,0.5047194777699365,0.48727988372483777,0.5047194777699365,0.7059068523784022,0.6490213484284776,{1: np.float64(0.3008660367505195)},{1: np.float64(0.21497544837116472)}
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:551613faa4cf473038ea65dedbd058fb620e0c7f8d94dc653d50e5ee4508fe69
3
- size 66742184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94a3ce836e18aa6c43796ded8fc6e30cc777f68148f860fc7554a79b4fcbb59
3
+ size 45437864
tokenizer_config.json CHANGED
@@ -41,17 +41,25 @@
41
  "special": true
42
  }
43
  },
44
- "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "do_lower_case": true,
47
  "extra_special_tokens": {},
48
  "mask_token": "[MASK]",
49
  "max_length": 64,
50
- "model_max_length": 512,
 
 
51
  "pad_token": "[PAD]",
 
 
52
  "sep_token": "[SEP]",
 
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
  "tokenizer_class": "BertTokenizer",
 
 
56
  "unk_token": "[UNK]"
57
  }
 
41
  "special": true
42
  }
43
  },
44
+ "clean_up_tokenization_spaces": false,
45
  "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "max_length": 64,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
  "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
  "sep_token": "[SEP]",
58
+ "stride": 0,
59
  "strip_accents": null,
60
  "tokenize_chinese_chars": true,
61
  "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
  "unk_token": "[UNK]"
65
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4ec5e27753a1494ed86180cda30a69319d61af98b822adc19821142a45f8eae
3
  size 6353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:317bd5b83332174b76fe35a4fc644ffadedecb5bcbd748cb002e6652cc3d9041
3
  size 6353