Training in progress, step 5000
Browse files- config.json +2 -1
- eval/Information-Retrieval_evaluation_test_results.csv +24 -0
- model.safetensors +2 -2
- tokenizer_config.json +10 -2
- training_args.bin +1 -1
config.json
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
"attention_probs_dropout_prob": 0.1,
|
| 6 |
"classifier_dropout": null,
|
| 7 |
"dtype": "bfloat16",
|
|
|
|
| 8 |
"hidden_act": "gelu",
|
| 9 |
"hidden_dropout_prob": 0.1,
|
| 10 |
"hidden_size": 384,
|
|
@@ -14,7 +15,7 @@
|
|
| 14 |
"max_position_embeddings": 512,
|
| 15 |
"model_type": "bert",
|
| 16 |
"num_attention_heads": 12,
|
| 17 |
-
"num_hidden_layers":
|
| 18 |
"pad_token_id": 0,
|
| 19 |
"position_embedding_type": "absolute",
|
| 20 |
"transformers_version": "4.57.0",
|
|
|
|
| 5 |
"attention_probs_dropout_prob": 0.1,
|
| 6 |
"classifier_dropout": null,
|
| 7 |
"dtype": "bfloat16",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
"hidden_act": "gelu",
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 384,
|
|
|
|
| 15 |
"max_position_embeddings": 512,
|
| 16 |
"model_type": "bert",
|
| 17 |
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 6,
|
| 19 |
"pad_token_id": 0,
|
| 20 |
"position_embedding_type": "absolute",
|
| 21 |
"transformers_version": "4.57.0",
|
eval/Information-Retrieval_evaluation_test_results.csv
CHANGED
|
@@ -60,3 +60,27 @@ epoch,steps,cosine-Accuracy@1,cosine-Precision@1,cosine-Recall@1,cosine-MRR@1,co
|
|
| 60 |
0.49206544470414565,4000,0.5136291460832745,0.5136291460832745,0.49609913213697615,0.5136291460832745,0.7094132063717694,0.6539726048972176,{1: np.float64(0.3119784463901498)},{1: np.float64(0.21389626227951036)}
|
| 61 |
0.5535736252921638,4500,0.5121736062103035,0.5121736062103035,0.49458110696642804,0.5121736062103035,0.7075051193793919,0.6522760525794585,{1: np.float64(0.31343196470110707)},{1: np.float64(0.21483213049895286)}
|
| 62 |
0.615081805880182,5000,0.5073659139026111,0.5073659139026111,0.48995940030917096,0.5073659139026111,0.704125968555372,0.6486868844121669,{1: np.float64(0.30976968186914705)},{1: np.float64(0.21846349203066784)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
0.49206544470414565,4000,0.5136291460832745,0.5136291460832745,0.49609913213697615,0.5136291460832745,0.7094132063717694,0.6539726048972176,{1: np.float64(0.3119784463901498)},{1: np.float64(0.21389626227951036)}
|
| 61 |
0.5535736252921638,4500,0.5121736062103035,0.5121736062103035,0.49458110696642804,0.5121736062103035,0.7075051193793919,0.6522760525794585,{1: np.float64(0.31343196470110707)},{1: np.float64(0.21483213049895286)}
|
| 62 |
0.615081805880182,5000,0.5073659139026111,0.5073659139026111,0.48995940030917096,0.5073659139026111,0.704125968555372,0.6486868844121669,{1: np.float64(0.30976968186914705)},{1: np.float64(0.21846349203066784)}
|
| 63 |
+
0.6765899864682002,5500,0.5051164431898377,0.5051164431898377,0.4877760904997144,0.5051164431898377,0.7022776742806202,0.6468460558896545,{1: np.float64(0.30797653909235867)},{1: np.float64(0.21939950112168932)}
|
| 64 |
+
0.7380981670562184,6000,0.5052487649964714,0.5052487649964714,0.48791208791208796,0.5052487649964714,0.7018006758534852,0.6463104839757691,{1: np.float64(0.30939388097644616)},{1: np.float64(0.22077790819696838)}
|
| 65 |
+
0.7996063476442367,6500,0.5031757233592096,0.5031757233592096,0.4858743320899284,0.5031757233592096,0.7018024941985244,0.6457940441222814,{1: np.float64(0.3071103744572787)},{1: np.float64(0.21924260973263582)}
|
| 66 |
+
0.8611145282322549,7000,0.5014555398729711,0.5014555398729711,0.48431366989279834,0.5014555398729711,0.7003065535212539,0.644486083880378,{1: np.float64(0.3059839444807557)},{1: np.float64(0.22119951578936942)}
|
| 67 |
+
0.9226227088202731,7500,0.5005733944954128,0.5005733944954128,0.48360060237927216,0.5005733944954128,0.6997240529681923,0.6437306889155522,{1: np.float64(0.3058524170445839)},{1: np.float64(0.2215523302463902)}
|
| 68 |
+
0,0,0.5309191954834157,0.5309191954834157,0.5126434011325066,0.5309191954834157,0.7314305279871582,0.675229160358019,{1: np.float64(0.3015556447544318)},{1: np.float64(0.19690931872370762)}
|
| 69 |
+
0.03075598203850649,500,0.5370059985885673,0.5370059985885673,0.5186901401350943,0.5370059985885673,0.737809170544929,0.6818464315190007,{1: np.float64(0.30481837104809123)},{1: np.float64(0.17553712520412681)}
|
| 70 |
+
0.06151196407701298,1000,0.5384615384615384,0.5384615384615384,0.5200659193635111,0.5384615384615384,0.7358701729703853,0.6804762985676286,{1: np.float64(0.31569810249687086)},{1: np.float64(0.18389194101031053)}
|
| 71 |
+
0,0,0.5309191954834157,0.5309191954834157,0.5126434011325066,0.5309191954834157,0.7314305279871582,0.675229160358019,{1: np.float64(0.3015556447544318)},{1: np.float64(0.19690931872370762)}
|
| 72 |
+
0,0,0.5474153140437544,0.5474153140437544,0.5284808511442686,0.5474153140437544,0.7464206027541721,0.6905111169633417,{1: np.float64(0.3145837338283234)},{1: np.float64(0.16086033196070731)}
|
| 73 |
+
0.03075598203850649,500,0.547106563161609,0.547106563161609,0.5281706300198272,0.547106563161609,0.7461821929919408,0.690293009969878,{1: np.float64(0.31459186837145553)},{1: np.float64(0.16280243465720218)}
|
| 74 |
+
0.06151196407701298,1000,0.5404463655610444,0.5404463655610444,0.5219959799374937,0.5404463655610444,0.7407090011662882,0.6845898197371095,{1: np.float64(0.31411398761743997)},{1: np.float64(0.179996654024503)}
|
| 75 |
+
0.09226794611551947,1500,0.531227946365561,0.531227946365561,0.5130385287495379,0.531227946365561,0.7319342349371158,0.6752496263522522,{1: np.float64(0.3114153905151002)},{1: np.float64(0.20177076753513354)}
|
| 76 |
+
0,0,0.5474153140437544,0.5474153140437544,0.5284808511442686,0.5474153140437544,0.7464206027541721,0.6905111169633417,{1: np.float64(0.3145837338283234)},{1: np.float64(0.16086033196070731)}
|
| 77 |
+
0.03075598203850649,500,0.547106563161609,0.547106563161609,0.5281706300198272,0.547106563161609,0.7461821929919408,0.690293009969878,{1: np.float64(0.31459186837145553)},{1: np.float64(0.16280243465720218)}
|
| 78 |
+
0.06151196407701298,1000,0.5404463655610444,0.5404463655610444,0.5219959799374937,0.5404463655610444,0.7407090011662882,0.6845898197371095,{1: np.float64(0.31411398761743997)},{1: np.float64(0.179996654024503)}
|
| 79 |
+
0.09226794611551947,1500,0.531227946365561,0.531227946365561,0.5130385287495379,0.531227946365561,0.7319342349371158,0.6752496263522522,{1: np.float64(0.3114153905151002)},{1: np.float64(0.20177076753513354)}
|
| 80 |
+
0.12302392815402596,2000,0.5258468595624559,0.5258468595624559,0.5078706270793427,0.5258468595624559,0.7245864644994908,0.6685842514505764,{1: np.float64(0.3123872498414073)},{1: np.float64(0.20395092275330823)}
|
| 81 |
+
0.15377991019253245,2500,0.5213920254057869,0.5213920254057869,0.5035958976039251,0.5213920254057869,0.719590786147121,0.6638421006585614,{1: np.float64(0.31129992235883797)},{1: np.float64(0.2047724041817989)}
|
| 82 |
+
0.18453589223103894,3000,0.5178193366266761,0.5178193366266761,0.5000673160936923,0.5178193366266761,0.7163131766086044,0.6603664447787778,{1: np.float64(0.3085206311438812)},{1: np.float64(0.20528798172967666)}
|
| 83 |
+
0.21529187426954544,3500,0.5159668313338038,0.5159668313338038,0.49833243018449447,0.5159668313338038,0.7145314571439964,0.6586537491588038,{1: np.float64(0.30641447356250734)},{1: np.float64(0.20732705885352593)}
|
| 84 |
+
0.24604785630805193,4000,0.5132762879322512,0.5132762879322512,0.4957499495916927,0.5132762879322512,0.7123072279482698,0.6561196211679712,{1: np.float64(0.3058214968650276)},{1: np.float64(0.21048968308185473)}
|
| 85 |
+
0.2768038383465584,4500,0.5087332392378264,0.5087332392378264,0.4910709034848943,0.5087332392378264,0.709310692558922,0.652656638617702,{1: np.float64(0.30174652682850833)},{1: np.float64(0.21226196776001288)}
|
| 86 |
+
0.3075598203850649,5000,0.5047194777699365,0.5047194777699365,0.48727988372483777,0.5047194777699365,0.7059068523784022,0.6490213484284776,{1: np.float64(0.3008660367505195)},{1: np.float64(0.21497544837116472)}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f94a3ce836e18aa6c43796ded8fc6e30cc777f68148f860fc7554a79b4fcbb59
|
| 3 |
+
size 45437864
|
tokenizer_config.json
CHANGED
|
@@ -41,17 +41,25 @@
|
|
| 41 |
"special": true
|
| 42 |
}
|
| 43 |
},
|
| 44 |
-
"clean_up_tokenization_spaces":
|
| 45 |
"cls_token": "[CLS]",
|
|
|
|
| 46 |
"do_lower_case": true,
|
| 47 |
"extra_special_tokens": {},
|
| 48 |
"mask_token": "[MASK]",
|
| 49 |
"max_length": 64,
|
| 50 |
-
"model_max_length":
|
|
|
|
|
|
|
| 51 |
"pad_token": "[PAD]",
|
|
|
|
|
|
|
| 52 |
"sep_token": "[SEP]",
|
|
|
|
| 53 |
"strip_accents": null,
|
| 54 |
"tokenize_chinese_chars": true,
|
| 55 |
"tokenizer_class": "BertTokenizer",
|
|
|
|
|
|
|
| 56 |
"unk_token": "[UNK]"
|
| 57 |
}
|
|
|
|
| 41 |
"special": true
|
| 42 |
}
|
| 43 |
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
"do_lower_case": true,
|
| 48 |
"extra_special_tokens": {},
|
| 49 |
"mask_token": "[MASK]",
|
| 50 |
"max_length": 64,
|
| 51 |
+
"model_max_length": 256,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_to_multiple_of": null,
|
| 54 |
"pad_token": "[PAD]",
|
| 55 |
+
"pad_token_type_id": 0,
|
| 56 |
+
"padding_side": "right",
|
| 57 |
"sep_token": "[SEP]",
|
| 58 |
+
"stride": 0,
|
| 59 |
"strip_accents": null,
|
| 60 |
"tokenize_chinese_chars": true,
|
| 61 |
"tokenizer_class": "BertTokenizer",
|
| 62 |
+
"truncation_side": "right",
|
| 63 |
+
"truncation_strategy": "longest_first",
|
| 64 |
"unk_token": "[UNK]"
|
| 65 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6353
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:317bd5b83332174b76fe35a4fc644ffadedecb5bcbd748cb002e6652cc3d9041
|
| 3 |
size 6353
|