embedding_finetuned / trainer_state.json
bhuy71's picture
Upload checkpoint-276 from local
6981c42 verified
{
"best_global_step": 276,
"best_metric": 0.12710943818092346,
"best_model_checkpoint": "bkai-fine-tuned-legal/checkpoint-276",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 276,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.53977108001709,
"learning_rate": 2.4107142857142858e-05,
"loss": 1.016345148501189,
"step": 46
},
{
"epoch": 1.0,
"eval_dim_128_cosine_accuracy@1": 0.213777715799749,
"eval_dim_128_cosine_accuracy@10": 0.670199414307628,
"eval_dim_128_cosine_accuracy@3": 0.41361037512201926,
"eval_dim_128_cosine_accuracy@5": 0.5034165388369823,
"eval_dim_128_cosine_map@100": 0.35224211836983055,
"eval_dim_128_cosine_mrr@10": 0.3412989488083612,
"eval_dim_128_cosine_ndcg@10": 0.41868129352359357,
"eval_dim_128_cosine_precision@1": 0.213777715799749,
"eval_dim_128_cosine_precision@10": 0.06701994143076279,
"eval_dim_128_cosine_precision@3": 0.13787012504067309,
"eval_dim_128_cosine_precision@5": 0.10068330776739645,
"eval_dim_128_cosine_recall@1": 0.213777715799749,
"eval_dim_128_cosine_recall@10": 0.670199414307628,
"eval_dim_128_cosine_recall@3": 0.41361037512201926,
"eval_dim_128_cosine_recall@5": 0.5034165388369823,
"eval_dim_256_cosine_accuracy@1": 0.2169850787895691,
"eval_dim_256_cosine_accuracy@10": 0.6794031515827639,
"eval_dim_256_cosine_accuracy@3": 0.41486543020499234,
"eval_dim_256_cosine_accuracy@5": 0.5027192860131083,
"eval_dim_256_cosine_map@100": 0.3555720655683085,
"eval_dim_256_cosine_mrr@10": 0.344718033171525,
"eval_dim_256_cosine_ndcg@10": 0.4233294657573625,
"eval_dim_256_cosine_precision@1": 0.2169850787895691,
"eval_dim_256_cosine_precision@10": 0.06794031515827638,
"eval_dim_256_cosine_precision@3": 0.13828847673499742,
"eval_dim_256_cosine_precision@5": 0.10054385720262167,
"eval_dim_256_cosine_recall@1": 0.2169850787895691,
"eval_dim_256_cosine_recall@10": 0.6794031515827639,
"eval_dim_256_cosine_recall@3": 0.41486543020499234,
"eval_dim_256_cosine_recall@5": 0.5027192860131083,
"eval_dim_512_cosine_accuracy@1": 0.22102914516803793,
"eval_dim_512_cosine_accuracy@10": 0.684562822479431,
"eval_dim_512_cosine_accuracy@3": 0.4230930135267048,
"eval_dim_512_cosine_accuracy@5": 0.5092734625575234,
"eval_dim_512_cosine_map@100": 0.36044654114667407,
"eval_dim_512_cosine_mrr@10": 0.3498800282442726,
"eval_dim_512_cosine_ndcg@10": 0.4285631830606908,
"eval_dim_512_cosine_precision@1": 0.22102914516803793,
"eval_dim_512_cosine_precision@10": 0.06845628224794309,
"eval_dim_512_cosine_precision@3": 0.14103100450890158,
"eval_dim_512_cosine_precision@5": 0.10185469251150465,
"eval_dim_512_cosine_recall@1": 0.22102914516803793,
"eval_dim_512_cosine_recall@10": 0.684562822479431,
"eval_dim_512_cosine_recall@3": 0.4230930135267048,
"eval_dim_512_cosine_recall@5": 0.5092734625575234,
"eval_dim_64_cosine_accuracy@1": 0.19746199972109887,
"eval_dim_64_cosine_accuracy@10": 0.6455166643424906,
"eval_dim_64_cosine_accuracy@3": 0.38613861386138615,
"eval_dim_64_cosine_accuracy@5": 0.4791521405661693,
"eval_dim_64_cosine_map@100": 0.332107485720445,
"eval_dim_64_cosine_mrr@10": 0.32084381979445287,
"eval_dim_64_cosine_ndcg@10": 0.3971543906124372,
"eval_dim_64_cosine_precision@1": 0.19746199972109887,
"eval_dim_64_cosine_precision@10": 0.06455166643424905,
"eval_dim_64_cosine_precision@3": 0.12871287128712872,
"eval_dim_64_cosine_precision@5": 0.09583042811323385,
"eval_dim_64_cosine_recall@1": 0.19746199972109887,
"eval_dim_64_cosine_recall@10": 0.6455166643424906,
"eval_dim_64_cosine_recall@3": 0.38613861386138615,
"eval_dim_64_cosine_recall@5": 0.4791521405661693,
"eval_dim_768_cosine_accuracy@1": 0.2259099149351555,
"eval_dim_768_cosine_accuracy@10": 0.6851206247385302,
"eval_dim_768_cosine_accuracy@3": 0.42978664063589456,
"eval_dim_768_cosine_accuracy@5": 0.5170826941849115,
"eval_dim_768_cosine_map@100": 0.3656884435565745,
"eval_dim_768_cosine_mrr@10": 0.3550936532285014,
"eval_dim_768_cosine_ndcg@10": 0.43283429692560266,
"eval_dim_768_cosine_precision@1": 0.2259099149351555,
"eval_dim_768_cosine_precision@10": 0.06851206247385301,
"eval_dim_768_cosine_precision@3": 0.1432622135452982,
"eval_dim_768_cosine_precision@5": 0.10341653883698228,
"eval_dim_768_cosine_recall@1": 0.2259099149351555,
"eval_dim_768_cosine_recall@10": 0.6851206247385302,
"eval_dim_768_cosine_recall@3": 0.42978664063589456,
"eval_dim_768_cosine_recall@5": 0.5170826941849115,
"eval_loss": 0.38568806648254395,
"eval_runtime": 5135.457,
"eval_samples_per_second": 1.397,
"eval_sequential_score": 0.3971543906124372,
"eval_steps_per_second": 0.013,
"step": 46
},
{
"epoch": 2.0,
"grad_norm": 3.2190756797790527,
"learning_rate": 2.9078090590344733e-05,
"loss": 0.2932926675547724,
"step": 92
},
{
"epoch": 2.0,
"eval_dim_128_cosine_accuracy@1": 0.3525310277506624,
"eval_dim_128_cosine_accuracy@10": 0.7081299679263701,
"eval_dim_128_cosine_accuracy@3": 0.47273741458652907,
"eval_dim_128_cosine_accuracy@5": 0.5437177520568959,
"eval_dim_128_cosine_map@100": 0.45170521683593867,
"eval_dim_128_cosine_mrr@10": 0.4414466446644676,
"eval_dim_128_cosine_ndcg@10": 0.5032435031696283,
"eval_dim_128_cosine_precision@1": 0.3525310277506624,
"eval_dim_128_cosine_precision@10": 0.070812996792637,
"eval_dim_128_cosine_precision@3": 0.1575791381955097,
"eval_dim_128_cosine_precision@5": 0.10874355041137916,
"eval_dim_128_cosine_recall@1": 0.3525310277506624,
"eval_dim_128_cosine_recall@10": 0.7081299679263701,
"eval_dim_128_cosine_recall@3": 0.47273741458652907,
"eval_dim_128_cosine_recall@5": 0.5437177520568959,
"eval_dim_256_cosine_accuracy@1": 0.3576906986473295,
"eval_dim_256_cosine_accuracy@10": 0.7170548040719565,
"eval_dim_256_cosine_accuracy@3": 0.4801282945195928,
"eval_dim_256_cosine_accuracy@5": 0.5580811602286989,
"eval_dim_256_cosine_map@100": 0.45920859350066756,
"eval_dim_256_cosine_mrr@10": 0.4493326294400066,
"eval_dim_256_cosine_ndcg@10": 0.5115508657442693,
"eval_dim_256_cosine_precision@1": 0.3576906986473295,
"eval_dim_256_cosine_precision@10": 0.07170548040719565,
"eval_dim_256_cosine_precision@3": 0.16004276483986424,
"eval_dim_256_cosine_precision@5": 0.11161623204573978,
"eval_dim_256_cosine_recall@1": 0.3576906986473295,
"eval_dim_256_cosine_recall@10": 0.7170548040719565,
"eval_dim_256_cosine_recall@3": 0.4801282945195928,
"eval_dim_256_cosine_recall@5": 0.5580811602286989,
"eval_dim_512_cosine_accuracy@1": 0.3614558638962488,
"eval_dim_512_cosine_accuracy@10": 0.7205410681913261,
"eval_dim_512_cosine_accuracy@3": 0.4808255473434667,
"eval_dim_512_cosine_accuracy@5": 0.5544554455445545,
"eval_dim_512_cosine_map@100": 0.46086139918174507,
"eval_dim_512_cosine_mrr@10": 0.45110962806542504,
"eval_dim_512_cosine_ndcg@10": 0.5136290484927198,
"eval_dim_512_cosine_precision@1": 0.3614558638962488,
"eval_dim_512_cosine_precision@10": 0.07205410681913263,
"eval_dim_512_cosine_precision@3": 0.16027518244782224,
"eval_dim_512_cosine_precision@5": 0.11089108910891088,
"eval_dim_512_cosine_recall@1": 0.3614558638962488,
"eval_dim_512_cosine_recall@10": 0.7205410681913261,
"eval_dim_512_cosine_recall@3": 0.4808255473434667,
"eval_dim_512_cosine_recall@5": 0.5544554455445545,
"eval_dim_64_cosine_accuracy@1": 0.34527959838237343,
"eval_dim_64_cosine_accuracy@10": 0.6934876586250174,
"eval_dim_64_cosine_accuracy@3": 0.4613024682749965,
"eval_dim_64_cosine_accuracy@5": 0.5328406080044624,
"eval_dim_64_cosine_map@100": 0.4429389789139068,
"eval_dim_64_cosine_mrr@10": 0.43229392858802956,
"eval_dim_64_cosine_ndcg@10": 0.49282395499966114,
"eval_dim_64_cosine_precision@1": 0.34527959838237343,
"eval_dim_64_cosine_precision@10": 0.06934876586250174,
"eval_dim_64_cosine_precision@3": 0.15376748942499882,
"eval_dim_64_cosine_precision@5": 0.10656812160089248,
"eval_dim_64_cosine_recall@1": 0.34527959838237343,
"eval_dim_64_cosine_recall@10": 0.6934876586250174,
"eval_dim_64_cosine_recall@3": 0.4613024682749965,
"eval_dim_64_cosine_recall@5": 0.5328406080044624,
"eval_dim_768_cosine_accuracy@1": 0.36480267745084366,
"eval_dim_768_cosine_accuracy@10": 0.7151024961651095,
"eval_dim_768_cosine_accuracy@3": 0.48333565750941293,
"eval_dim_768_cosine_accuracy@5": 0.5564077534514015,
"eval_dim_768_cosine_map@100": 0.4630869213805269,
"eval_dim_768_cosine_mrr@10": 0.45277440218871223,
"eval_dim_768_cosine_ndcg@10": 0.5136830862608868,
"eval_dim_768_cosine_precision@1": 0.36480267745084366,
"eval_dim_768_cosine_precision@10": 0.07151024961651094,
"eval_dim_768_cosine_precision@3": 0.16111188583647096,
"eval_dim_768_cosine_precision@5": 0.11128155069028028,
"eval_dim_768_cosine_recall@1": 0.36480267745084366,
"eval_dim_768_cosine_recall@10": 0.7151024961651095,
"eval_dim_768_cosine_recall@3": 0.48333565750941293,
"eval_dim_768_cosine_recall@5": 0.5564077534514015,
"eval_loss": 0.19174915552139282,
"eval_runtime": 5125.6155,
"eval_samples_per_second": 1.399,
"eval_sequential_score": 0.49282395499966114,
"eval_steps_per_second": 0.013,
"step": 92
},
{
"epoch": 3.0,
"grad_norm": 1.435935139656067,
"learning_rate": 2.5281411335025595e-05,
"loss": 0.1528056186178456,
"step": 138
},
{
"epoch": 3.0,
"eval_dim_128_cosine_accuracy@1": 0.3654999302747176,
"eval_dim_128_cosine_accuracy@10": 0.7250034862641194,
"eval_dim_128_cosine_accuracy@3": 0.4868219216287826,
"eval_dim_128_cosine_accuracy@5": 0.5612885232185191,
"eval_dim_128_cosine_map@100": 0.46579021075100474,
"eval_dim_128_cosine_mrr@10": 0.4560526525489593,
"eval_dim_128_cosine_ndcg@10": 0.5184617012059481,
"eval_dim_128_cosine_precision@1": 0.3654999302747176,
"eval_dim_128_cosine_precision@10": 0.07250034862641193,
"eval_dim_128_cosine_precision@3": 0.16227397387626086,
"eval_dim_128_cosine_precision@5": 0.11225770464370378,
"eval_dim_128_cosine_recall@1": 0.3654999302747176,
"eval_dim_128_cosine_recall@10": 0.7250034862641194,
"eval_dim_128_cosine_recall@3": 0.4868219216287826,
"eval_dim_128_cosine_recall@5": 0.5612885232185191,
"eval_dim_256_cosine_accuracy@1": 0.36508157858039325,
"eval_dim_256_cosine_accuracy@10": 0.7268163436061916,
"eval_dim_256_cosine_accuracy@3": 0.48654302049923304,
"eval_dim_256_cosine_accuracy@5": 0.5640775345140148,
"eval_dim_256_cosine_map@100": 0.4656359359210204,
"eval_dim_256_cosine_mrr@10": 0.4559058420932647,
"eval_dim_256_cosine_ndcg@10": 0.5187585343172242,
"eval_dim_256_cosine_precision@1": 0.36508157858039325,
"eval_dim_256_cosine_precision@10": 0.07268163436061917,
"eval_dim_256_cosine_precision@3": 0.16218100683307768,
"eval_dim_256_cosine_precision@5": 0.11281550690280295,
"eval_dim_256_cosine_recall@1": 0.36508157858039325,
"eval_dim_256_cosine_recall@10": 0.7268163436061916,
"eval_dim_256_cosine_recall@3": 0.48654302049923304,
"eval_dim_256_cosine_recall@5": 0.5640775345140148,
"eval_dim_512_cosine_accuracy@1": 0.37303026077255613,
"eval_dim_512_cosine_accuracy@10": 0.7275135964300655,
"eval_dim_512_cosine_accuracy@3": 0.49532840608004464,
"eval_dim_512_cosine_accuracy@5": 0.5689583042811324,
"eval_dim_512_cosine_map@100": 0.47218646353528615,
"eval_dim_512_cosine_mrr@10": 0.46250495270855235,
"eval_dim_512_cosine_ndcg@10": 0.5240494016636663,
"eval_dim_512_cosine_precision@1": 0.37303026077255613,
"eval_dim_512_cosine_precision@10": 0.07275135964300654,
"eval_dim_512_cosine_precision@3": 0.16510946869334822,
"eval_dim_512_cosine_precision@5": 0.11379166085622647,
"eval_dim_512_cosine_recall@1": 0.37303026077255613,
"eval_dim_512_cosine_recall@10": 0.7275135964300655,
"eval_dim_512_cosine_recall@3": 0.49532840608004464,
"eval_dim_512_cosine_recall@5": 0.5689583042811324,
"eval_dim_64_cosine_accuracy@1": 0.354762236787059,
"eval_dim_64_cosine_accuracy@10": 0.7084088690559197,
"eval_dim_64_cosine_accuracy@3": 0.4685538976432855,
"eval_dim_64_cosine_accuracy@5": 0.5463673127876167,
"eval_dim_64_cosine_map@100": 0.4528341447756788,
"eval_dim_64_cosine_mrr@10": 0.4426069619034351,
"eval_dim_64_cosine_ndcg@10": 0.5042049654509246,
"eval_dim_64_cosine_precision@1": 0.354762236787059,
"eval_dim_64_cosine_precision@10": 0.07084088690559197,
"eval_dim_64_cosine_precision@3": 0.15618463254776183,
"eval_dim_64_cosine_precision@5": 0.10927346255752335,
"eval_dim_64_cosine_recall@1": 0.354762236787059,
"eval_dim_64_cosine_recall@10": 0.7084088690559197,
"eval_dim_64_cosine_recall@3": 0.4685538976432855,
"eval_dim_64_cosine_recall@5": 0.5463673127876167,
"eval_dim_768_cosine_accuracy@1": 0.3689861943940873,
"eval_dim_768_cosine_accuracy@10": 0.730442058290336,
"eval_dim_768_cosine_accuracy@3": 0.4868219216287826,
"eval_dim_768_cosine_accuracy@5": 0.5649142379026635,
"eval_dim_768_cosine_map@100": 0.46810281327770226,
"eval_dim_768_cosine_mrr@10": 0.45858146679859224,
"eval_dim_768_cosine_ndcg@10": 0.5215840208749241,
"eval_dim_768_cosine_precision@1": 0.3689861943940873,
"eval_dim_768_cosine_precision@10": 0.0730442058290336,
"eval_dim_768_cosine_precision@3": 0.16227397387626086,
"eval_dim_768_cosine_precision@5": 0.11298284758053269,
"eval_dim_768_cosine_recall@1": 0.3689861943940873,
"eval_dim_768_cosine_recall@10": 0.730442058290336,
"eval_dim_768_cosine_recall@3": 0.4868219216287826,
"eval_dim_768_cosine_recall@5": 0.5649142379026635,
"eval_loss": 0.15238769352436066,
"eval_runtime": 5128.0318,
"eval_samples_per_second": 1.399,
"eval_sequential_score": 0.5042049654509246,
"eval_steps_per_second": 0.013,
"step": 138
},
{
"epoch": 4.0,
"grad_norm": 1.2450511455535889,
"learning_rate": 1.9318122786371193e-05,
"loss": 0.11104167026022206,
"step": 184
},
{
"epoch": 4.0,
"eval_dim_128_cosine_accuracy@1": 0.36619718309859156,
"eval_dim_128_cosine_accuracy@10": 0.7255612885232186,
"eval_dim_128_cosine_accuracy@3": 0.4857063171105843,
"eval_dim_128_cosine_accuracy@5": 0.5624041277367173,
"eval_dim_128_cosine_map@100": 0.46578169439240885,
"eval_dim_128_cosine_mrr@10": 0.45578664506289746,
"eval_dim_128_cosine_ndcg@10": 0.5183564003053771,
"eval_dim_128_cosine_precision@1": 0.36619718309859156,
"eval_dim_128_cosine_precision@10": 0.07255612885232185,
"eval_dim_128_cosine_precision@3": 0.1619021057035281,
"eval_dim_128_cosine_precision@5": 0.11248082554734345,
"eval_dim_128_cosine_recall@1": 0.36619718309859156,
"eval_dim_128_cosine_recall@10": 0.7255612885232186,
"eval_dim_128_cosine_recall@3": 0.4857063171105843,
"eval_dim_128_cosine_recall@5": 0.5624041277367173,
"eval_dim_256_cosine_accuracy@1": 0.36619718309859156,
"eval_dim_256_cosine_accuracy@10": 0.7269557941709663,
"eval_dim_256_cosine_accuracy@3": 0.4847301631571608,
"eval_dim_256_cosine_accuracy@5": 0.5621252266071678,
"eval_dim_256_cosine_map@100": 0.4659867486137855,
"eval_dim_256_cosine_mrr@10": 0.4562129653609242,
"eval_dim_256_cosine_ndcg@10": 0.5189825183389356,
"eval_dim_256_cosine_precision@1": 0.36619718309859156,
"eval_dim_256_cosine_precision@10": 0.07269557941709663,
"eval_dim_256_cosine_precision@3": 0.1615767210523869,
"eval_dim_256_cosine_precision@5": 0.11242504532143355,
"eval_dim_256_cosine_recall@1": 0.36619718309859156,
"eval_dim_256_cosine_recall@10": 0.7269557941709663,
"eval_dim_256_cosine_recall@3": 0.4847301631571608,
"eval_dim_256_cosine_recall@5": 0.5621252266071678,
"eval_dim_512_cosine_accuracy@1": 0.3657788314042672,
"eval_dim_512_cosine_accuracy@10": 0.7297448054664621,
"eval_dim_512_cosine_accuracy@3": 0.48710082275833216,
"eval_dim_512_cosine_accuracy@5": 0.561985776042393,
"eval_dim_512_cosine_map@100": 0.4664999147434714,
"eval_dim_512_cosine_mrr@10": 0.45684652469271236,
"eval_dim_512_cosine_ndcg@10": 0.5201161952598219,
"eval_dim_512_cosine_precision@1": 0.3657788314042672,
"eval_dim_512_cosine_precision@10": 0.07297448054664621,
"eval_dim_512_cosine_precision@3": 0.1623669409194441,
"eval_dim_512_cosine_precision@5": 0.11239715520847858,
"eval_dim_512_cosine_recall@1": 0.3657788314042672,
"eval_dim_512_cosine_recall@10": 0.7297448054664621,
"eval_dim_512_cosine_recall@3": 0.48710082275833216,
"eval_dim_512_cosine_recall@5": 0.561985776042393,
"eval_dim_64_cosine_accuracy@1": 0.3514154232324641,
"eval_dim_64_cosine_accuracy@10": 0.7138474410821364,
"eval_dim_64_cosine_accuracy@3": 0.4725979640217543,
"eval_dim_64_cosine_accuracy@5": 0.5434388509273462,
"eval_dim_64_cosine_map@100": 0.4521933044833062,
"eval_dim_64_cosine_mrr@10": 0.4420436258917645,
"eval_dim_64_cosine_ndcg@10": 0.5050332106839454,
"eval_dim_64_cosine_precision@1": 0.3514154232324641,
"eval_dim_64_cosine_precision@10": 0.07138474410821363,
"eval_dim_64_cosine_precision@3": 0.15753265467391808,
"eval_dim_64_cosine_precision@5": 0.10868777018546923,
"eval_dim_64_cosine_recall@1": 0.3514154232324641,
"eval_dim_64_cosine_recall@10": 0.7138474410821364,
"eval_dim_64_cosine_recall@3": 0.4725979640217543,
"eval_dim_64_cosine_recall@5": 0.5434388509273462,
"eval_dim_768_cosine_accuracy@1": 0.3723330079486822,
"eval_dim_768_cosine_accuracy@10": 0.7312787616789848,
"eval_dim_768_cosine_accuracy@3": 0.4951889555152698,
"eval_dim_768_cosine_accuracy@5": 0.570492260493655,
"eval_dim_768_cosine_map@100": 0.47234757553193524,
"eval_dim_768_cosine_mrr@10": 0.46258391935773235,
"eval_dim_768_cosine_ndcg@10": 0.5249716949025338,
"eval_dim_768_cosine_precision@1": 0.3723330079486822,
"eval_dim_768_cosine_precision@10": 0.07312787616789848,
"eval_dim_768_cosine_precision@3": 0.1650629851717566,
"eval_dim_768_cosine_precision@5": 0.11409845209873098,
"eval_dim_768_cosine_recall@1": 0.3723330079486822,
"eval_dim_768_cosine_recall@10": 0.7312787616789848,
"eval_dim_768_cosine_recall@3": 0.4951889555152698,
"eval_dim_768_cosine_recall@5": 0.570492260493655,
"eval_loss": 0.13917988538742065,
"eval_runtime": 5127.8405,
"eval_samples_per_second": 1.399,
"eval_sequential_score": 0.5050332106839454,
"eval_steps_per_second": 0.013,
"step": 184
},
{
"epoch": 5.0,
"grad_norm": 1.0852317810058594,
"learning_rate": 1.2444873080259475e-05,
"loss": 0.08755316941634468,
"step": 230
},
{
"epoch": 5.0,
"eval_dim_128_cosine_accuracy@1": 0.36926509552363684,
"eval_dim_128_cosine_accuracy@10": 0.7303026077255613,
"eval_dim_128_cosine_accuracy@3": 0.4847301631571608,
"eval_dim_128_cosine_accuracy@5": 0.559336215311672,
"eval_dim_128_cosine_map@100": 0.46751231421013056,
"eval_dim_128_cosine_mrr@10": 0.4577717127849617,
"eval_dim_128_cosine_ndcg@10": 0.5208050058986332,
"eval_dim_128_cosine_precision@1": 0.36926509552363684,
"eval_dim_128_cosine_precision@10": 0.07303026077255612,
"eval_dim_128_cosine_precision@3": 0.1615767210523869,
"eval_dim_128_cosine_precision@5": 0.11186724306233439,
"eval_dim_128_cosine_recall@1": 0.36926509552363684,
"eval_dim_128_cosine_recall@10": 0.7303026077255613,
"eval_dim_128_cosine_recall@3": 0.4847301631571608,
"eval_dim_128_cosine_recall@5": 0.559336215311672,
"eval_dim_256_cosine_accuracy@1": 0.36745223818156464,
"eval_dim_256_cosine_accuracy@10": 0.7289081020778134,
"eval_dim_256_cosine_accuracy@3": 0.4869613721935574,
"eval_dim_256_cosine_accuracy@5": 0.5610096220889694,
"eval_dim_256_cosine_map@100": 0.4672783873145468,
"eval_dim_256_cosine_mrr@10": 0.45734638856239845,
"eval_dim_256_cosine_ndcg@10": 0.5202779962822549,
"eval_dim_256_cosine_precision@1": 0.36745223818156464,
"eval_dim_256_cosine_precision@10": 0.07289081020778135,
"eval_dim_256_cosine_precision@3": 0.16232045739785247,
"eval_dim_256_cosine_precision@5": 0.11220192441779388,
"eval_dim_256_cosine_recall@1": 0.36745223818156464,
"eval_dim_256_cosine_recall@10": 0.7289081020778134,
"eval_dim_256_cosine_recall@3": 0.4869613721935574,
"eval_dim_256_cosine_recall@5": 0.5610096220889694,
"eval_dim_512_cosine_accuracy@1": 0.3733091619021057,
"eval_dim_512_cosine_accuracy@10": 0.7358806303165528,
"eval_dim_512_cosine_accuracy@3": 0.49714126342211684,
"eval_dim_512_cosine_accuracy@5": 0.5745363268721239,
"eval_dim_512_cosine_map@100": 0.47438800022814137,
"eval_dim_512_cosine_mrr@10": 0.4649837971724749,
"eval_dim_512_cosine_ndcg@10": 0.5278640631789735,
"eval_dim_512_cosine_precision@1": 0.3733091619021057,
"eval_dim_512_cosine_precision@10": 0.07358806303165527,
"eval_dim_512_cosine_precision@3": 0.16571375447403894,
"eval_dim_512_cosine_precision@5": 0.11490726537442476,
"eval_dim_512_cosine_recall@1": 0.3733091619021057,
"eval_dim_512_cosine_recall@10": 0.7358806303165528,
"eval_dim_512_cosine_recall@3": 0.49714126342211684,
"eval_dim_512_cosine_recall@5": 0.5745363268721239,
"eval_dim_64_cosine_accuracy@1": 0.35601729187003206,
"eval_dim_64_cosine_accuracy@10": 0.7197043648026774,
"eval_dim_64_cosine_accuracy@3": 0.46980895272625856,
"eval_dim_64_cosine_accuracy@5": 0.5460884116580672,
"eval_dim_64_cosine_map@100": 0.45478631984265544,
"eval_dim_64_cosine_mrr@10": 0.4447587062529192,
"eval_dim_64_cosine_ndcg@10": 0.5083136533419047,
"eval_dim_64_cosine_precision@1": 0.35601729187003206,
"eval_dim_64_cosine_precision@10": 0.07197043648026773,
"eval_dim_64_cosine_precision@3": 0.15660298424208619,
"eval_dim_64_cosine_precision@5": 0.10921768233161343,
"eval_dim_64_cosine_recall@1": 0.35601729187003206,
"eval_dim_64_cosine_recall@10": 0.7197043648026774,
"eval_dim_64_cosine_recall@3": 0.46980895272625856,
"eval_dim_64_cosine_recall@5": 0.5460884116580672,
"eval_dim_768_cosine_accuracy@1": 0.36954399665318644,
"eval_dim_768_cosine_accuracy@10": 0.7333705201506067,
"eval_dim_768_cosine_accuracy@3": 0.4882164272765305,
"eval_dim_768_cosine_accuracy@5": 0.5631013805605912,
"eval_dim_768_cosine_map@100": 0.4692423416522091,
"eval_dim_768_cosine_mrr@10": 0.45940809875756333,
"eval_dim_768_cosine_ndcg@10": 0.5228646825758372,
"eval_dim_768_cosine_precision@1": 0.36954399665318644,
"eval_dim_768_cosine_precision@10": 0.07333705201506065,
"eval_dim_768_cosine_precision@3": 0.16273880909217683,
"eval_dim_768_cosine_precision@5": 0.11262027611211824,
"eval_dim_768_cosine_recall@1": 0.36954399665318644,
"eval_dim_768_cosine_recall@10": 0.7333705201506067,
"eval_dim_768_cosine_recall@3": 0.4882164272765305,
"eval_dim_768_cosine_recall@5": 0.5631013805605912,
"eval_loss": 0.1289709359407425,
"eval_runtime": 5133.7392,
"eval_samples_per_second": 1.397,
"eval_sequential_score": 0.5083136533419047,
"eval_steps_per_second": 0.013,
"step": 230
},
{
"epoch": 6.0,
"grad_norm": 1.0251929759979248,
"learning_rate": 6.11006712953975e-06,
"loss": 0.07780430109604546,
"step": 276
},
{
"epoch": 6.0,
"eval_dim_128_cosine_accuracy@1": 0.36424487519174453,
"eval_dim_128_cosine_accuracy@10": 0.729187003207363,
"eval_dim_128_cosine_accuracy@3": 0.48333565750941293,
"eval_dim_128_cosine_accuracy@5": 0.5603123692650955,
"eval_dim_128_cosine_map@100": 0.4650707051757783,
"eval_dim_128_cosine_mrr@10": 0.4552743855874532,
"eval_dim_128_cosine_ndcg@10": 0.5187763857272285,
"eval_dim_128_cosine_precision@1": 0.36424487519174453,
"eval_dim_128_cosine_precision@10": 0.0729187003207363,
"eval_dim_128_cosine_precision@3": 0.16111188583647096,
"eval_dim_128_cosine_precision@5": 0.1120624738530191,
"eval_dim_128_cosine_recall@1": 0.36424487519174453,
"eval_dim_128_cosine_recall@10": 0.729187003207363,
"eval_dim_128_cosine_recall@3": 0.48333565750941293,
"eval_dim_128_cosine_recall@5": 0.5603123692650955,
"eval_dim_256_cosine_accuracy@1": 0.37163575512480823,
"eval_dim_256_cosine_accuracy@10": 0.7309998605494352,
"eval_dim_256_cosine_accuracy@3": 0.49295774647887325,
"eval_dim_256_cosine_accuracy@5": 0.5681216008924836,
"eval_dim_256_cosine_map@100": 0.4718786149307726,
"eval_dim_256_cosine_mrr@10": 0.46190055625280924,
"eval_dim_256_cosine_ndcg@10": 0.524333456443,
"eval_dim_256_cosine_precision@1": 0.37163575512480823,
"eval_dim_256_cosine_precision@10": 0.07309998605494351,
"eval_dim_256_cosine_precision@3": 0.16431924882629106,
"eval_dim_256_cosine_precision@5": 0.11362432017849672,
"eval_dim_256_cosine_recall@1": 0.37163575512480823,
"eval_dim_256_cosine_recall@10": 0.7309998605494352,
"eval_dim_256_cosine_recall@3": 0.49295774647887325,
"eval_dim_256_cosine_recall@5": 0.5681216008924836,
"eval_dim_512_cosine_accuracy@1": 0.3719146562543578,
"eval_dim_512_cosine_accuracy@10": 0.7375540370938503,
"eval_dim_512_cosine_accuracy@3": 0.4942128015618463,
"eval_dim_512_cosine_accuracy@5": 0.5781620415562683,
"eval_dim_512_cosine_map@100": 0.4735810945322351,
"eval_dim_512_cosine_mrr@10": 0.4641111797296452,
"eval_dim_512_cosine_ndcg@10": 0.5276527222739883,
"eval_dim_512_cosine_precision@1": 0.3719146562543578,
"eval_dim_512_cosine_precision@10": 0.07375540370938502,
"eval_dim_512_cosine_precision@3": 0.16473760052061545,
"eval_dim_512_cosine_precision@5": 0.11563240831125365,
"eval_dim_512_cosine_recall@1": 0.3719146562543578,
"eval_dim_512_cosine_recall@10": 0.7375540370938503,
"eval_dim_512_cosine_recall@3": 0.4942128015618463,
"eval_dim_512_cosine_recall@5": 0.5781620415562683,
"eval_dim_64_cosine_accuracy@1": 0.35852740203597827,
"eval_dim_64_cosine_accuracy@10": 0.7167759029424069,
"eval_dim_64_cosine_accuracy@3": 0.47413192023427697,
"eval_dim_64_cosine_accuracy@5": 0.5512480825547343,
"eval_dim_64_cosine_map@100": 0.4579655459168757,
"eval_dim_64_cosine_mrr@10": 0.44765800523714366,
"eval_dim_64_cosine_ndcg@10": 0.5100114350662204,
"eval_dim_64_cosine_precision@1": 0.35852740203597827,
"eval_dim_64_cosine_precision@10": 0.07167759029424069,
"eval_dim_64_cosine_precision@3": 0.15804397341142565,
"eval_dim_64_cosine_precision@5": 0.11024961651094686,
"eval_dim_64_cosine_recall@1": 0.35852740203597827,
"eval_dim_64_cosine_recall@10": 0.7167759029424069,
"eval_dim_64_cosine_recall@3": 0.47413192023427697,
"eval_dim_64_cosine_recall@5": 0.5512480825547343,
"eval_dim_768_cosine_accuracy@1": 0.37651652489192583,
"eval_dim_768_cosine_accuracy@10": 0.7325338167619578,
"eval_dim_768_cosine_accuracy@3": 0.4960256589039186,
"eval_dim_768_cosine_accuracy@5": 0.5675637986333845,
"eval_dim_768_cosine_map@100": 0.47495758740026794,
"eval_dim_768_cosine_mrr@10": 0.46505562528083916,
"eval_dim_768_cosine_ndcg@10": 0.5270382021795976,
"eval_dim_768_cosine_precision@1": 0.37651652489192583,
"eval_dim_768_cosine_precision@10": 0.07325338167619579,
"eval_dim_768_cosine_precision@3": 0.16534188630130617,
"eval_dim_768_cosine_precision@5": 0.11351275972667689,
"eval_dim_768_cosine_recall@1": 0.37651652489192583,
"eval_dim_768_cosine_recall@10": 0.7325338167619578,
"eval_dim_768_cosine_recall@3": 0.4960256589039186,
"eval_dim_768_cosine_recall@5": 0.5675637986333845,
"eval_loss": 0.12710943818092346,
"eval_runtime": 5133.9648,
"eval_samples_per_second": 1.397,
"eval_sequential_score": 0.5100114350662204,
"eval_steps_per_second": 0.013,
"step": 276
}
],
"logging_steps": 500,
"max_steps": 368,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 104,
"trial_name": null,
"trial_params": null
}