{ "best_global_step": 276, "best_metric": 0.12710943818092346, "best_model_checkpoint": "bkai-fine-tuned-legal/checkpoint-276", "epoch": 6.0, "eval_steps": 500, "global_step": 276, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.53977108001709, "learning_rate": 2.4107142857142858e-05, "loss": 1.016345148501189, "step": 46 }, { "epoch": 1.0, "eval_dim_128_cosine_accuracy@1": 0.213777715799749, "eval_dim_128_cosine_accuracy@10": 0.670199414307628, "eval_dim_128_cosine_accuracy@3": 0.41361037512201926, "eval_dim_128_cosine_accuracy@5": 0.5034165388369823, "eval_dim_128_cosine_map@100": 0.35224211836983055, "eval_dim_128_cosine_mrr@10": 0.3412989488083612, "eval_dim_128_cosine_ndcg@10": 0.41868129352359357, "eval_dim_128_cosine_precision@1": 0.213777715799749, "eval_dim_128_cosine_precision@10": 0.06701994143076279, "eval_dim_128_cosine_precision@3": 0.13787012504067309, "eval_dim_128_cosine_precision@5": 0.10068330776739645, "eval_dim_128_cosine_recall@1": 0.213777715799749, "eval_dim_128_cosine_recall@10": 0.670199414307628, "eval_dim_128_cosine_recall@3": 0.41361037512201926, "eval_dim_128_cosine_recall@5": 0.5034165388369823, "eval_dim_256_cosine_accuracy@1": 0.2169850787895691, "eval_dim_256_cosine_accuracy@10": 0.6794031515827639, "eval_dim_256_cosine_accuracy@3": 0.41486543020499234, "eval_dim_256_cosine_accuracy@5": 0.5027192860131083, "eval_dim_256_cosine_map@100": 0.3555720655683085, "eval_dim_256_cosine_mrr@10": 0.344718033171525, "eval_dim_256_cosine_ndcg@10": 0.4233294657573625, "eval_dim_256_cosine_precision@1": 0.2169850787895691, "eval_dim_256_cosine_precision@10": 0.06794031515827638, "eval_dim_256_cosine_precision@3": 0.13828847673499742, "eval_dim_256_cosine_precision@5": 0.10054385720262167, "eval_dim_256_cosine_recall@1": 0.2169850787895691, "eval_dim_256_cosine_recall@10": 0.6794031515827639, "eval_dim_256_cosine_recall@3": 0.41486543020499234, "eval_dim_256_cosine_recall@5": 0.5027192860131083, "eval_dim_512_cosine_accuracy@1": 0.22102914516803793, "eval_dim_512_cosine_accuracy@10": 0.684562822479431, "eval_dim_512_cosine_accuracy@3": 0.4230930135267048, "eval_dim_512_cosine_accuracy@5": 0.5092734625575234, "eval_dim_512_cosine_map@100": 0.36044654114667407, "eval_dim_512_cosine_mrr@10": 0.3498800282442726, "eval_dim_512_cosine_ndcg@10": 0.4285631830606908, "eval_dim_512_cosine_precision@1": 0.22102914516803793, "eval_dim_512_cosine_precision@10": 0.06845628224794309, "eval_dim_512_cosine_precision@3": 0.14103100450890158, "eval_dim_512_cosine_precision@5": 0.10185469251150465, "eval_dim_512_cosine_recall@1": 0.22102914516803793, "eval_dim_512_cosine_recall@10": 0.684562822479431, "eval_dim_512_cosine_recall@3": 0.4230930135267048, "eval_dim_512_cosine_recall@5": 0.5092734625575234, "eval_dim_64_cosine_accuracy@1": 0.19746199972109887, "eval_dim_64_cosine_accuracy@10": 0.6455166643424906, "eval_dim_64_cosine_accuracy@3": 0.38613861386138615, "eval_dim_64_cosine_accuracy@5": 0.4791521405661693, "eval_dim_64_cosine_map@100": 0.332107485720445, "eval_dim_64_cosine_mrr@10": 0.32084381979445287, "eval_dim_64_cosine_ndcg@10": 0.3971543906124372, "eval_dim_64_cosine_precision@1": 0.19746199972109887, "eval_dim_64_cosine_precision@10": 0.06455166643424905, "eval_dim_64_cosine_precision@3": 0.12871287128712872, "eval_dim_64_cosine_precision@5": 0.09583042811323385, "eval_dim_64_cosine_recall@1": 0.19746199972109887, "eval_dim_64_cosine_recall@10": 0.6455166643424906, "eval_dim_64_cosine_recall@3": 0.38613861386138615, "eval_dim_64_cosine_recall@5": 0.4791521405661693, "eval_dim_768_cosine_accuracy@1": 0.2259099149351555, "eval_dim_768_cosine_accuracy@10": 0.6851206247385302, "eval_dim_768_cosine_accuracy@3": 0.42978664063589456, "eval_dim_768_cosine_accuracy@5": 0.5170826941849115, "eval_dim_768_cosine_map@100": 0.3656884435565745, "eval_dim_768_cosine_mrr@10": 0.3550936532285014, "eval_dim_768_cosine_ndcg@10": 0.43283429692560266, "eval_dim_768_cosine_precision@1": 0.2259099149351555, "eval_dim_768_cosine_precision@10": 0.06851206247385301, "eval_dim_768_cosine_precision@3": 0.1432622135452982, "eval_dim_768_cosine_precision@5": 0.10341653883698228, "eval_dim_768_cosine_recall@1": 0.2259099149351555, "eval_dim_768_cosine_recall@10": 0.6851206247385302, "eval_dim_768_cosine_recall@3": 0.42978664063589456, "eval_dim_768_cosine_recall@5": 0.5170826941849115, "eval_loss": 0.38568806648254395, "eval_runtime": 5135.457, "eval_samples_per_second": 1.397, "eval_sequential_score": 0.3971543906124372, "eval_steps_per_second": 0.013, "step": 46 }, { "epoch": 2.0, "grad_norm": 3.2190756797790527, "learning_rate": 2.9078090590344733e-05, "loss": 0.2932926675547724, "step": 92 }, { "epoch": 2.0, "eval_dim_128_cosine_accuracy@1": 0.3525310277506624, "eval_dim_128_cosine_accuracy@10": 0.7081299679263701, "eval_dim_128_cosine_accuracy@3": 0.47273741458652907, "eval_dim_128_cosine_accuracy@5": 0.5437177520568959, "eval_dim_128_cosine_map@100": 0.45170521683593867, "eval_dim_128_cosine_mrr@10": 0.4414466446644676, "eval_dim_128_cosine_ndcg@10": 0.5032435031696283, "eval_dim_128_cosine_precision@1": 0.3525310277506624, "eval_dim_128_cosine_precision@10": 0.070812996792637, "eval_dim_128_cosine_precision@3": 0.1575791381955097, "eval_dim_128_cosine_precision@5": 0.10874355041137916, "eval_dim_128_cosine_recall@1": 0.3525310277506624, "eval_dim_128_cosine_recall@10": 0.7081299679263701, "eval_dim_128_cosine_recall@3": 0.47273741458652907, "eval_dim_128_cosine_recall@5": 0.5437177520568959, "eval_dim_256_cosine_accuracy@1": 0.3576906986473295, "eval_dim_256_cosine_accuracy@10": 0.7170548040719565, "eval_dim_256_cosine_accuracy@3": 0.4801282945195928, "eval_dim_256_cosine_accuracy@5": 0.5580811602286989, "eval_dim_256_cosine_map@100": 0.45920859350066756, "eval_dim_256_cosine_mrr@10": 0.4493326294400066, "eval_dim_256_cosine_ndcg@10": 0.5115508657442693, "eval_dim_256_cosine_precision@1": 0.3576906986473295, "eval_dim_256_cosine_precision@10": 0.07170548040719565, "eval_dim_256_cosine_precision@3": 0.16004276483986424, "eval_dim_256_cosine_precision@5": 0.11161623204573978, "eval_dim_256_cosine_recall@1": 0.3576906986473295, "eval_dim_256_cosine_recall@10": 0.7170548040719565, "eval_dim_256_cosine_recall@3": 0.4801282945195928, "eval_dim_256_cosine_recall@5": 0.5580811602286989, "eval_dim_512_cosine_accuracy@1": 0.3614558638962488, "eval_dim_512_cosine_accuracy@10": 0.7205410681913261, "eval_dim_512_cosine_accuracy@3": 0.4808255473434667, "eval_dim_512_cosine_accuracy@5": 0.5544554455445545, "eval_dim_512_cosine_map@100": 0.46086139918174507, "eval_dim_512_cosine_mrr@10": 0.45110962806542504, "eval_dim_512_cosine_ndcg@10": 0.5136290484927198, "eval_dim_512_cosine_precision@1": 0.3614558638962488, "eval_dim_512_cosine_precision@10": 0.07205410681913263, "eval_dim_512_cosine_precision@3": 0.16027518244782224, "eval_dim_512_cosine_precision@5": 0.11089108910891088, "eval_dim_512_cosine_recall@1": 0.3614558638962488, "eval_dim_512_cosine_recall@10": 0.7205410681913261, "eval_dim_512_cosine_recall@3": 0.4808255473434667, "eval_dim_512_cosine_recall@5": 0.5544554455445545, "eval_dim_64_cosine_accuracy@1": 0.34527959838237343, "eval_dim_64_cosine_accuracy@10": 0.6934876586250174, "eval_dim_64_cosine_accuracy@3": 0.4613024682749965, "eval_dim_64_cosine_accuracy@5": 0.5328406080044624, "eval_dim_64_cosine_map@100": 0.4429389789139068, "eval_dim_64_cosine_mrr@10": 0.43229392858802956, "eval_dim_64_cosine_ndcg@10": 0.49282395499966114, "eval_dim_64_cosine_precision@1": 0.34527959838237343, "eval_dim_64_cosine_precision@10": 0.06934876586250174, "eval_dim_64_cosine_precision@3": 0.15376748942499882, "eval_dim_64_cosine_precision@5": 0.10656812160089248, "eval_dim_64_cosine_recall@1": 0.34527959838237343, "eval_dim_64_cosine_recall@10": 0.6934876586250174, "eval_dim_64_cosine_recall@3": 0.4613024682749965, "eval_dim_64_cosine_recall@5": 0.5328406080044624, "eval_dim_768_cosine_accuracy@1": 0.36480267745084366, "eval_dim_768_cosine_accuracy@10": 0.7151024961651095, "eval_dim_768_cosine_accuracy@3": 0.48333565750941293, "eval_dim_768_cosine_accuracy@5": 0.5564077534514015, "eval_dim_768_cosine_map@100": 0.4630869213805269, "eval_dim_768_cosine_mrr@10": 0.45277440218871223, "eval_dim_768_cosine_ndcg@10": 0.5136830862608868, "eval_dim_768_cosine_precision@1": 0.36480267745084366, "eval_dim_768_cosine_precision@10": 0.07151024961651094, "eval_dim_768_cosine_precision@3": 0.16111188583647096, "eval_dim_768_cosine_precision@5": 0.11128155069028028, "eval_dim_768_cosine_recall@1": 0.36480267745084366, "eval_dim_768_cosine_recall@10": 0.7151024961651095, "eval_dim_768_cosine_recall@3": 0.48333565750941293, "eval_dim_768_cosine_recall@5": 0.5564077534514015, "eval_loss": 0.19174915552139282, "eval_runtime": 5125.6155, "eval_samples_per_second": 1.399, "eval_sequential_score": 0.49282395499966114, "eval_steps_per_second": 0.013, "step": 92 }, { "epoch": 3.0, "grad_norm": 1.435935139656067, "learning_rate": 2.5281411335025595e-05, "loss": 0.1528056186178456, "step": 138 }, { "epoch": 3.0, "eval_dim_128_cosine_accuracy@1": 0.3654999302747176, "eval_dim_128_cosine_accuracy@10": 0.7250034862641194, "eval_dim_128_cosine_accuracy@3": 0.4868219216287826, "eval_dim_128_cosine_accuracy@5": 0.5612885232185191, "eval_dim_128_cosine_map@100": 0.46579021075100474, "eval_dim_128_cosine_mrr@10": 0.4560526525489593, "eval_dim_128_cosine_ndcg@10": 0.5184617012059481, "eval_dim_128_cosine_precision@1": 0.3654999302747176, "eval_dim_128_cosine_precision@10": 0.07250034862641193, "eval_dim_128_cosine_precision@3": 0.16227397387626086, "eval_dim_128_cosine_precision@5": 0.11225770464370378, "eval_dim_128_cosine_recall@1": 0.3654999302747176, "eval_dim_128_cosine_recall@10": 0.7250034862641194, "eval_dim_128_cosine_recall@3": 0.4868219216287826, "eval_dim_128_cosine_recall@5": 0.5612885232185191, "eval_dim_256_cosine_accuracy@1": 0.36508157858039325, "eval_dim_256_cosine_accuracy@10": 0.7268163436061916, "eval_dim_256_cosine_accuracy@3": 0.48654302049923304, "eval_dim_256_cosine_accuracy@5": 0.5640775345140148, "eval_dim_256_cosine_map@100": 0.4656359359210204, "eval_dim_256_cosine_mrr@10": 0.4559058420932647, "eval_dim_256_cosine_ndcg@10": 0.5187585343172242, "eval_dim_256_cosine_precision@1": 0.36508157858039325, "eval_dim_256_cosine_precision@10": 0.07268163436061917, "eval_dim_256_cosine_precision@3": 0.16218100683307768, "eval_dim_256_cosine_precision@5": 0.11281550690280295, "eval_dim_256_cosine_recall@1": 0.36508157858039325, "eval_dim_256_cosine_recall@10": 0.7268163436061916, "eval_dim_256_cosine_recall@3": 0.48654302049923304, "eval_dim_256_cosine_recall@5": 0.5640775345140148, "eval_dim_512_cosine_accuracy@1": 0.37303026077255613, "eval_dim_512_cosine_accuracy@10": 0.7275135964300655, "eval_dim_512_cosine_accuracy@3": 0.49532840608004464, "eval_dim_512_cosine_accuracy@5": 0.5689583042811324, "eval_dim_512_cosine_map@100": 0.47218646353528615, "eval_dim_512_cosine_mrr@10": 0.46250495270855235, "eval_dim_512_cosine_ndcg@10": 0.5240494016636663, "eval_dim_512_cosine_precision@1": 0.37303026077255613, "eval_dim_512_cosine_precision@10": 0.07275135964300654, "eval_dim_512_cosine_precision@3": 0.16510946869334822, "eval_dim_512_cosine_precision@5": 0.11379166085622647, "eval_dim_512_cosine_recall@1": 0.37303026077255613, "eval_dim_512_cosine_recall@10": 0.7275135964300655, "eval_dim_512_cosine_recall@3": 0.49532840608004464, "eval_dim_512_cosine_recall@5": 0.5689583042811324, "eval_dim_64_cosine_accuracy@1": 0.354762236787059, "eval_dim_64_cosine_accuracy@10": 0.7084088690559197, "eval_dim_64_cosine_accuracy@3": 0.4685538976432855, "eval_dim_64_cosine_accuracy@5": 0.5463673127876167, "eval_dim_64_cosine_map@100": 0.4528341447756788, "eval_dim_64_cosine_mrr@10": 0.4426069619034351, "eval_dim_64_cosine_ndcg@10": 0.5042049654509246, "eval_dim_64_cosine_precision@1": 0.354762236787059, "eval_dim_64_cosine_precision@10": 0.07084088690559197, "eval_dim_64_cosine_precision@3": 0.15618463254776183, "eval_dim_64_cosine_precision@5": 0.10927346255752335, "eval_dim_64_cosine_recall@1": 0.354762236787059, "eval_dim_64_cosine_recall@10": 0.7084088690559197, "eval_dim_64_cosine_recall@3": 0.4685538976432855, "eval_dim_64_cosine_recall@5": 0.5463673127876167, "eval_dim_768_cosine_accuracy@1": 0.3689861943940873, "eval_dim_768_cosine_accuracy@10": 0.730442058290336, "eval_dim_768_cosine_accuracy@3": 0.4868219216287826, "eval_dim_768_cosine_accuracy@5": 0.5649142379026635, "eval_dim_768_cosine_map@100": 0.46810281327770226, "eval_dim_768_cosine_mrr@10": 0.45858146679859224, "eval_dim_768_cosine_ndcg@10": 0.5215840208749241, "eval_dim_768_cosine_precision@1": 0.3689861943940873, "eval_dim_768_cosine_precision@10": 0.0730442058290336, "eval_dim_768_cosine_precision@3": 0.16227397387626086, "eval_dim_768_cosine_precision@5": 0.11298284758053269, "eval_dim_768_cosine_recall@1": 0.3689861943940873, "eval_dim_768_cosine_recall@10": 0.730442058290336, "eval_dim_768_cosine_recall@3": 0.4868219216287826, "eval_dim_768_cosine_recall@5": 0.5649142379026635, "eval_loss": 0.15238769352436066, "eval_runtime": 5128.0318, "eval_samples_per_second": 1.399, "eval_sequential_score": 0.5042049654509246, "eval_steps_per_second": 0.013, "step": 138 }, { "epoch": 4.0, "grad_norm": 1.2450511455535889, "learning_rate": 1.9318122786371193e-05, "loss": 0.11104167026022206, "step": 184 }, { "epoch": 4.0, "eval_dim_128_cosine_accuracy@1": 0.36619718309859156, "eval_dim_128_cosine_accuracy@10": 0.7255612885232186, "eval_dim_128_cosine_accuracy@3": 0.4857063171105843, "eval_dim_128_cosine_accuracy@5": 0.5624041277367173, "eval_dim_128_cosine_map@100": 0.46578169439240885, "eval_dim_128_cosine_mrr@10": 0.45578664506289746, "eval_dim_128_cosine_ndcg@10": 0.5183564003053771, "eval_dim_128_cosine_precision@1": 0.36619718309859156, "eval_dim_128_cosine_precision@10": 0.07255612885232185, "eval_dim_128_cosine_precision@3": 0.1619021057035281, "eval_dim_128_cosine_precision@5": 0.11248082554734345, "eval_dim_128_cosine_recall@1": 0.36619718309859156, "eval_dim_128_cosine_recall@10": 0.7255612885232186, "eval_dim_128_cosine_recall@3": 0.4857063171105843, "eval_dim_128_cosine_recall@5": 0.5624041277367173, "eval_dim_256_cosine_accuracy@1": 0.36619718309859156, "eval_dim_256_cosine_accuracy@10": 0.7269557941709663, "eval_dim_256_cosine_accuracy@3": 0.4847301631571608, "eval_dim_256_cosine_accuracy@5": 0.5621252266071678, "eval_dim_256_cosine_map@100": 0.4659867486137855, "eval_dim_256_cosine_mrr@10": 0.4562129653609242, "eval_dim_256_cosine_ndcg@10": 0.5189825183389356, "eval_dim_256_cosine_precision@1": 0.36619718309859156, "eval_dim_256_cosine_precision@10": 0.07269557941709663, "eval_dim_256_cosine_precision@3": 0.1615767210523869, "eval_dim_256_cosine_precision@5": 0.11242504532143355, "eval_dim_256_cosine_recall@1": 0.36619718309859156, "eval_dim_256_cosine_recall@10": 0.7269557941709663, "eval_dim_256_cosine_recall@3": 0.4847301631571608, "eval_dim_256_cosine_recall@5": 0.5621252266071678, "eval_dim_512_cosine_accuracy@1": 0.3657788314042672, "eval_dim_512_cosine_accuracy@10": 0.7297448054664621, "eval_dim_512_cosine_accuracy@3": 0.48710082275833216, "eval_dim_512_cosine_accuracy@5": 0.561985776042393, "eval_dim_512_cosine_map@100": 0.4664999147434714, "eval_dim_512_cosine_mrr@10": 0.45684652469271236, "eval_dim_512_cosine_ndcg@10": 0.5201161952598219, "eval_dim_512_cosine_precision@1": 0.3657788314042672, "eval_dim_512_cosine_precision@10": 0.07297448054664621, "eval_dim_512_cosine_precision@3": 0.1623669409194441, "eval_dim_512_cosine_precision@5": 0.11239715520847858, "eval_dim_512_cosine_recall@1": 0.3657788314042672, "eval_dim_512_cosine_recall@10": 0.7297448054664621, "eval_dim_512_cosine_recall@3": 0.48710082275833216, "eval_dim_512_cosine_recall@5": 0.561985776042393, "eval_dim_64_cosine_accuracy@1": 0.3514154232324641, "eval_dim_64_cosine_accuracy@10": 0.7138474410821364, "eval_dim_64_cosine_accuracy@3": 0.4725979640217543, "eval_dim_64_cosine_accuracy@5": 0.5434388509273462, "eval_dim_64_cosine_map@100": 0.4521933044833062, "eval_dim_64_cosine_mrr@10": 0.4420436258917645, "eval_dim_64_cosine_ndcg@10": 0.5050332106839454, "eval_dim_64_cosine_precision@1": 0.3514154232324641, "eval_dim_64_cosine_precision@10": 0.07138474410821363, "eval_dim_64_cosine_precision@3": 0.15753265467391808, "eval_dim_64_cosine_precision@5": 0.10868777018546923, "eval_dim_64_cosine_recall@1": 0.3514154232324641, "eval_dim_64_cosine_recall@10": 0.7138474410821364, "eval_dim_64_cosine_recall@3": 0.4725979640217543, "eval_dim_64_cosine_recall@5": 0.5434388509273462, "eval_dim_768_cosine_accuracy@1": 0.3723330079486822, "eval_dim_768_cosine_accuracy@10": 0.7312787616789848, "eval_dim_768_cosine_accuracy@3": 0.4951889555152698, "eval_dim_768_cosine_accuracy@5": 0.570492260493655, "eval_dim_768_cosine_map@100": 0.47234757553193524, "eval_dim_768_cosine_mrr@10": 0.46258391935773235, "eval_dim_768_cosine_ndcg@10": 0.5249716949025338, "eval_dim_768_cosine_precision@1": 0.3723330079486822, "eval_dim_768_cosine_precision@10": 0.07312787616789848, "eval_dim_768_cosine_precision@3": 0.1650629851717566, "eval_dim_768_cosine_precision@5": 0.11409845209873098, "eval_dim_768_cosine_recall@1": 0.3723330079486822, "eval_dim_768_cosine_recall@10": 0.7312787616789848, "eval_dim_768_cosine_recall@3": 0.4951889555152698, "eval_dim_768_cosine_recall@5": 0.570492260493655, "eval_loss": 0.13917988538742065, "eval_runtime": 5127.8405, "eval_samples_per_second": 1.399, "eval_sequential_score": 0.5050332106839454, "eval_steps_per_second": 0.013, "step": 184 }, { "epoch": 5.0, "grad_norm": 1.0852317810058594, "learning_rate": 1.2444873080259475e-05, "loss": 0.08755316941634468, "step": 230 }, { "epoch": 5.0, "eval_dim_128_cosine_accuracy@1": 0.36926509552363684, "eval_dim_128_cosine_accuracy@10": 0.7303026077255613, "eval_dim_128_cosine_accuracy@3": 0.4847301631571608, "eval_dim_128_cosine_accuracy@5": 0.559336215311672, "eval_dim_128_cosine_map@100": 0.46751231421013056, "eval_dim_128_cosine_mrr@10": 0.4577717127849617, "eval_dim_128_cosine_ndcg@10": 0.5208050058986332, "eval_dim_128_cosine_precision@1": 0.36926509552363684, "eval_dim_128_cosine_precision@10": 0.07303026077255612, "eval_dim_128_cosine_precision@3": 0.1615767210523869, "eval_dim_128_cosine_precision@5": 0.11186724306233439, "eval_dim_128_cosine_recall@1": 0.36926509552363684, "eval_dim_128_cosine_recall@10": 0.7303026077255613, "eval_dim_128_cosine_recall@3": 0.4847301631571608, "eval_dim_128_cosine_recall@5": 0.559336215311672, "eval_dim_256_cosine_accuracy@1": 0.36745223818156464, "eval_dim_256_cosine_accuracy@10": 0.7289081020778134, "eval_dim_256_cosine_accuracy@3": 0.4869613721935574, "eval_dim_256_cosine_accuracy@5": 0.5610096220889694, "eval_dim_256_cosine_map@100": 0.4672783873145468, "eval_dim_256_cosine_mrr@10": 0.45734638856239845, "eval_dim_256_cosine_ndcg@10": 0.5202779962822549, "eval_dim_256_cosine_precision@1": 0.36745223818156464, "eval_dim_256_cosine_precision@10": 0.07289081020778135, "eval_dim_256_cosine_precision@3": 0.16232045739785247, "eval_dim_256_cosine_precision@5": 0.11220192441779388, "eval_dim_256_cosine_recall@1": 0.36745223818156464, "eval_dim_256_cosine_recall@10": 0.7289081020778134, "eval_dim_256_cosine_recall@3": 0.4869613721935574, "eval_dim_256_cosine_recall@5": 0.5610096220889694, "eval_dim_512_cosine_accuracy@1": 0.3733091619021057, "eval_dim_512_cosine_accuracy@10": 0.7358806303165528, "eval_dim_512_cosine_accuracy@3": 0.49714126342211684, "eval_dim_512_cosine_accuracy@5": 0.5745363268721239, "eval_dim_512_cosine_map@100": 0.47438800022814137, "eval_dim_512_cosine_mrr@10": 0.4649837971724749, "eval_dim_512_cosine_ndcg@10": 0.5278640631789735, "eval_dim_512_cosine_precision@1": 0.3733091619021057, "eval_dim_512_cosine_precision@10": 0.07358806303165527, "eval_dim_512_cosine_precision@3": 0.16571375447403894, "eval_dim_512_cosine_precision@5": 0.11490726537442476, "eval_dim_512_cosine_recall@1": 0.3733091619021057, "eval_dim_512_cosine_recall@10": 0.7358806303165528, "eval_dim_512_cosine_recall@3": 0.49714126342211684, "eval_dim_512_cosine_recall@5": 0.5745363268721239, "eval_dim_64_cosine_accuracy@1": 0.35601729187003206, "eval_dim_64_cosine_accuracy@10": 0.7197043648026774, "eval_dim_64_cosine_accuracy@3": 0.46980895272625856, "eval_dim_64_cosine_accuracy@5": 0.5460884116580672, "eval_dim_64_cosine_map@100": 0.45478631984265544, "eval_dim_64_cosine_mrr@10": 0.4447587062529192, "eval_dim_64_cosine_ndcg@10": 0.5083136533419047, "eval_dim_64_cosine_precision@1": 0.35601729187003206, "eval_dim_64_cosine_precision@10": 0.07197043648026773, "eval_dim_64_cosine_precision@3": 0.15660298424208619, "eval_dim_64_cosine_precision@5": 0.10921768233161343, "eval_dim_64_cosine_recall@1": 0.35601729187003206, "eval_dim_64_cosine_recall@10": 0.7197043648026774, "eval_dim_64_cosine_recall@3": 0.46980895272625856, "eval_dim_64_cosine_recall@5": 0.5460884116580672, "eval_dim_768_cosine_accuracy@1": 0.36954399665318644, "eval_dim_768_cosine_accuracy@10": 0.7333705201506067, "eval_dim_768_cosine_accuracy@3": 0.4882164272765305, "eval_dim_768_cosine_accuracy@5": 0.5631013805605912, "eval_dim_768_cosine_map@100": 0.4692423416522091, "eval_dim_768_cosine_mrr@10": 0.45940809875756333, "eval_dim_768_cosine_ndcg@10": 0.5228646825758372, "eval_dim_768_cosine_precision@1": 0.36954399665318644, "eval_dim_768_cosine_precision@10": 0.07333705201506065, "eval_dim_768_cosine_precision@3": 0.16273880909217683, "eval_dim_768_cosine_precision@5": 0.11262027611211824, "eval_dim_768_cosine_recall@1": 0.36954399665318644, "eval_dim_768_cosine_recall@10": 0.7333705201506067, "eval_dim_768_cosine_recall@3": 0.4882164272765305, "eval_dim_768_cosine_recall@5": 0.5631013805605912, "eval_loss": 0.1289709359407425, "eval_runtime": 5133.7392, "eval_samples_per_second": 1.397, "eval_sequential_score": 0.5083136533419047, "eval_steps_per_second": 0.013, "step": 230 }, { "epoch": 6.0, "grad_norm": 1.0251929759979248, "learning_rate": 6.11006712953975e-06, "loss": 0.07780430109604546, "step": 276 }, { "epoch": 6.0, "eval_dim_128_cosine_accuracy@1": 0.36424487519174453, "eval_dim_128_cosine_accuracy@10": 0.729187003207363, "eval_dim_128_cosine_accuracy@3": 0.48333565750941293, "eval_dim_128_cosine_accuracy@5": 0.5603123692650955, "eval_dim_128_cosine_map@100": 0.4650707051757783, "eval_dim_128_cosine_mrr@10": 0.4552743855874532, "eval_dim_128_cosine_ndcg@10": 0.5187763857272285, "eval_dim_128_cosine_precision@1": 0.36424487519174453, "eval_dim_128_cosine_precision@10": 0.0729187003207363, "eval_dim_128_cosine_precision@3": 0.16111188583647096, "eval_dim_128_cosine_precision@5": 0.1120624738530191, "eval_dim_128_cosine_recall@1": 0.36424487519174453, "eval_dim_128_cosine_recall@10": 0.729187003207363, "eval_dim_128_cosine_recall@3": 0.48333565750941293, "eval_dim_128_cosine_recall@5": 0.5603123692650955, "eval_dim_256_cosine_accuracy@1": 0.37163575512480823, "eval_dim_256_cosine_accuracy@10": 0.7309998605494352, "eval_dim_256_cosine_accuracy@3": 0.49295774647887325, "eval_dim_256_cosine_accuracy@5": 0.5681216008924836, "eval_dim_256_cosine_map@100": 0.4718786149307726, "eval_dim_256_cosine_mrr@10": 0.46190055625280924, "eval_dim_256_cosine_ndcg@10": 0.524333456443, "eval_dim_256_cosine_precision@1": 0.37163575512480823, "eval_dim_256_cosine_precision@10": 0.07309998605494351, "eval_dim_256_cosine_precision@3": 0.16431924882629106, "eval_dim_256_cosine_precision@5": 0.11362432017849672, "eval_dim_256_cosine_recall@1": 0.37163575512480823, "eval_dim_256_cosine_recall@10": 0.7309998605494352, "eval_dim_256_cosine_recall@3": 0.49295774647887325, "eval_dim_256_cosine_recall@5": 0.5681216008924836, "eval_dim_512_cosine_accuracy@1": 0.3719146562543578, "eval_dim_512_cosine_accuracy@10": 0.7375540370938503, "eval_dim_512_cosine_accuracy@3": 0.4942128015618463, "eval_dim_512_cosine_accuracy@5": 0.5781620415562683, "eval_dim_512_cosine_map@100": 0.4735810945322351, "eval_dim_512_cosine_mrr@10": 0.4641111797296452, "eval_dim_512_cosine_ndcg@10": 0.5276527222739883, "eval_dim_512_cosine_precision@1": 0.3719146562543578, "eval_dim_512_cosine_precision@10": 0.07375540370938502, "eval_dim_512_cosine_precision@3": 0.16473760052061545, "eval_dim_512_cosine_precision@5": 0.11563240831125365, "eval_dim_512_cosine_recall@1": 0.3719146562543578, "eval_dim_512_cosine_recall@10": 0.7375540370938503, "eval_dim_512_cosine_recall@3": 0.4942128015618463, "eval_dim_512_cosine_recall@5": 0.5781620415562683, "eval_dim_64_cosine_accuracy@1": 0.35852740203597827, "eval_dim_64_cosine_accuracy@10": 0.7167759029424069, "eval_dim_64_cosine_accuracy@3": 0.47413192023427697, "eval_dim_64_cosine_accuracy@5": 0.5512480825547343, "eval_dim_64_cosine_map@100": 0.4579655459168757, "eval_dim_64_cosine_mrr@10": 0.44765800523714366, "eval_dim_64_cosine_ndcg@10": 0.5100114350662204, "eval_dim_64_cosine_precision@1": 0.35852740203597827, "eval_dim_64_cosine_precision@10": 0.07167759029424069, "eval_dim_64_cosine_precision@3": 0.15804397341142565, "eval_dim_64_cosine_precision@5": 0.11024961651094686, "eval_dim_64_cosine_recall@1": 0.35852740203597827, "eval_dim_64_cosine_recall@10": 0.7167759029424069, "eval_dim_64_cosine_recall@3": 0.47413192023427697, "eval_dim_64_cosine_recall@5": 0.5512480825547343, "eval_dim_768_cosine_accuracy@1": 0.37651652489192583, "eval_dim_768_cosine_accuracy@10": 0.7325338167619578, "eval_dim_768_cosine_accuracy@3": 0.4960256589039186, "eval_dim_768_cosine_accuracy@5": 0.5675637986333845, "eval_dim_768_cosine_map@100": 0.47495758740026794, "eval_dim_768_cosine_mrr@10": 0.46505562528083916, "eval_dim_768_cosine_ndcg@10": 0.5270382021795976, "eval_dim_768_cosine_precision@1": 0.37651652489192583, "eval_dim_768_cosine_precision@10": 0.07325338167619579, "eval_dim_768_cosine_precision@3": 0.16534188630130617, "eval_dim_768_cosine_precision@5": 0.11351275972667689, "eval_dim_768_cosine_recall@1": 0.37651652489192583, "eval_dim_768_cosine_recall@10": 0.7325338167619578, "eval_dim_768_cosine_recall@3": 0.4960256589039186, "eval_dim_768_cosine_recall@5": 0.5675637986333845, "eval_loss": 0.12710943818092346, "eval_runtime": 5133.9648, "eval_samples_per_second": 1.397, "eval_sequential_score": 0.5100114350662204, "eval_steps_per_second": 0.013, "step": 276 } ], "logging_steps": 500, "max_steps": 368, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 104, "trial_name": null, "trial_params": null }