gte-modernbert-base / trainer_state.json
amentaphd's picture
Upload folder using huggingface_hub
44e0275 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.6253776435045317,
"eval_steps": 2000,
"global_step": 42000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04315925766076824,
"grad_norm": 30.676715850830078,
"learning_rate": 2.1579628830384117e-06,
"loss": 0.358,
"step": 500
},
{
"epoch": 0.08631851532153648,
"grad_norm": 19.524194717407227,
"learning_rate": 4.3159257660768235e-06,
"loss": 0.1048,
"step": 1000
},
{
"epoch": 0.1294777729823047,
"grad_norm": 0.00297492160461843,
"learning_rate": 6.473888649115235e-06,
"loss": 0.0827,
"step": 1500
},
{
"epoch": 0.17263703064307295,
"grad_norm": 8.900677680969238,
"learning_rate": 8.631851532153647e-06,
"loss": 0.067,
"step": 2000
},
{
"epoch": 0.17263703064307295,
"eval_cosine_accuracy@1": 0.6191955808734679,
"eval_cosine_accuracy@10": 0.9514931814258588,
"eval_cosine_accuracy@3": 0.8606939409632315,
"eval_cosine_accuracy@5": 0.909891248058001,
"eval_cosine_map@100": 0.748128574680106,
"eval_cosine_mrr@10": 0.7459635876906734,
"eval_cosine_ndcg@10": 0.7968614059582585,
"eval_cosine_precision@1": 0.6191955808734679,
"eval_cosine_precision@10": 0.09514931814258587,
"eval_cosine_precision@3": 0.28689798032107716,
"eval_cosine_precision@5": 0.18197824961160017,
"eval_cosine_recall@1": 0.6191955808734679,
"eval_cosine_recall@10": 0.9514931814258588,
"eval_cosine_recall@3": 0.8606939409632315,
"eval_cosine_recall@5": 0.909891248058001,
"eval_runtime": 468.0233,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 2000
},
{
"epoch": 0.21579628830384118,
"grad_norm": 0.12022869288921356,
"learning_rate": 1.0789814415192059e-05,
"loss": 0.0491,
"step": 2500
},
{
"epoch": 0.2589555459646094,
"grad_norm": 0.07568053156137466,
"learning_rate": 1.294777729823047e-05,
"loss": 0.0831,
"step": 3000
},
{
"epoch": 0.3021148036253776,
"grad_norm": 0.0246192067861557,
"learning_rate": 1.5105740181268884e-05,
"loss": 0.062,
"step": 3500
},
{
"epoch": 0.3452740612861459,
"grad_norm": 0.009853623807430267,
"learning_rate": 1.7263703064307294e-05,
"loss": 0.0657,
"step": 4000
},
{
"epoch": 0.3452740612861459,
"eval_cosine_accuracy@1": 0.6362851717590196,
"eval_cosine_accuracy@10": 0.9523562920766442,
"eval_cosine_accuracy@3": 0.8606939409632315,
"eval_cosine_accuracy@5": 0.9110996029691006,
"eval_cosine_map@100": 0.7589134849598074,
"eval_cosine_mrr@10": 0.756632799848751,
"eval_cosine_ndcg@10": 0.8050365772218437,
"eval_cosine_precision@1": 0.6362851717590196,
"eval_cosine_precision@10": 0.09523562920766442,
"eval_cosine_precision@3": 0.28689798032107716,
"eval_cosine_precision@5": 0.1822199205938201,
"eval_cosine_recall@1": 0.6362851717590196,
"eval_cosine_recall@10": 0.9523562920766442,
"eval_cosine_recall@3": 0.8606939409632315,
"eval_cosine_recall@5": 0.9110996029691006,
"eval_runtime": 467.8258,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 4000
},
{
"epoch": 0.38843331894691413,
"grad_norm": 0.017763391137123108,
"learning_rate": 1.9421665947345706e-05,
"loss": 0.0522,
"step": 4500
},
{
"epoch": 0.43159257660768235,
"grad_norm": 21.21623420715332,
"learning_rate": 1.982446885041485e-05,
"loss": 0.049,
"step": 5000
},
{
"epoch": 0.4747518342684506,
"grad_norm": 0.13613158464431763,
"learning_rate": 1.958467219797612e-05,
"loss": 0.0426,
"step": 5500
},
{
"epoch": 0.5179110919292188,
"grad_norm": 0.1645500212907791,
"learning_rate": 1.9344875545537384e-05,
"loss": 0.0708,
"step": 6000
},
{
"epoch": 0.5179110919292188,
"eval_cosine_accuracy@1": 0.6526842741239427,
"eval_cosine_accuracy@10": 0.9642672190574831,
"eval_cosine_accuracy@3": 0.8865872604867944,
"eval_cosine_accuracy@5": 0.9287070602451234,
"eval_cosine_map@100": 0.7759321604397249,
"eval_cosine_mrr@10": 0.7742270364616298,
"eval_cosine_ndcg@10": 0.8214808830487713,
"eval_cosine_precision@1": 0.6526842741239427,
"eval_cosine_precision@10": 0.0964267219057483,
"eval_cosine_precision@3": 0.2955290868289315,
"eval_cosine_precision@5": 0.1857414120490247,
"eval_cosine_recall@1": 0.6526842741239427,
"eval_cosine_recall@10": 0.9642672190574831,
"eval_cosine_recall@3": 0.8865872604867944,
"eval_cosine_recall@5": 0.9287070602451234,
"eval_runtime": 467.7458,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 6000
},
{
"epoch": 0.561070349589987,
"grad_norm": 0.3336288332939148,
"learning_rate": 1.9105078893098655e-05,
"loss": 0.0236,
"step": 6500
},
{
"epoch": 0.6042296072507553,
"grad_norm": 0.011359921656548977,
"learning_rate": 1.886528224065992e-05,
"loss": 0.024,
"step": 7000
},
{
"epoch": 0.6473888649115235,
"grad_norm": 0.0021573721896857023,
"learning_rate": 1.8625485588221192e-05,
"loss": 0.0256,
"step": 7500
},
{
"epoch": 0.6905481225722918,
"grad_norm": 0.024769997224211693,
"learning_rate": 1.8385688935782457e-05,
"loss": 0.041,
"step": 8000
},
{
"epoch": 0.6905481225722918,
"eval_cosine_accuracy@1": 0.6390471258415329,
"eval_cosine_accuracy@10": 0.9573623338511997,
"eval_cosine_accuracy@3": 0.8693250474710857,
"eval_cosine_accuracy@5": 0.9195580873467979,
"eval_cosine_map@100": 0.7640704294756044,
"eval_cosine_mrr@10": 0.762041421091137,
"eval_cosine_ndcg@10": 0.8104943817099518,
"eval_cosine_precision@1": 0.6390471258415329,
"eval_cosine_precision@10": 0.09573623338511995,
"eval_cosine_precision@3": 0.2897750158236953,
"eval_cosine_precision@5": 0.18391161746935958,
"eval_cosine_recall@1": 0.6390471258415329,
"eval_cosine_recall@10": 0.9573623338511997,
"eval_cosine_recall@3": 0.8693250474710857,
"eval_cosine_recall@5": 0.9195580873467979,
"eval_runtime": 467.5761,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 8000
},
{
"epoch": 0.73370738023306,
"grad_norm": 0.001473304582759738,
"learning_rate": 1.8145892283343725e-05,
"loss": 0.0285,
"step": 8500
},
{
"epoch": 0.7768666378938283,
"grad_norm": 0.002119662007316947,
"learning_rate": 1.7906095630904994e-05,
"loss": 0.0249,
"step": 9000
},
{
"epoch": 0.8200258955545965,
"grad_norm": 0.035019177943468094,
"learning_rate": 1.7666298978466262e-05,
"loss": 0.0368,
"step": 9500
},
{
"epoch": 0.8631851532153647,
"grad_norm": 0.2664908468723297,
"learning_rate": 1.742650232602753e-05,
"loss": 0.0588,
"step": 10000
},
{
"epoch": 0.8631851532153647,
"eval_cosine_accuracy@1": 0.6407733471431037,
"eval_cosine_accuracy@10": 0.9589159330226135,
"eval_cosine_accuracy@3": 0.8734679785948558,
"eval_cosine_accuracy@5": 0.9204211979975833,
"eval_cosine_map@100": 0.7652575174635105,
"eval_cosine_mrr@10": 0.7632412818974197,
"eval_cosine_ndcg@10": 0.811775458664963,
"eval_cosine_precision@1": 0.6407733471431037,
"eval_cosine_precision@10": 0.09589159330226135,
"eval_cosine_precision@3": 0.2911559928649519,
"eval_cosine_precision@5": 0.18408423959951664,
"eval_cosine_recall@1": 0.6407733471431037,
"eval_cosine_recall@10": 0.9589159330226135,
"eval_cosine_recall@3": 0.8734679785948558,
"eval_cosine_recall@5": 0.9204211979975833,
"eval_runtime": 467.8166,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 10000
},
{
"epoch": 0.9063444108761329,
"grad_norm": 0.032082412391901016,
"learning_rate": 1.71867056735888e-05,
"loss": 0.0386,
"step": 10500
},
{
"epoch": 0.9495036685369012,
"grad_norm": 8.98410415649414,
"learning_rate": 1.6946909021150067e-05,
"loss": 0.0456,
"step": 11000
},
{
"epoch": 0.9926629261976694,
"grad_norm": 0.002887778216972947,
"learning_rate": 1.6707112368711332e-05,
"loss": 0.0399,
"step": 11500
},
{
"epoch": 1.0358221838584376,
"grad_norm": 0.039170317351818085,
"learning_rate": 1.6467315716272604e-05,
"loss": 0.0424,
"step": 12000
},
{
"epoch": 1.0358221838584376,
"eval_cosine_accuracy@1": 0.6606248921111687,
"eval_cosine_accuracy@10": 0.9654755739685827,
"eval_cosine_accuracy@3": 0.8808907301916106,
"eval_cosine_accuracy@5": 0.9300880372863801,
"eval_cosine_map@100": 0.7789505370634054,
"eval_cosine_mrr@10": 0.7772537463112309,
"eval_cosine_ndcg@10": 0.8239196088222247,
"eval_cosine_precision@1": 0.6606248921111687,
"eval_cosine_precision@10": 0.09654755739685827,
"eval_cosine_precision@3": 0.2936302433972035,
"eval_cosine_precision@5": 0.186017607457276,
"eval_cosine_recall@1": 0.6606248921111687,
"eval_cosine_recall@10": 0.9654755739685827,
"eval_cosine_recall@3": 0.8808907301916106,
"eval_cosine_recall@5": 0.9300880372863801,
"eval_runtime": 467.7683,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 12000
},
{
"epoch": 1.0789814415192058,
"grad_norm": 0.07316175103187561,
"learning_rate": 1.622751906383387e-05,
"loss": 0.0107,
"step": 12500
},
{
"epoch": 1.122140699179974,
"grad_norm": 0.03618592023849487,
"learning_rate": 1.598772241139514e-05,
"loss": 0.0279,
"step": 13000
},
{
"epoch": 1.1652999568407423,
"grad_norm": 0.023356635123491287,
"learning_rate": 1.5747925758956405e-05,
"loss": 0.0236,
"step": 13500
},
{
"epoch": 1.2084592145015105,
"grad_norm": 0.002293772529810667,
"learning_rate": 1.5508129106517674e-05,
"loss": 0.024,
"step": 14000
},
{
"epoch": 1.2084592145015105,
"eval_cosine_accuracy@1": 0.6506128085620576,
"eval_cosine_accuracy@10": 0.9640945969273261,
"eval_cosine_accuracy@3": 0.8803728638011393,
"eval_cosine_accuracy@5": 0.9266355946832384,
"eval_cosine_map@100": 0.7732572758885798,
"eval_cosine_mrr@10": 0.7715017303313533,
"eval_cosine_ndcg@10": 0.8192838549207232,
"eval_cosine_precision@1": 0.6506128085620576,
"eval_cosine_precision@10": 0.09640945969273261,
"eval_cosine_precision@3": 0.29345762126704644,
"eval_cosine_precision@5": 0.18532711893664763,
"eval_cosine_recall@1": 0.6506128085620576,
"eval_cosine_recall@10": 0.9640945969273261,
"eval_cosine_recall@3": 0.8803728638011393,
"eval_cosine_recall@5": 0.9266355946832384,
"eval_runtime": 467.8783,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 14000
},
{
"epoch": 1.2516184721622787,
"grad_norm": 0.007560160476714373,
"learning_rate": 1.5268332454078942e-05,
"loss": 0.0143,
"step": 14500
},
{
"epoch": 1.2947777298230472,
"grad_norm": 0.004202102776616812,
"learning_rate": 1.5028535801640209e-05,
"loss": 0.0118,
"step": 15000
},
{
"epoch": 1.3379369874838152,
"grad_norm": 0.00022126469411887228,
"learning_rate": 1.4788739149201479e-05,
"loss": 0.0078,
"step": 15500
},
{
"epoch": 1.3810962451445836,
"grad_norm": 0.011956814676523209,
"learning_rate": 1.4548942496762745e-05,
"loss": 0.023,
"step": 16000
},
{
"epoch": 1.3810962451445836,
"eval_cosine_accuracy@1": 0.6533747626445711,
"eval_cosine_accuracy@10": 0.9642672190574831,
"eval_cosine_accuracy@3": 0.8826169514931814,
"eval_cosine_accuracy@5": 0.9302606594165372,
"eval_cosine_map@100": 0.7763076224553367,
"eval_cosine_mrr@10": 0.7745393318153555,
"eval_cosine_ndcg@10": 0.8216976031852626,
"eval_cosine_precision@1": 0.6533747626445711,
"eval_cosine_precision@10": 0.0964267219057483,
"eval_cosine_precision@3": 0.2942056504977271,
"eval_cosine_precision@5": 0.18605213188330738,
"eval_cosine_recall@1": 0.6533747626445711,
"eval_cosine_recall@10": 0.9642672190574831,
"eval_cosine_recall@3": 0.8826169514931814,
"eval_cosine_recall@5": 0.9302606594165372,
"eval_runtime": 467.7532,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 16000
},
{
"epoch": 1.4242555028053516,
"grad_norm": 0.008947977796196938,
"learning_rate": 1.4309145844324015e-05,
"loss": 0.0239,
"step": 16500
},
{
"epoch": 1.46741476046612,
"grad_norm": 0.20168237388134003,
"learning_rate": 1.4069349191885282e-05,
"loss": 0.0335,
"step": 17000
},
{
"epoch": 1.510574018126888,
"grad_norm": 0.003233299357816577,
"learning_rate": 1.3829552539446552e-05,
"loss": 0.0119,
"step": 17500
},
{
"epoch": 1.5537332757876565,
"grad_norm": 0.013063711114227772,
"learning_rate": 1.3589755887007819e-05,
"loss": 0.0411,
"step": 18000
},
{
"epoch": 1.5537332757876565,
"eval_cosine_accuracy@1": 0.6644225789746245,
"eval_cosine_accuracy@10": 0.9680649059209391,
"eval_cosine_accuracy@3": 0.8898670809597791,
"eval_cosine_accuracy@5": 0.9335404798895218,
"eval_cosine_map@100": 0.7848911785594413,
"eval_cosine_mrr@10": 0.7833323743214994,
"eval_cosine_ndcg@10": 0.8292454833247894,
"eval_cosine_precision@1": 0.6644225789746245,
"eval_cosine_precision@10": 0.09680649059209388,
"eval_cosine_precision@3": 0.2966223603199264,
"eval_cosine_precision@5": 0.18670809597790436,
"eval_cosine_recall@1": 0.6644225789746245,
"eval_cosine_recall@10": 0.9680649059209391,
"eval_cosine_recall@3": 0.8898670809597791,
"eval_cosine_recall@5": 0.9335404798895218,
"eval_runtime": 467.9161,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 18000
},
{
"epoch": 1.5968925334484245,
"grad_norm": 3.0231621265411377,
"learning_rate": 1.3349959234569087e-05,
"loss": 0.0168,
"step": 18500
},
{
"epoch": 1.640051791109193,
"grad_norm": 0.08278048038482666,
"learning_rate": 1.3110162582130355e-05,
"loss": 0.0059,
"step": 19000
},
{
"epoch": 1.6832110487699612,
"grad_norm": 0.10015950351953506,
"learning_rate": 1.2870365929691622e-05,
"loss": 0.0234,
"step": 19500
},
{
"epoch": 1.7263703064307294,
"grad_norm": 2.1657984256744385,
"learning_rate": 1.263056927725289e-05,
"loss": 0.0184,
"step": 20000
},
{
"epoch": 1.7263703064307294,
"eval_cosine_accuracy@1": 0.6768513723459347,
"eval_cosine_accuracy@10": 0.969963749352667,
"eval_cosine_accuracy@3": 0.897807698947005,
"eval_cosine_accuracy@5": 0.9369929224926635,
"eval_cosine_map@100": 0.7938770196077543,
"eval_cosine_mrr@10": 0.7923516066188262,
"eval_cosine_ndcg@10": 0.8365875778541227,
"eval_cosine_precision@1": 0.6768513723459347,
"eval_cosine_precision@10": 0.09699637493526668,
"eval_cosine_precision@3": 0.29926923298233504,
"eval_cosine_precision@5": 0.1873985844985327,
"eval_cosine_recall@1": 0.6768513723459347,
"eval_cosine_recall@10": 0.969963749352667,
"eval_cosine_recall@3": 0.897807698947005,
"eval_cosine_recall@5": 0.9369929224926635,
"eval_runtime": 467.8044,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 20000
},
{
"epoch": 1.7695295640914976,
"grad_norm": 1.5666255950927734,
"learning_rate": 1.2390772624814159e-05,
"loss": 0.0128,
"step": 20500
},
{
"epoch": 1.8126888217522659,
"grad_norm": 0.00032274972181767225,
"learning_rate": 1.2150975972375427e-05,
"loss": 0.0166,
"step": 21000
},
{
"epoch": 1.855848079413034,
"grad_norm": 0.051935628056526184,
"learning_rate": 1.1911179319936694e-05,
"loss": 0.0181,
"step": 21500
},
{
"epoch": 1.8990073370738023,
"grad_norm": 0.02546406351029873,
"learning_rate": 1.1671382667497964e-05,
"loss": 0.0148,
"step": 22000
},
{
"epoch": 1.8990073370738023,
"eval_cosine_accuracy@1": 0.6744346625237355,
"eval_cosine_accuracy@10": 0.9697911272225099,
"eval_cosine_accuracy@3": 0.8971172104263767,
"eval_cosine_accuracy@5": 0.9388917659243915,
"eval_cosine_map@100": 0.792274316391964,
"eval_cosine_mrr@10": 0.7907476593261165,
"eval_cosine_ndcg@10": 0.8353359235071491,
"eval_cosine_precision@1": 0.6744346625237355,
"eval_cosine_precision@10": 0.09697911272225099,
"eval_cosine_precision@3": 0.2990390701421256,
"eval_cosine_precision@5": 0.1877783531848783,
"eval_cosine_recall@1": 0.6744346625237355,
"eval_cosine_recall@10": 0.9697911272225099,
"eval_cosine_recall@3": 0.8971172104263767,
"eval_cosine_recall@5": 0.9388917659243915,
"eval_runtime": 467.8952,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 22000
},
{
"epoch": 1.9421665947345705,
"grad_norm": 0.009108115918934345,
"learning_rate": 1.143158601505923e-05,
"loss": 0.0225,
"step": 22500
},
{
"epoch": 1.9853258523953388,
"grad_norm": 0.06883949786424637,
"learning_rate": 1.1191789362620497e-05,
"loss": 0.0158,
"step": 23000
},
{
"epoch": 2.028485110056107,
"grad_norm": 0.00019052527204621583,
"learning_rate": 1.0951992710181767e-05,
"loss": 0.0123,
"step": 23500
},
{
"epoch": 2.071644367716875,
"grad_norm": 0.005655207671225071,
"learning_rate": 1.0712196057743034e-05,
"loss": 0.0173,
"step": 24000
},
{
"epoch": 2.071644367716875,
"eval_cosine_accuracy@1": 0.6718453305713793,
"eval_cosine_accuracy@10": 0.9685827723114103,
"eval_cosine_accuracy@3": 0.8934921456930779,
"eval_cosine_accuracy@5": 0.9383738995339203,
"eval_cosine_map@100": 0.7895192117982024,
"eval_cosine_mrr@10": 0.7879250134946668,
"eval_cosine_ndcg@10": 0.832874525127316,
"eval_cosine_precision@1": 0.6718453305713793,
"eval_cosine_precision@10": 0.09685827723114103,
"eval_cosine_precision@3": 0.297830715231026,
"eval_cosine_precision@5": 0.18767477990678402,
"eval_cosine_recall@1": 0.6718453305713793,
"eval_cosine_recall@10": 0.9685827723114103,
"eval_cosine_recall@3": 0.8934921456930779,
"eval_cosine_recall@5": 0.9383738995339203,
"eval_runtime": 468.4558,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 24000
},
{
"epoch": 2.1148036253776437,
"grad_norm": 0.1119648739695549,
"learning_rate": 1.0472399405304304e-05,
"loss": 0.0167,
"step": 24500
},
{
"epoch": 2.1579628830384117,
"grad_norm": 0.03796195238828659,
"learning_rate": 1.023260275286557e-05,
"loss": 0.0125,
"step": 25000
},
{
"epoch": 2.20112214069918,
"grad_norm": 0.012651159428060055,
"learning_rate": 9.992806100426838e-06,
"loss": 0.013,
"step": 25500
},
{
"epoch": 2.244281398359948,
"grad_norm": 0.0021349990274757147,
"learning_rate": 9.753009447988107e-06,
"loss": 0.0079,
"step": 26000
},
{
"epoch": 2.244281398359948,
"eval_cosine_accuracy@1": 0.669255998619023,
"eval_cosine_accuracy@10": 0.9709994821336095,
"eval_cosine_accuracy@3": 0.8950457448644916,
"eval_cosine_accuracy@5": 0.9390643880545486,
"eval_cosine_map@100": 0.7897457483356454,
"eval_cosine_mrr@10": 0.7882845059308039,
"eval_cosine_ndcg@10": 0.8337888145070348,
"eval_cosine_precision@1": 0.669255998619023,
"eval_cosine_precision@10": 0.09709994821336093,
"eval_cosine_precision@3": 0.29834858162149724,
"eval_cosine_precision@5": 0.18781287761090973,
"eval_cosine_recall@1": 0.669255998619023,
"eval_cosine_recall@10": 0.9709994821336095,
"eval_cosine_recall@3": 0.8950457448644916,
"eval_cosine_recall@5": 0.9390643880545486,
"eval_runtime": 467.762,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 26000
},
{
"epoch": 2.2874406560207166,
"grad_norm": 0.4521012306213379,
"learning_rate": 9.513212795549375e-06,
"loss": 0.007,
"step": 26500
},
{
"epoch": 2.3305999136814846,
"grad_norm": 0.0015283157117664814,
"learning_rate": 9.273416143110643e-06,
"loss": 0.0171,
"step": 27000
},
{
"epoch": 2.373759171342253,
"grad_norm": 0.0033215314615517855,
"learning_rate": 9.033619490671912e-06,
"loss": 0.0058,
"step": 27500
},
{
"epoch": 2.416918429003021,
"grad_norm": 4.302379131317139,
"learning_rate": 8.793822838233178e-06,
"loss": 0.0048,
"step": 28000
},
{
"epoch": 2.416918429003021,
"eval_cosine_accuracy@1": 0.6825479026411186,
"eval_cosine_accuracy@10": 0.9718625927843949,
"eval_cosine_accuracy@3": 0.8993612981184188,
"eval_cosine_accuracy@5": 0.9390643880545486,
"eval_cosine_map@100": 0.7983751737002095,
"eval_cosine_mrr@10": 0.7969948679166703,
"eval_cosine_ndcg@10": 0.8405363983140419,
"eval_cosine_precision@1": 0.6825479026411186,
"eval_cosine_precision@10": 0.09718625927843948,
"eval_cosine_precision@3": 0.2997870993728063,
"eval_cosine_precision@5": 0.18781287761090973,
"eval_cosine_recall@1": 0.6825479026411186,
"eval_cosine_recall@10": 0.9718625927843949,
"eval_cosine_recall@3": 0.8993612981184188,
"eval_cosine_recall@5": 0.9390643880545486,
"eval_runtime": 467.6926,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 28000
},
{
"epoch": 2.4600776866637895,
"grad_norm": 0.001049822778441012,
"learning_rate": 8.554026185794447e-06,
"loss": 0.005,
"step": 28500
},
{
"epoch": 2.5032369443245575,
"grad_norm": 0.0011170560028403997,
"learning_rate": 8.314229533355715e-06,
"loss": 0.0141,
"step": 29000
},
{
"epoch": 2.546396201985326,
"grad_norm": 0.0026090971659868956,
"learning_rate": 8.074432880916982e-06,
"loss": 0.0132,
"step": 29500
},
{
"epoch": 2.5895554596460943,
"grad_norm": 7.936817564768717e-05,
"learning_rate": 7.83463622847825e-06,
"loss": 0.006,
"step": 30000
},
{
"epoch": 2.5895554596460943,
"eval_cosine_accuracy@1": 0.6911790091489729,
"eval_cosine_accuracy@10": 0.9735888140859659,
"eval_cosine_accuracy@3": 0.9092007595373727,
"eval_cosine_accuracy@5": 0.9442430519592612,
"eval_cosine_map@100": 0.8050289389600185,
"eval_cosine_mrr@10": 0.8036913735515502,
"eval_cosine_ndcg@10": 0.8461133955612519,
"eval_cosine_precision@1": 0.6911790091489729,
"eval_cosine_precision@10": 0.09735888140859657,
"eval_cosine_precision@3": 0.3030669198457909,
"eval_cosine_precision@5": 0.18884861039185225,
"eval_cosine_recall@1": 0.6911790091489729,
"eval_cosine_recall@10": 0.9735888140859659,
"eval_cosine_recall@3": 0.9092007595373727,
"eval_cosine_recall@5": 0.9442430519592612,
"eval_runtime": 467.8028,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 30000
},
{
"epoch": 2.6327147173068624,
"grad_norm": 0.014025676064193249,
"learning_rate": 7.5948395760395184e-06,
"loss": 0.0095,
"step": 30500
},
{
"epoch": 2.6758739749676304,
"grad_norm": 0.0240753386169672,
"learning_rate": 7.355042923600787e-06,
"loss": 0.0061,
"step": 31000
},
{
"epoch": 2.719033232628399,
"grad_norm": 0.051389552652835846,
"learning_rate": 7.115246271162055e-06,
"loss": 0.0107,
"step": 31500
},
{
"epoch": 2.7621924902891672,
"grad_norm": 0.0053047193214297295,
"learning_rate": 6.875449618723323e-06,
"loss": 0.0157,
"step": 32000
},
{
"epoch": 2.7621924902891672,
"eval_cosine_accuracy@1": 0.689452787847402,
"eval_cosine_accuracy@10": 0.9723804591748663,
"eval_cosine_accuracy@3": 0.9074745382358018,
"eval_cosine_accuracy@5": 0.9442430519592612,
"eval_cosine_map@100": 0.8041420474637542,
"eval_cosine_mrr@10": 0.8027525694667068,
"eval_cosine_ndcg@10": 0.8451171490975874,
"eval_cosine_precision@1": 0.689452787847402,
"eval_cosine_precision@10": 0.09723804591748661,
"eval_cosine_precision@3": 0.3024915127452673,
"eval_cosine_precision@5": 0.18884861039185225,
"eval_cosine_recall@1": 0.689452787847402,
"eval_cosine_recall@10": 0.9723804591748663,
"eval_cosine_recall@3": 0.9074745382358018,
"eval_cosine_recall@5": 0.9442430519592612,
"eval_runtime": 467.7248,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 32000
},
{
"epoch": 2.8053517479499352,
"grad_norm": 0.005983938928693533,
"learning_rate": 6.635652966284592e-06,
"loss": 0.005,
"step": 32500
},
{
"epoch": 2.8485110056107033,
"grad_norm": 0.006458807270973921,
"learning_rate": 6.395856313845859e-06,
"loss": 0.0087,
"step": 33000
},
{
"epoch": 2.8916702632714717,
"grad_norm": 0.00440911203622818,
"learning_rate": 6.1560596614071276e-06,
"loss": 0.0064,
"step": 33500
},
{
"epoch": 2.93482952093224,
"grad_norm": 0.0034452094696462154,
"learning_rate": 5.916263008968395e-06,
"loss": 0.005,
"step": 34000
},
{
"epoch": 2.93482952093224,
"eval_cosine_accuracy@1": 0.6884170550664596,
"eval_cosine_accuracy@10": 0.9725530813050233,
"eval_cosine_accuracy@3": 0.9083376488865873,
"eval_cosine_accuracy@5": 0.9463145175211463,
"eval_cosine_map@100": 0.8037708008346327,
"eval_cosine_mrr@10": 0.8023887614773162,
"eval_cosine_ndcg@10": 0.8449160090668899,
"eval_cosine_precision@1": 0.6884170550664596,
"eval_cosine_precision@10": 0.0972553081305023,
"eval_cosine_precision@3": 0.30277921629552906,
"eval_cosine_precision@5": 0.18926290350422922,
"eval_cosine_recall@1": 0.6884170550664596,
"eval_cosine_recall@10": 0.9725530813050233,
"eval_cosine_recall@3": 0.9083376488865873,
"eval_cosine_recall@5": 0.9463145175211463,
"eval_runtime": 467.6593,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 34000
},
{
"epoch": 2.977988778593008,
"grad_norm": 1.5224103927612305,
"learning_rate": 5.6764663565296625e-06,
"loss": 0.0115,
"step": 34500
},
{
"epoch": 3.0211480362537766,
"grad_norm": 0.007577585522085428,
"learning_rate": 5.436669704090931e-06,
"loss": 0.0079,
"step": 35000
},
{
"epoch": 3.0643072939145446,
"grad_norm": 0.01359875500202179,
"learning_rate": 5.196873051652199e-06,
"loss": 0.0045,
"step": 35500
},
{
"epoch": 3.107466551575313,
"grad_norm": 0.005014342721551657,
"learning_rate": 4.9570763992134675e-06,
"loss": 0.0029,
"step": 36000
},
{
"epoch": 3.107466551575313,
"eval_cosine_accuracy@1": 0.6875539444156741,
"eval_cosine_accuracy@10": 0.972035214914552,
"eval_cosine_accuracy@3": 0.9067840497151735,
"eval_cosine_accuracy@5": 0.9442430519592612,
"eval_cosine_map@100": 0.8031759037555115,
"eval_cosine_mrr@10": 0.8017571836836468,
"eval_cosine_ndcg@10": 0.8443043752760462,
"eval_cosine_precision@1": 0.6875539444156741,
"eval_cosine_precision@10": 0.09720352149145518,
"eval_cosine_precision@3": 0.3022613499050578,
"eval_cosine_precision@5": 0.18884861039185225,
"eval_cosine_recall@1": 0.6875539444156741,
"eval_cosine_recall@10": 0.972035214914552,
"eval_cosine_recall@3": 0.9067840497151735,
"eval_cosine_recall@5": 0.9442430519592612,
"eval_runtime": 467.7266,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 36000
},
{
"epoch": 3.150625809236081,
"grad_norm": 0.015572451055049896,
"learning_rate": 4.717279746774736e-06,
"loss": 0.0161,
"step": 36500
},
{
"epoch": 3.1937850668968495,
"grad_norm": 0.004311546217650175,
"learning_rate": 4.477483094336003e-06,
"loss": 0.0144,
"step": 37000
},
{
"epoch": 3.2369443245576175,
"grad_norm": 0.0009289888548664749,
"learning_rate": 4.237686441897272e-06,
"loss": 0.0076,
"step": 37500
},
{
"epoch": 3.280103582218386,
"grad_norm": 0.0010557913919910789,
"learning_rate": 3.997889789458539e-06,
"loss": 0.0157,
"step": 38000
},
{
"epoch": 3.280103582218386,
"eval_cosine_accuracy@1": 0.6977386500949422,
"eval_cosine_accuracy@10": 0.9747971689970655,
"eval_cosine_accuracy@3": 0.909891248058001,
"eval_cosine_accuracy@5": 0.9470050060417745,
"eval_cosine_map@100": 0.809749193191093,
"eval_cosine_mrr@10": 0.8084805416498834,
"eval_cosine_ndcg@10": 0.8499994995327701,
"eval_cosine_precision@1": 0.6977386500949422,
"eval_cosine_precision@10": 0.09747971689970651,
"eval_cosine_precision@3": 0.30329708268600036,
"eval_cosine_precision@5": 0.18940100120835487,
"eval_cosine_recall@1": 0.6977386500949422,
"eval_cosine_recall@10": 0.9747971689970655,
"eval_cosine_recall@3": 0.909891248058001,
"eval_cosine_recall@5": 0.9470050060417745,
"eval_runtime": 467.9009,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 38000
},
{
"epoch": 3.323262839879154,
"grad_norm": 0.002490697894245386,
"learning_rate": 3.7580931370198075e-06,
"loss": 0.0039,
"step": 38500
},
{
"epoch": 3.3664220975399224,
"grad_norm": 0.0011037011863663793,
"learning_rate": 3.518296484581076e-06,
"loss": 0.0045,
"step": 39000
},
{
"epoch": 3.4095813552006904,
"grad_norm": 0.008491401560604572,
"learning_rate": 3.2784998321423433e-06,
"loss": 0.0033,
"step": 39500
},
{
"epoch": 3.452740612861459,
"grad_norm": 0.0002366910339333117,
"learning_rate": 3.0387031797036116e-06,
"loss": 0.0064,
"step": 40000
},
{
"epoch": 3.452740612861459,
"eval_cosine_accuracy@1": 0.6832383911617469,
"eval_cosine_accuracy@10": 0.97393405834628,
"eval_cosine_accuracy@3": 0.9062661833247022,
"eval_cosine_accuracy@5": 0.9464871396513033,
"eval_cosine_map@100": 0.8011659555812971,
"eval_cosine_mrr@10": 0.7998895081365299,
"eval_cosine_ndcg@10": 0.8433601615941685,
"eval_cosine_precision@1": 0.6832383911617469,
"eval_cosine_precision@10": 0.097393405834628,
"eval_cosine_precision@3": 0.30208872777490076,
"eval_cosine_precision@5": 0.18929742793026064,
"eval_cosine_recall@1": 0.6832383911617469,
"eval_cosine_recall@10": 0.97393405834628,
"eval_cosine_recall@3": 0.9062661833247022,
"eval_cosine_recall@5": 0.9464871396513033,
"eval_runtime": 467.6658,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 40000
},
{
"epoch": 3.495899870522227,
"grad_norm": 0.0015487176133319736,
"learning_rate": 2.7989065272648796e-06,
"loss": 0.0054,
"step": 40500
},
{
"epoch": 3.5390591281829953,
"grad_norm": 1.1207655668258667,
"learning_rate": 2.559109874826148e-06,
"loss": 0.0061,
"step": 41000
},
{
"epoch": 3.5822183858437633,
"grad_norm": 0.0002378961944486946,
"learning_rate": 2.319313222387416e-06,
"loss": 0.0051,
"step": 41500
},
{
"epoch": 3.6253776435045317,
"grad_norm": 0.0002853251644410193,
"learning_rate": 2.0795165699486837e-06,
"loss": 0.0019,
"step": 42000
},
{
"epoch": 3.6253776435045317,
"eval_cosine_accuracy@1": 0.6910063870188158,
"eval_cosine_accuracy@10": 0.9742793026065941,
"eval_cosine_accuracy@3": 0.9109269808389435,
"eval_cosine_accuracy@5": 0.9461418953909891,
"eval_cosine_map@100": 0.8061197699360279,
"eval_cosine_mrr@10": 0.804833419644399,
"eval_cosine_ndcg@10": 0.8471731447814336,
"eval_cosine_precision@1": 0.6910063870188158,
"eval_cosine_precision@10": 0.09742793026065939,
"eval_cosine_precision@3": 0.30364232694631454,
"eval_cosine_precision@5": 0.18922837907819778,
"eval_cosine_recall@1": 0.6910063870188158,
"eval_cosine_recall@10": 0.9742793026065941,
"eval_cosine_recall@3": 0.9109269808389435,
"eval_cosine_recall@5": 0.9461418953909891,
"eval_runtime": 467.6854,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 42000
}
],
"logging_steps": 500,
"max_steps": 46340,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}