{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 250, "global_step": 21340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004686035613870665, "grad_norm": 0.2652921974658966, "learning_rate": 9.999414245548266e-06, "loss": 3.4545, "step": 10 }, { "epoch": 0.00937207122774133, "grad_norm": 0.9133270382881165, "learning_rate": 9.998828491096533e-06, "loss": 3.4005, "step": 20 }, { "epoch": 0.014058106841611996, "grad_norm": 1.7206453084945679, "learning_rate": 9.9982427366448e-06, "loss": 3.2559, "step": 30 }, { "epoch": 0.01874414245548266, "grad_norm": 1.3775238990783691, "learning_rate": 9.997656982193065e-06, "loss": 3.0152, "step": 40 }, { "epoch": 0.023430178069353328, "grad_norm": 1.5043880939483643, "learning_rate": 9.997071227741332e-06, "loss": 2.904, "step": 50 }, { "epoch": 0.028116213683223992, "grad_norm": 1.3289176225662231, "learning_rate": 9.996485473289597e-06, "loss": 2.7989, "step": 60 }, { "epoch": 0.03280224929709466, "grad_norm": 1.5171725749969482, "learning_rate": 9.995899718837864e-06, "loss": 2.7384, "step": 70 }, { "epoch": 0.03748828491096532, "grad_norm": 2.6390421390533447, "learning_rate": 9.99531396438613e-06, "loss": 2.6937, "step": 80 }, { "epoch": 0.04217432052483599, "grad_norm": 2.382201671600342, "learning_rate": 9.994728209934396e-06, "loss": 2.5994, "step": 90 }, { "epoch": 0.046860356138706656, "grad_norm": 3.3837928771972656, "learning_rate": 9.994142455482663e-06, "loss": 2.5344, "step": 100 }, { "epoch": 0.05154639175257732, "grad_norm": 3.612391710281372, "learning_rate": 9.993556701030928e-06, "loss": 2.4489, "step": 110 }, { "epoch": 0.056232427366447985, "grad_norm": 4.58568811416626, "learning_rate": 9.992970946579195e-06, "loss": 2.2835, "step": 120 }, { "epoch": 0.06091846298031865, "grad_norm": 4.746246814727783, "learning_rate": 9.99238519212746e-06, "loss": 2.2496, "step": 130 }, { "epoch": 0.06560449859418932, "grad_norm": 3.7526397705078125, "learning_rate": 9.991799437675727e-06, "loss": 2.0891, "step": 140 }, { "epoch": 0.07029053420805999, "grad_norm": 3.860639810562134, "learning_rate": 9.991213683223994e-06, "loss": 1.988, "step": 150 }, { "epoch": 0.07497656982193064, "grad_norm": 4.522952079772949, "learning_rate": 9.99062792877226e-06, "loss": 1.7019, "step": 160 }, { "epoch": 0.07966260543580131, "grad_norm": 4.140243053436279, "learning_rate": 9.990042174320525e-06, "loss": 1.5405, "step": 170 }, { "epoch": 0.08434864104967198, "grad_norm": 4.114628791809082, "learning_rate": 9.989456419868792e-06, "loss": 1.324, "step": 180 }, { "epoch": 0.08903467666354264, "grad_norm": 4.97472620010376, "learning_rate": 9.988870665417057e-06, "loss": 1.2381, "step": 190 }, { "epoch": 0.09372071227741331, "grad_norm": 4.743481636047363, "learning_rate": 9.988284910965324e-06, "loss": 1.1454, "step": 200 }, { "epoch": 0.09840674789128398, "grad_norm": 4.455767631530762, "learning_rate": 9.98769915651359e-06, "loss": 1.1716, "step": 210 }, { "epoch": 0.10309278350515463, "grad_norm": 4.199008941650391, "learning_rate": 9.987113402061856e-06, "loss": 1.1001, "step": 220 }, { "epoch": 0.1077788191190253, "grad_norm": 4.948373794555664, "learning_rate": 9.986527647610123e-06, "loss": 1.0628, "step": 230 }, { "epoch": 0.11246485473289597, "grad_norm": 4.091549396514893, "learning_rate": 9.985941893158388e-06, "loss": 0.9783, "step": 240 }, { "epoch": 0.11715089034676664, "grad_norm": 3.7287604808807373, "learning_rate": 9.985356138706655e-06, "loss": 1.0084, "step": 250 }, { "epoch": 0.11715089034676664, "eval_loss": 0.15787309408187866, "eval_pearson_cosine": 0.6837765056882574, "eval_pearson_dot": 0.3479516218854144, "eval_pearson_euclidean": 0.662064077308699, "eval_pearson_manhattan": 0.6615032406288108, "eval_runtime": 39.6668, "eval_samples_per_second": 37.815, "eval_spearman_cosine": 0.6994144273095011, "eval_spearman_dot": 0.34419950655316756, "eval_spearman_euclidean": 0.6693727884988351, "eval_spearman_manhattan": 0.66929953399832, "eval_steps_per_second": 37.815, "step": 250 }, { "epoch": 0.1218369259606373, "grad_norm": 4.118359565734863, "learning_rate": 9.984770384254922e-06, "loss": 1.0095, "step": 260 }, { "epoch": 0.12652296157450796, "grad_norm": 4.312327861785889, "learning_rate": 9.984184629803187e-06, "loss": 0.8829, "step": 270 }, { "epoch": 0.13120899718837864, "grad_norm": 4.199273586273193, "learning_rate": 9.983598875351454e-06, "loss": 0.9286, "step": 280 }, { "epoch": 0.1358950328022493, "grad_norm": 4.146886348724365, "learning_rate": 9.98301312089972e-06, "loss": 0.935, "step": 290 }, { "epoch": 0.14058106841611998, "grad_norm": 4.277599811553955, "learning_rate": 9.982427366447985e-06, "loss": 0.9089, "step": 300 }, { "epoch": 0.14526710402999063, "grad_norm": 4.278558731079102, "learning_rate": 9.981841611996253e-06, "loss": 0.8515, "step": 310 }, { "epoch": 0.14995313964386128, "grad_norm": 4.297761917114258, "learning_rate": 9.981255857544518e-06, "loss": 0.9178, "step": 320 }, { "epoch": 0.15463917525773196, "grad_norm": 4.595494747161865, "learning_rate": 9.980670103092784e-06, "loss": 0.8835, "step": 330 }, { "epoch": 0.15932521087160262, "grad_norm": 3.567511558532715, "learning_rate": 9.98008434864105e-06, "loss": 0.8519, "step": 340 }, { "epoch": 0.1640112464854733, "grad_norm": 4.046640872955322, "learning_rate": 9.979498594189316e-06, "loss": 0.8736, "step": 350 }, { "epoch": 0.16869728209934395, "grad_norm": 4.551926136016846, "learning_rate": 9.978912839737583e-06, "loss": 0.8348, "step": 360 }, { "epoch": 0.1733833177132146, "grad_norm": 4.367603778839111, "learning_rate": 9.97832708528585e-06, "loss": 0.7652, "step": 370 }, { "epoch": 0.1780693533270853, "grad_norm": 4.336862564086914, "learning_rate": 9.977741330834115e-06, "loss": 0.8035, "step": 380 }, { "epoch": 0.18275538894095594, "grad_norm": 3.89697265625, "learning_rate": 9.977155576382382e-06, "loss": 0.7218, "step": 390 }, { "epoch": 0.18744142455482662, "grad_norm": 4.190406799316406, "learning_rate": 9.976569821930647e-06, "loss": 0.8163, "step": 400 }, { "epoch": 0.19212746016869728, "grad_norm": 4.282820224761963, "learning_rate": 9.975984067478914e-06, "loss": 0.7987, "step": 410 }, { "epoch": 0.19681349578256796, "grad_norm": 3.7369320392608643, "learning_rate": 9.97539831302718e-06, "loss": 0.8085, "step": 420 }, { "epoch": 0.2014995313964386, "grad_norm": 4.697251319885254, "learning_rate": 9.974812558575446e-06, "loss": 0.82, "step": 430 }, { "epoch": 0.20618556701030927, "grad_norm": 3.3654944896698, "learning_rate": 9.974226804123713e-06, "loss": 0.7689, "step": 440 }, { "epoch": 0.21087160262417995, "grad_norm": 4.78535270690918, "learning_rate": 9.973641049671978e-06, "loss": 0.704, "step": 450 }, { "epoch": 0.2155576382380506, "grad_norm": 3.661980628967285, "learning_rate": 9.973055295220243e-06, "loss": 0.7202, "step": 460 }, { "epoch": 0.22024367385192128, "grad_norm": 3.4837870597839355, "learning_rate": 9.97246954076851e-06, "loss": 0.7008, "step": 470 }, { "epoch": 0.22492970946579194, "grad_norm": 4.407717704772949, "learning_rate": 9.971883786316777e-06, "loss": 0.7182, "step": 480 }, { "epoch": 0.2296157450796626, "grad_norm": 3.2862961292266846, "learning_rate": 9.971298031865042e-06, "loss": 0.7351, "step": 490 }, { "epoch": 0.23430178069353327, "grad_norm": 3.018371343612671, "learning_rate": 9.97071227741331e-06, "loss": 0.7072, "step": 500 }, { "epoch": 0.23430178069353327, "eval_loss": 0.13636364042758942, "eval_pearson_cosine": 0.7226338282187044, "eval_pearson_dot": 0.40017542467791145, "eval_pearson_euclidean": 0.7214409735618545, "eval_pearson_manhattan": 0.7207106754958978, "eval_runtime": 38.9491, "eval_samples_per_second": 38.512, "eval_spearman_cosine": 0.737511621673308, "eval_spearman_dot": 0.39096188952446115, "eval_spearman_euclidean": 0.7270501587145487, "eval_spearman_manhattan": 0.7263223218978012, "eval_steps_per_second": 38.512, "step": 500 }, { "epoch": 0.23898781630740393, "grad_norm": 4.259694576263428, "learning_rate": 9.970126522961575e-06, "loss": 0.7365, "step": 510 }, { "epoch": 0.2436738519212746, "grad_norm": 4.129746437072754, "learning_rate": 9.969540768509842e-06, "loss": 0.7005, "step": 520 }, { "epoch": 0.24835988753514526, "grad_norm": 3.5990495681762695, "learning_rate": 9.968955014058108e-06, "loss": 0.6892, "step": 530 }, { "epoch": 0.2530459231490159, "grad_norm": 4.118338584899902, "learning_rate": 9.968369259606374e-06, "loss": 0.7235, "step": 540 }, { "epoch": 0.25773195876288657, "grad_norm": 3.2775630950927734, "learning_rate": 9.96778350515464e-06, "loss": 0.6567, "step": 550 }, { "epoch": 0.2624179943767573, "grad_norm": 3.7363176345825195, "learning_rate": 9.967197750702906e-06, "loss": 0.6724, "step": 560 }, { "epoch": 0.26710402999062793, "grad_norm": 3.317964792251587, "learning_rate": 9.966611996251173e-06, "loss": 0.6603, "step": 570 }, { "epoch": 0.2717900656044986, "grad_norm": 4.1027703285217285, "learning_rate": 9.966026241799438e-06, "loss": 0.6384, "step": 580 }, { "epoch": 0.27647610121836924, "grad_norm": 3.434147357940674, "learning_rate": 9.965440487347705e-06, "loss": 0.6555, "step": 590 }, { "epoch": 0.28116213683223995, "grad_norm": 3.4407315254211426, "learning_rate": 9.964854732895972e-06, "loss": 0.6695, "step": 600 }, { "epoch": 0.2858481724461106, "grad_norm": 3.800232172012329, "learning_rate": 9.964268978444237e-06, "loss": 0.6727, "step": 610 }, { "epoch": 0.29053420805998126, "grad_norm": 3.191505193710327, "learning_rate": 9.963683223992502e-06, "loss": 0.6513, "step": 620 }, { "epoch": 0.2952202436738519, "grad_norm": 4.152797222137451, "learning_rate": 9.963097469540769e-06, "loss": 0.7115, "step": 630 }, { "epoch": 0.29990627928772257, "grad_norm": 3.339895248413086, "learning_rate": 9.962511715089036e-06, "loss": 0.6515, "step": 640 }, { "epoch": 0.3045923149015933, "grad_norm": 3.2941200733184814, "learning_rate": 9.961925960637301e-06, "loss": 0.6024, "step": 650 }, { "epoch": 0.30927835051546393, "grad_norm": 3.0853567123413086, "learning_rate": 9.961340206185568e-06, "loss": 0.6179, "step": 660 }, { "epoch": 0.3139643861293346, "grad_norm": 3.827177047729492, "learning_rate": 9.960754451733833e-06, "loss": 0.6678, "step": 670 }, { "epoch": 0.31865042174320524, "grad_norm": 3.8214621543884277, "learning_rate": 9.9601686972821e-06, "loss": 0.6248, "step": 680 }, { "epoch": 0.3233364573570759, "grad_norm": 4.431512832641602, "learning_rate": 9.959582942830366e-06, "loss": 0.6562, "step": 690 }, { "epoch": 0.3280224929709466, "grad_norm": 3.046006679534912, "learning_rate": 9.958997188378632e-06, "loss": 0.5865, "step": 700 }, { "epoch": 0.33270852858481725, "grad_norm": 3.6602745056152344, "learning_rate": 9.9584114339269e-06, "loss": 0.611, "step": 710 }, { "epoch": 0.3373945641986879, "grad_norm": 3.815204381942749, "learning_rate": 9.957825679475165e-06, "loss": 0.619, "step": 720 }, { "epoch": 0.34208059981255856, "grad_norm": 3.5464768409729004, "learning_rate": 9.957239925023431e-06, "loss": 0.563, "step": 730 }, { "epoch": 0.3467666354264292, "grad_norm": 3.254077911376953, "learning_rate": 9.956654170571697e-06, "loss": 0.6541, "step": 740 }, { "epoch": 0.3514526710402999, "grad_norm": 3.465792655944824, "learning_rate": 9.956068416119962e-06, "loss": 0.6207, "step": 750 }, { "epoch": 0.3514526710402999, "eval_loss": 0.11937286704778671, "eval_pearson_cosine": 0.7370582778898296, "eval_pearson_dot": 0.4517357649062319, "eval_pearson_euclidean": 0.7300011163349307, "eval_pearson_manhattan": 0.7294921991619532, "eval_runtime": 40.2019, "eval_samples_per_second": 37.312, "eval_spearman_cosine": 0.7509292468775599, "eval_spearman_dot": 0.44621372678317905, "eval_spearman_euclidean": 0.7400885968162902, "eval_spearman_manhattan": 0.7397623919578791, "eval_steps_per_second": 37.312, "step": 750 }, { "epoch": 0.3561387066541706, "grad_norm": 3.754749298095703, "learning_rate": 9.95548266166823e-06, "loss": 0.5565, "step": 760 }, { "epoch": 0.36082474226804123, "grad_norm": 4.001673698425293, "learning_rate": 9.954896907216496e-06, "loss": 0.6017, "step": 770 }, { "epoch": 0.3655107778819119, "grad_norm": 3.2223927974700928, "learning_rate": 9.954311152764761e-06, "loss": 0.6053, "step": 780 }, { "epoch": 0.3701968134957826, "grad_norm": 3.135162353515625, "learning_rate": 9.953725398313028e-06, "loss": 0.5703, "step": 790 }, { "epoch": 0.37488284910965325, "grad_norm": 3.6593549251556396, "learning_rate": 9.953139643861293e-06, "loss": 0.5683, "step": 800 }, { "epoch": 0.3795688847235239, "grad_norm": 3.2421703338623047, "learning_rate": 9.95255388940956e-06, "loss": 0.6306, "step": 810 }, { "epoch": 0.38425492033739456, "grad_norm": 3.2221267223358154, "learning_rate": 9.951968134957827e-06, "loss": 0.5942, "step": 820 }, { "epoch": 0.3889409559512652, "grad_norm": 3.55708646774292, "learning_rate": 9.951382380506092e-06, "loss": 0.5695, "step": 830 }, { "epoch": 0.3936269915651359, "grad_norm": 3.310382843017578, "learning_rate": 9.950796626054359e-06, "loss": 0.6249, "step": 840 }, { "epoch": 0.3983130271790066, "grad_norm": 3.126347303390503, "learning_rate": 9.950210871602624e-06, "loss": 0.5515, "step": 850 }, { "epoch": 0.4029990627928772, "grad_norm": 3.691459894180298, "learning_rate": 9.949625117150891e-06, "loss": 0.6074, "step": 860 }, { "epoch": 0.4076850984067479, "grad_norm": 3.8297834396362305, "learning_rate": 9.949039362699158e-06, "loss": 0.6538, "step": 870 }, { "epoch": 0.41237113402061853, "grad_norm": 3.097637414932251, "learning_rate": 9.948453608247423e-06, "loss": 0.5832, "step": 880 }, { "epoch": 0.41705716963448924, "grad_norm": 4.003639221191406, "learning_rate": 9.94786785379569e-06, "loss": 0.5407, "step": 890 }, { "epoch": 0.4217432052483599, "grad_norm": 3.648155689239502, "learning_rate": 9.947282099343956e-06, "loss": 0.5298, "step": 900 }, { "epoch": 0.42642924086223055, "grad_norm": 3.071843147277832, "learning_rate": 9.94669634489222e-06, "loss": 0.4969, "step": 910 }, { "epoch": 0.4311152764761012, "grad_norm": 3.8248066902160645, "learning_rate": 9.94611059044049e-06, "loss": 0.5894, "step": 920 }, { "epoch": 0.43580131208997186, "grad_norm": 3.3905293941497803, "learning_rate": 9.945524835988755e-06, "loss": 0.5924, "step": 930 }, { "epoch": 0.44048734770384257, "grad_norm": 3.8499722480773926, "learning_rate": 9.94493908153702e-06, "loss": 0.5636, "step": 940 }, { "epoch": 0.4451733833177132, "grad_norm": 3.613633155822754, "learning_rate": 9.944353327085287e-06, "loss": 0.5631, "step": 950 }, { "epoch": 0.4498594189315839, "grad_norm": 2.895521879196167, "learning_rate": 9.943767572633552e-06, "loss": 0.5791, "step": 960 }, { "epoch": 0.45454545454545453, "grad_norm": 3.783475160598755, "learning_rate": 9.943181818181819e-06, "loss": 0.583, "step": 970 }, { "epoch": 0.4592314901593252, "grad_norm": 3.0767059326171875, "learning_rate": 9.942596063730086e-06, "loss": 0.5625, "step": 980 }, { "epoch": 0.4639175257731959, "grad_norm": 3.0430691242218018, "learning_rate": 9.942010309278351e-06, "loss": 0.6107, "step": 990 }, { "epoch": 0.46860356138706655, "grad_norm": 3.4637937545776367, "learning_rate": 9.941424554826618e-06, "loss": 0.5767, "step": 1000 }, { "epoch": 0.46860356138706655, "eval_loss": 0.11473686993122101, "eval_pearson_cosine": 0.7508291149999735, "eval_pearson_dot": 0.5170270771997139, "eval_pearson_euclidean": 0.7400425527843311, "eval_pearson_manhattan": 0.7394957204847508, "eval_runtime": 40.3307, "eval_samples_per_second": 37.192, "eval_spearman_cosine": 0.7636438911546342, "eval_spearman_dot": 0.5181197481922067, "eval_spearman_euclidean": 0.7511102060810355, "eval_spearman_manhattan": 0.750245305109835, "eval_steps_per_second": 37.192, "step": 1000 }, { "epoch": 0.4732895970009372, "grad_norm": 3.62758207321167, "learning_rate": 9.940838800374883e-06, "loss": 0.5533, "step": 1010 }, { "epoch": 0.47797563261480785, "grad_norm": 2.8225605487823486, "learning_rate": 9.94025304592315e-06, "loss": 0.562, "step": 1020 }, { "epoch": 0.48266166822867856, "grad_norm": 3.6451101303100586, "learning_rate": 9.939667291471417e-06, "loss": 0.5854, "step": 1030 }, { "epoch": 0.4873477038425492, "grad_norm": 3.448962450027466, "learning_rate": 9.939081537019682e-06, "loss": 0.6032, "step": 1040 }, { "epoch": 0.49203373945641987, "grad_norm": 3.0601136684417725, "learning_rate": 9.938495782567949e-06, "loss": 0.5636, "step": 1050 }, { "epoch": 0.4967197750702905, "grad_norm": 3.154737710952759, "learning_rate": 9.937910028116214e-06, "loss": 0.5563, "step": 1060 }, { "epoch": 0.5014058106841612, "grad_norm": 3.4425132274627686, "learning_rate": 9.93732427366448e-06, "loss": 0.5956, "step": 1070 }, { "epoch": 0.5060918462980318, "grad_norm": 3.6232352256774902, "learning_rate": 9.936738519212746e-06, "loss": 0.5745, "step": 1080 }, { "epoch": 0.5107778819119025, "grad_norm": 3.701143980026245, "learning_rate": 9.936152764761013e-06, "loss": 0.583, "step": 1090 }, { "epoch": 0.5154639175257731, "grad_norm": 3.6037821769714355, "learning_rate": 9.935567010309279e-06, "loss": 0.5367, "step": 1100 }, { "epoch": 0.5201499531396439, "grad_norm": 3.333034038543701, "learning_rate": 9.934981255857546e-06, "loss": 0.5675, "step": 1110 }, { "epoch": 0.5248359887535146, "grad_norm": 3.335798501968384, "learning_rate": 9.93439550140581e-06, "loss": 0.582, "step": 1120 }, { "epoch": 0.5295220243673852, "grad_norm": 2.90682315826416, "learning_rate": 9.933809746954078e-06, "loss": 0.5866, "step": 1130 }, { "epoch": 0.5342080599812559, "grad_norm": 3.268754482269287, "learning_rate": 9.933223992502345e-06, "loss": 0.5832, "step": 1140 }, { "epoch": 0.5388940955951266, "grad_norm": 3.0026419162750244, "learning_rate": 9.93263823805061e-06, "loss": 0.5303, "step": 1150 }, { "epoch": 0.5435801312089972, "grad_norm": 3.3386096954345703, "learning_rate": 9.932052483598877e-06, "loss": 0.525, "step": 1160 }, { "epoch": 0.5482661668228679, "grad_norm": 4.069448947906494, "learning_rate": 9.931466729147142e-06, "loss": 0.5738, "step": 1170 }, { "epoch": 0.5529522024367385, "grad_norm": 2.9254677295684814, "learning_rate": 9.930880974695409e-06, "loss": 0.5529, "step": 1180 }, { "epoch": 0.5576382380506092, "grad_norm": 3.216367721557617, "learning_rate": 9.930295220243674e-06, "loss": 0.5646, "step": 1190 }, { "epoch": 0.5623242736644799, "grad_norm": 3.7063798904418945, "learning_rate": 9.929709465791941e-06, "loss": 0.5793, "step": 1200 }, { "epoch": 0.5670103092783505, "grad_norm": 3.1322641372680664, "learning_rate": 9.929123711340208e-06, "loss": 0.5415, "step": 1210 }, { "epoch": 0.5716963448922212, "grad_norm": 2.747403621673584, "learning_rate": 9.928537956888473e-06, "loss": 0.5468, "step": 1220 }, { "epoch": 0.5763823805060918, "grad_norm": 3.0426528453826904, "learning_rate": 9.927952202436738e-06, "loss": 0.5755, "step": 1230 }, { "epoch": 0.5810684161199625, "grad_norm": 2.9256389141082764, "learning_rate": 9.927366447985005e-06, "loss": 0.5237, "step": 1240 }, { "epoch": 0.5857544517338332, "grad_norm": 3.5383565425872803, "learning_rate": 9.92678069353327e-06, "loss": 0.5026, "step": 1250 }, { "epoch": 0.5857544517338332, "eval_loss": 0.10466309636831284, "eval_pearson_cosine": 0.7506514546227478, "eval_pearson_dot": 0.5487479231049726, "eval_pearson_euclidean": 0.7459287854123886, "eval_pearson_manhattan": 0.7455307169513361, "eval_runtime": 39.6161, "eval_samples_per_second": 37.863, "eval_spearman_cosine": 0.7634978803745991, "eval_spearman_dot": 0.5530948044816919, "eval_spearman_euclidean": 0.7564382757079575, "eval_spearman_manhattan": 0.7557623143729565, "eval_steps_per_second": 37.863, "step": 1250 }, { "epoch": 0.5904404873477038, "grad_norm": 4.084426403045654, "learning_rate": 9.926194939081537e-06, "loss": 0.5576, "step": 1260 }, { "epoch": 0.5951265229615745, "grad_norm": 3.5379457473754883, "learning_rate": 9.925609184629804e-06, "loss": 0.547, "step": 1270 }, { "epoch": 0.5998125585754451, "grad_norm": 2.8720273971557617, "learning_rate": 9.92502343017807e-06, "loss": 0.5385, "step": 1280 }, { "epoch": 0.6044985941893158, "grad_norm": 2.877453565597534, "learning_rate": 9.924437675726336e-06, "loss": 0.5114, "step": 1290 }, { "epoch": 0.6091846298031866, "grad_norm": 3.771329164505005, "learning_rate": 9.923851921274602e-06, "loss": 0.5133, "step": 1300 }, { "epoch": 0.6138706654170571, "grad_norm": 2.411574125289917, "learning_rate": 9.923266166822869e-06, "loss": 0.5207, "step": 1310 }, { "epoch": 0.6185567010309279, "grad_norm": 3.733808755874634, "learning_rate": 9.922680412371136e-06, "loss": 0.5043, "step": 1320 }, { "epoch": 0.6232427366447985, "grad_norm": 2.5523786544799805, "learning_rate": 9.9220946579194e-06, "loss": 0.5142, "step": 1330 }, { "epoch": 0.6279287722586692, "grad_norm": 2.743069887161255, "learning_rate": 9.921508903467668e-06, "loss": 0.5184, "step": 1340 }, { "epoch": 0.6326148078725399, "grad_norm": 3.086749792098999, "learning_rate": 9.920923149015933e-06, "loss": 0.5135, "step": 1350 }, { "epoch": 0.6373008434864105, "grad_norm": 3.3106496334075928, "learning_rate": 9.9203373945642e-06, "loss": 0.5107, "step": 1360 }, { "epoch": 0.6419868791002812, "grad_norm": 3.2946176528930664, "learning_rate": 9.919751640112467e-06, "loss": 0.5112, "step": 1370 }, { "epoch": 0.6466729147141518, "grad_norm": 3.7036850452423096, "learning_rate": 9.919165885660732e-06, "loss": 0.5112, "step": 1380 }, { "epoch": 0.6513589503280225, "grad_norm": 3.8145229816436768, "learning_rate": 9.918580131208997e-06, "loss": 0.555, "step": 1390 }, { "epoch": 0.6560449859418932, "grad_norm": 3.3321220874786377, "learning_rate": 9.917994376757264e-06, "loss": 0.4917, "step": 1400 }, { "epoch": 0.6607310215557638, "grad_norm": 2.517955780029297, "learning_rate": 9.91740862230553e-06, "loss": 0.527, "step": 1410 }, { "epoch": 0.6654170571696345, "grad_norm": 3.0029234886169434, "learning_rate": 9.916822867853796e-06, "loss": 0.5192, "step": 1420 }, { "epoch": 0.6701030927835051, "grad_norm": 2.7767951488494873, "learning_rate": 9.916237113402063e-06, "loss": 0.5526, "step": 1430 }, { "epoch": 0.6747891283973758, "grad_norm": 3.2969696521759033, "learning_rate": 9.915651358950328e-06, "loss": 0.5444, "step": 1440 }, { "epoch": 0.6794751640112465, "grad_norm": 3.046128034591675, "learning_rate": 9.915065604498595e-06, "loss": 0.479, "step": 1450 }, { "epoch": 0.6841611996251171, "grad_norm": 3.239370822906494, "learning_rate": 9.91447985004686e-06, "loss": 0.4626, "step": 1460 }, { "epoch": 0.6888472352389878, "grad_norm": 3.3885092735290527, "learning_rate": 9.913894095595127e-06, "loss": 0.5115, "step": 1470 }, { "epoch": 0.6935332708528584, "grad_norm": 3.279419422149658, "learning_rate": 9.913308341143394e-06, "loss": 0.5054, "step": 1480 }, { "epoch": 0.6982193064667291, "grad_norm": 3.451601028442383, "learning_rate": 9.91272258669166e-06, "loss": 0.5124, "step": 1490 }, { "epoch": 0.7029053420805998, "grad_norm": 2.9452903270721436, "learning_rate": 9.912136832239926e-06, "loss": 0.5192, "step": 1500 }, { "epoch": 0.7029053420805998, "eval_loss": 0.11661049723625183, "eval_pearson_cosine": 0.7522095786209633, "eval_pearson_dot": 0.5055151019348045, "eval_pearson_euclidean": 0.7488737825239156, "eval_pearson_manhattan": 0.7487340190517244, "eval_runtime": 39.6015, "eval_samples_per_second": 37.877, "eval_spearman_cosine": 0.7673251411045983, "eval_spearman_dot": 0.5052800577910852, "eval_spearman_euclidean": 0.7594003970324229, "eval_spearman_manhattan": 0.7590967348946761, "eval_steps_per_second": 37.877, "step": 1500 }, { "epoch": 0.7075913776944704, "grad_norm": 3.0128912925720215, "learning_rate": 9.911551077788192e-06, "loss": 0.4896, "step": 1510 }, { "epoch": 0.7122774133083412, "grad_norm": 2.955695152282715, "learning_rate": 9.910965323336459e-06, "loss": 0.5161, "step": 1520 }, { "epoch": 0.7169634489222118, "grad_norm": 2.8971164226531982, "learning_rate": 9.910379568884726e-06, "loss": 0.4868, "step": 1530 }, { "epoch": 0.7216494845360825, "grad_norm": 2.9848475456237793, "learning_rate": 9.90979381443299e-06, "loss": 0.4919, "step": 1540 }, { "epoch": 0.7263355201499532, "grad_norm": 3.1218748092651367, "learning_rate": 9.909208059981256e-06, "loss": 0.4954, "step": 1550 }, { "epoch": 0.7310215557638238, "grad_norm": 3.0474631786346436, "learning_rate": 9.908622305529523e-06, "loss": 0.5007, "step": 1560 }, { "epoch": 0.7357075913776945, "grad_norm": 2.8136134147644043, "learning_rate": 9.908036551077788e-06, "loss": 0.4998, "step": 1570 }, { "epoch": 0.7403936269915652, "grad_norm": 2.537595272064209, "learning_rate": 9.907450796626055e-06, "loss": 0.4759, "step": 1580 }, { "epoch": 0.7450796626054358, "grad_norm": 3.2701878547668457, "learning_rate": 9.906865042174322e-06, "loss": 0.5106, "step": 1590 }, { "epoch": 0.7497656982193065, "grad_norm": 3.289370536804199, "learning_rate": 9.906279287722587e-06, "loss": 0.513, "step": 1600 }, { "epoch": 0.7544517338331771, "grad_norm": 3.6762607097625732, "learning_rate": 9.905693533270854e-06, "loss": 0.4854, "step": 1610 }, { "epoch": 0.7591377694470478, "grad_norm": 3.083775281906128, "learning_rate": 9.90510777881912e-06, "loss": 0.4831, "step": 1620 }, { "epoch": 0.7638238050609185, "grad_norm": 3.9659175872802734, "learning_rate": 9.904522024367386e-06, "loss": 0.4634, "step": 1630 }, { "epoch": 0.7685098406747891, "grad_norm": 3.014983654022217, "learning_rate": 9.903936269915653e-06, "loss": 0.4979, "step": 1640 }, { "epoch": 0.7731958762886598, "grad_norm": 2.3927905559539795, "learning_rate": 9.903350515463918e-06, "loss": 0.528, "step": 1650 }, { "epoch": 0.7778819119025304, "grad_norm": 3.1232478618621826, "learning_rate": 9.902764761012185e-06, "loss": 0.5355, "step": 1660 }, { "epoch": 0.7825679475164011, "grad_norm": 3.5926144123077393, "learning_rate": 9.90217900656045e-06, "loss": 0.4849, "step": 1670 }, { "epoch": 0.7872539831302718, "grad_norm": 3.9513533115386963, "learning_rate": 9.901593252108717e-06, "loss": 0.505, "step": 1680 }, { "epoch": 0.7919400187441424, "grad_norm": 2.9392502307891846, "learning_rate": 9.901007497656983e-06, "loss": 0.5358, "step": 1690 }, { "epoch": 0.7966260543580131, "grad_norm": 2.7503395080566406, "learning_rate": 9.90042174320525e-06, "loss": 0.5267, "step": 1700 }, { "epoch": 0.8013120899718837, "grad_norm": 2.9122848510742188, "learning_rate": 9.899835988753515e-06, "loss": 0.508, "step": 1710 }, { "epoch": 0.8059981255857545, "grad_norm": 3.3603107929229736, "learning_rate": 9.899250234301782e-06, "loss": 0.4904, "step": 1720 }, { "epoch": 0.8106841611996252, "grad_norm": 2.9802725315093994, "learning_rate": 9.898664479850047e-06, "loss": 0.5019, "step": 1730 }, { "epoch": 0.8153701968134958, "grad_norm": 3.4380977153778076, "learning_rate": 9.898078725398314e-06, "loss": 0.4894, "step": 1740 }, { "epoch": 0.8200562324273665, "grad_norm": 3.106285572052002, "learning_rate": 9.897492970946579e-06, "loss": 0.5046, "step": 1750 }, { "epoch": 0.8200562324273665, "eval_loss": 0.11102449893951416, "eval_pearson_cosine": 0.7554957932917077, "eval_pearson_dot": 0.5302646386582808, "eval_pearson_euclidean": 0.7581196621481752, "eval_pearson_manhattan": 0.7582295870999474, "eval_runtime": 39.7307, "eval_samples_per_second": 37.754, "eval_spearman_cosine": 0.7675167675754492, "eval_spearman_dot": 0.5391446720291554, "eval_spearman_euclidean": 0.767156398991665, "eval_spearman_manhattan": 0.7675093547587875, "eval_steps_per_second": 37.754, "step": 1750 }, { "epoch": 0.8247422680412371, "grad_norm": 3.348632574081421, "learning_rate": 9.896907216494846e-06, "loss": 0.4758, "step": 1760 }, { "epoch": 0.8294283036551078, "grad_norm": 3.240816831588745, "learning_rate": 9.896321462043113e-06, "loss": 0.4631, "step": 1770 }, { "epoch": 0.8341143392689785, "grad_norm": 3.1542975902557373, "learning_rate": 9.895735707591378e-06, "loss": 0.492, "step": 1780 }, { "epoch": 0.8388003748828491, "grad_norm": 2.405806064605713, "learning_rate": 9.895149953139645e-06, "loss": 0.5083, "step": 1790 }, { "epoch": 0.8434864104967198, "grad_norm": 3.3465261459350586, "learning_rate": 9.89456419868791e-06, "loss": 0.4179, "step": 1800 }, { "epoch": 0.8481724461105904, "grad_norm": 3.382632255554199, "learning_rate": 9.893978444236177e-06, "loss": 0.4982, "step": 1810 }, { "epoch": 0.8528584817244611, "grad_norm": 2.5863983631134033, "learning_rate": 9.893392689784444e-06, "loss": 0.5203, "step": 1820 }, { "epoch": 0.8575445173383318, "grad_norm": 2.830101251602173, "learning_rate": 9.89280693533271e-06, "loss": 0.4997, "step": 1830 }, { "epoch": 0.8622305529522024, "grad_norm": 3.096226453781128, "learning_rate": 9.892221180880976e-06, "loss": 0.493, "step": 1840 }, { "epoch": 0.8669165885660731, "grad_norm": 3.1559505462646484, "learning_rate": 9.891635426429241e-06, "loss": 0.4968, "step": 1850 }, { "epoch": 0.8716026241799437, "grad_norm": 3.1153781414031982, "learning_rate": 9.891049671977507e-06, "loss": 0.5037, "step": 1860 }, { "epoch": 0.8762886597938144, "grad_norm": 3.222080945968628, "learning_rate": 9.890463917525774e-06, "loss": 0.538, "step": 1870 }, { "epoch": 0.8809746954076851, "grad_norm": 2.716965436935425, "learning_rate": 9.88987816307404e-06, "loss": 0.4525, "step": 1880 }, { "epoch": 0.8856607310215557, "grad_norm": 3.6638436317443848, "learning_rate": 9.889292408622306e-06, "loss": 0.4959, "step": 1890 }, { "epoch": 0.8903467666354264, "grad_norm": 2.795483350753784, "learning_rate": 9.888706654170573e-06, "loss": 0.5156, "step": 1900 }, { "epoch": 0.895032802249297, "grad_norm": 3.4826247692108154, "learning_rate": 9.888120899718838e-06, "loss": 0.5136, "step": 1910 }, { "epoch": 0.8997188378631678, "grad_norm": 3.1624643802642822, "learning_rate": 9.887535145267105e-06, "loss": 0.4488, "step": 1920 }, { "epoch": 0.9044048734770385, "grad_norm": 3.1892471313476562, "learning_rate": 9.886949390815372e-06, "loss": 0.4611, "step": 1930 }, { "epoch": 0.9090909090909091, "grad_norm": 3.075989007949829, "learning_rate": 9.886363636363637e-06, "loss": 0.4376, "step": 1940 }, { "epoch": 0.9137769447047798, "grad_norm": 3.629077672958374, "learning_rate": 9.885777881911904e-06, "loss": 0.5039, "step": 1950 }, { "epoch": 0.9184629803186504, "grad_norm": 3.424208402633667, "learning_rate": 9.885192127460169e-06, "loss": 0.5169, "step": 1960 }, { "epoch": 0.9231490159325211, "grad_norm": 2.9203758239746094, "learning_rate": 9.884606373008436e-06, "loss": 0.4944, "step": 1970 }, { "epoch": 0.9278350515463918, "grad_norm": 3.0997154712677, "learning_rate": 9.884020618556703e-06, "loss": 0.4586, "step": 1980 }, { "epoch": 0.9325210871602624, "grad_norm": 2.3105781078338623, "learning_rate": 9.883434864104968e-06, "loss": 0.5109, "step": 1990 }, { "epoch": 0.9372071227741331, "grad_norm": 3.348557472229004, "learning_rate": 9.882849109653235e-06, "loss": 0.5055, "step": 2000 }, { "epoch": 0.9372071227741331, "eval_loss": 0.10619169473648071, "eval_pearson_cosine": 0.7545903634746969, "eval_pearson_dot": 0.5637526434800861, "eval_pearson_euclidean": 0.7502010625678288, "eval_pearson_manhattan": 0.7501144315992683, "eval_runtime": 39.4727, "eval_samples_per_second": 38.001, "eval_spearman_cosine": 0.7726348299355251, "eval_spearman_dot": 0.571016162108862, "eval_spearman_euclidean": 0.7650905718991688, "eval_spearman_manhattan": 0.7650111430170369, "eval_steps_per_second": 38.001, "step": 2000 }, { "epoch": 0.9418931583880038, "grad_norm": 2.4951772689819336, "learning_rate": 9.8822633552015e-06, "loss": 0.4542, "step": 2010 }, { "epoch": 0.9465791940018744, "grad_norm": 2.8216359615325928, "learning_rate": 9.881677600749765e-06, "loss": 0.4823, "step": 2020 }, { "epoch": 0.9512652296157451, "grad_norm": 3.510946035385132, "learning_rate": 9.881091846298032e-06, "loss": 0.464, "step": 2030 }, { "epoch": 0.9559512652296157, "grad_norm": 3.307612419128418, "learning_rate": 9.8805060918463e-06, "loss": 0.4641, "step": 2040 }, { "epoch": 0.9606373008434864, "grad_norm": 3.4705116748809814, "learning_rate": 9.879920337394564e-06, "loss": 0.4643, "step": 2050 }, { "epoch": 0.9653233364573571, "grad_norm": 3.5456154346466064, "learning_rate": 9.879334582942831e-06, "loss": 0.4673, "step": 2060 }, { "epoch": 0.9700093720712277, "grad_norm": 3.027689218521118, "learning_rate": 9.878748828491097e-06, "loss": 0.4997, "step": 2070 }, { "epoch": 0.9746954076850984, "grad_norm": 3.0385146141052246, "learning_rate": 9.878163074039364e-06, "loss": 0.4941, "step": 2080 }, { "epoch": 0.979381443298969, "grad_norm": 2.4981963634490967, "learning_rate": 9.87757731958763e-06, "loss": 0.4553, "step": 2090 }, { "epoch": 0.9840674789128397, "grad_norm": 2.5605556964874268, "learning_rate": 9.876991565135896e-06, "loss": 0.4515, "step": 2100 }, { "epoch": 0.9887535145267105, "grad_norm": 3.263235330581665, "learning_rate": 9.876405810684163e-06, "loss": 0.5033, "step": 2110 }, { "epoch": 0.993439550140581, "grad_norm": 3.3904616832733154, "learning_rate": 9.875820056232428e-06, "loss": 0.4285, "step": 2120 }, { "epoch": 0.9981255857544518, "grad_norm": 3.5745186805725098, "learning_rate": 9.875234301780695e-06, "loss": 0.4793, "step": 2130 }, { "epoch": 1.0028116213683225, "grad_norm": 2.888436794281006, "learning_rate": 9.874648547328962e-06, "loss": 0.4291, "step": 2140 }, { "epoch": 1.007497656982193, "grad_norm": 2.378056526184082, "learning_rate": 9.874062792877227e-06, "loss": 0.3839, "step": 2150 }, { "epoch": 1.0121836925960637, "grad_norm": 3.4162449836730957, "learning_rate": 9.873477038425494e-06, "loss": 0.3953, "step": 2160 }, { "epoch": 1.0168697282099344, "grad_norm": 3.428281545639038, "learning_rate": 9.872891283973759e-06, "loss": 0.3927, "step": 2170 }, { "epoch": 1.021555763823805, "grad_norm": 2.3197989463806152, "learning_rate": 9.872305529522024e-06, "loss": 0.3879, "step": 2180 }, { "epoch": 1.0262417994376758, "grad_norm": 2.783487558364868, "learning_rate": 9.871719775070291e-06, "loss": 0.3992, "step": 2190 }, { "epoch": 1.0309278350515463, "grad_norm": 3.5884554386138916, "learning_rate": 9.871134020618558e-06, "loss": 0.3921, "step": 2200 }, { "epoch": 1.035613870665417, "grad_norm": 3.041579008102417, "learning_rate": 9.870548266166823e-06, "loss": 0.4212, "step": 2210 }, { "epoch": 1.0402999062792877, "grad_norm": 3.096118688583374, "learning_rate": 9.86996251171509e-06, "loss": 0.388, "step": 2220 }, { "epoch": 1.0449859418931584, "grad_norm": 3.3495426177978516, "learning_rate": 9.869376757263355e-06, "loss": 0.4283, "step": 2230 }, { "epoch": 1.0496719775070291, "grad_norm": 3.1030259132385254, "learning_rate": 9.868791002811622e-06, "loss": 0.3841, "step": 2240 }, { "epoch": 1.0543580131208996, "grad_norm": 2.785508155822754, "learning_rate": 9.868205248359888e-06, "loss": 0.4177, "step": 2250 }, { "epoch": 1.0543580131208996, "eval_loss": 0.09417638927698135, "eval_pearson_cosine": 0.7576901427376015, "eval_pearson_dot": 0.5576948875195811, "eval_pearson_euclidean": 0.750992670186065, "eval_pearson_manhattan": 0.7511405973753114, "eval_runtime": 39.4437, "eval_samples_per_second": 38.029, "eval_spearman_cosine": 0.7708766062647188, "eval_spearman_dot": 0.5635255554893936, "eval_spearman_euclidean": 0.7633148873618476, "eval_spearman_manhattan": 0.7634983981685618, "eval_steps_per_second": 38.029, "step": 2250 }, { "epoch": 1.0590440487347703, "grad_norm": 2.5779619216918945, "learning_rate": 9.867619493908154e-06, "loss": 0.4064, "step": 2260 }, { "epoch": 1.063730084348641, "grad_norm": 3.4719700813293457, "learning_rate": 9.867033739456421e-06, "loss": 0.424, "step": 2270 }, { "epoch": 1.0684161199625117, "grad_norm": 2.8868348598480225, "learning_rate": 9.866447985004687e-06, "loss": 0.3964, "step": 2280 }, { "epoch": 1.0731021555763824, "grad_norm": 3.0720372200012207, "learning_rate": 9.865862230552954e-06, "loss": 0.4319, "step": 2290 }, { "epoch": 1.077788191190253, "grad_norm": 2.9588916301727295, "learning_rate": 9.865276476101219e-06, "loss": 0.4045, "step": 2300 }, { "epoch": 1.0824742268041236, "grad_norm": 2.9125168323516846, "learning_rate": 9.864690721649486e-06, "loss": 0.4147, "step": 2310 }, { "epoch": 1.0871602624179943, "grad_norm": 3.1080446243286133, "learning_rate": 9.864104967197753e-06, "loss": 0.3792, "step": 2320 }, { "epoch": 1.091846298031865, "grad_norm": 3.130577802658081, "learning_rate": 9.863519212746018e-06, "loss": 0.3995, "step": 2330 }, { "epoch": 1.0965323336457358, "grad_norm": 3.0566961765289307, "learning_rate": 9.862933458294283e-06, "loss": 0.4044, "step": 2340 }, { "epoch": 1.1012183692596063, "grad_norm": 3.1854050159454346, "learning_rate": 9.86234770384255e-06, "loss": 0.3703, "step": 2350 }, { "epoch": 1.105904404873477, "grad_norm": 3.034507989883423, "learning_rate": 9.861761949390815e-06, "loss": 0.4062, "step": 2360 }, { "epoch": 1.1105904404873477, "grad_norm": 3.1277084350585938, "learning_rate": 9.861176194939082e-06, "loss": 0.3733, "step": 2370 }, { "epoch": 1.1152764761012184, "grad_norm": 2.593057155609131, "learning_rate": 9.860590440487349e-06, "loss": 0.4395, "step": 2380 }, { "epoch": 1.119962511715089, "grad_norm": 3.079909086227417, "learning_rate": 9.860004686035614e-06, "loss": 0.3849, "step": 2390 }, { "epoch": 1.1246485473289598, "grad_norm": 3.3298873901367188, "learning_rate": 9.859418931583881e-06, "loss": 0.3753, "step": 2400 }, { "epoch": 1.1293345829428303, "grad_norm": 3.211998224258423, "learning_rate": 9.858833177132146e-06, "loss": 0.4062, "step": 2410 }, { "epoch": 1.134020618556701, "grad_norm": 3.5661370754241943, "learning_rate": 9.858247422680413e-06, "loss": 0.3734, "step": 2420 }, { "epoch": 1.1387066541705717, "grad_norm": 3.004592180252075, "learning_rate": 9.85766166822868e-06, "loss": 0.3847, "step": 2430 }, { "epoch": 1.1433926897844424, "grad_norm": 3.070695400238037, "learning_rate": 9.857075913776945e-06, "loss": 0.3951, "step": 2440 }, { "epoch": 1.148078725398313, "grad_norm": 2.348797559738159, "learning_rate": 9.856490159325212e-06, "loss": 0.4213, "step": 2450 }, { "epoch": 1.1527647610121836, "grad_norm": 2.997864246368408, "learning_rate": 9.855904404873478e-06, "loss": 0.39, "step": 2460 }, { "epoch": 1.1574507966260543, "grad_norm": 2.844785451889038, "learning_rate": 9.855318650421743e-06, "loss": 0.3889, "step": 2470 }, { "epoch": 1.162136832239925, "grad_norm": 2.3173370361328125, "learning_rate": 9.854732895970011e-06, "loss": 0.387, "step": 2480 }, { "epoch": 1.1668228678537957, "grad_norm": 2.7600715160369873, "learning_rate": 9.854147141518277e-06, "loss": 0.3932, "step": 2490 }, { "epoch": 1.1715089034676662, "grad_norm": 2.941807270050049, "learning_rate": 9.853561387066542e-06, "loss": 0.4136, "step": 2500 }, { "epoch": 1.1715089034676662, "eval_loss": 0.09154797345399857, "eval_pearson_cosine": 0.7611853083767528, "eval_pearson_dot": 0.5553938407700798, "eval_pearson_euclidean": 0.7585919353320634, "eval_pearson_manhattan": 0.7584339332224772, "eval_runtime": 39.5654, "eval_samples_per_second": 37.912, "eval_spearman_cosine": 0.7727273086490055, "eval_spearman_dot": 0.5594934939736275, "eval_spearman_euclidean": 0.7696220909317546, "eval_spearman_manhattan": 0.7696255697874876, "eval_steps_per_second": 37.912, "step": 2500 }, { "epoch": 1.176194939081537, "grad_norm": 2.902358293533325, "learning_rate": 9.852975632614809e-06, "loss": 0.38, "step": 2510 }, { "epoch": 1.1808809746954076, "grad_norm": 2.7850334644317627, "learning_rate": 9.852389878163074e-06, "loss": 0.3899, "step": 2520 }, { "epoch": 1.1855670103092784, "grad_norm": 3.1563310623168945, "learning_rate": 9.851804123711341e-06, "loss": 0.3597, "step": 2530 }, { "epoch": 1.190253045923149, "grad_norm": 2.9025473594665527, "learning_rate": 9.851218369259608e-06, "loss": 0.376, "step": 2540 }, { "epoch": 1.1949390815370198, "grad_norm": 3.4233460426330566, "learning_rate": 9.850632614807873e-06, "loss": 0.4129, "step": 2550 }, { "epoch": 1.1996251171508903, "grad_norm": 2.8708131313323975, "learning_rate": 9.85004686035614e-06, "loss": 0.3979, "step": 2560 }, { "epoch": 1.204311152764761, "grad_norm": 2.335909843444824, "learning_rate": 9.849461105904405e-06, "loss": 0.3962, "step": 2570 }, { "epoch": 1.2089971883786317, "grad_norm": 3.7836697101593018, "learning_rate": 9.848875351452672e-06, "loss": 0.4116, "step": 2580 }, { "epoch": 1.2136832239925024, "grad_norm": 3.2370989322662354, "learning_rate": 9.848289597000939e-06, "loss": 0.3827, "step": 2590 }, { "epoch": 1.218369259606373, "grad_norm": 3.3212838172912598, "learning_rate": 9.847703842549204e-06, "loss": 0.4082, "step": 2600 }, { "epoch": 1.2230552952202436, "grad_norm": 3.7373929023742676, "learning_rate": 9.847118088097471e-06, "loss": 0.4193, "step": 2610 }, { "epoch": 1.2277413308341143, "grad_norm": 2.658543825149536, "learning_rate": 9.846532333645736e-06, "loss": 0.3539, "step": 2620 }, { "epoch": 1.232427366447985, "grad_norm": 3.5882198810577393, "learning_rate": 9.845946579194002e-06, "loss": 0.3966, "step": 2630 }, { "epoch": 1.2371134020618557, "grad_norm": 3.1050803661346436, "learning_rate": 9.84536082474227e-06, "loss": 0.4653, "step": 2640 }, { "epoch": 1.2417994376757264, "grad_norm": 2.8855292797088623, "learning_rate": 9.844775070290535e-06, "loss": 0.3762, "step": 2650 }, { "epoch": 1.246485473289597, "grad_norm": 3.595693588256836, "learning_rate": 9.8441893158388e-06, "loss": 0.4139, "step": 2660 }, { "epoch": 1.2511715089034676, "grad_norm": 3.307594060897827, "learning_rate": 9.843603561387068e-06, "loss": 0.3821, "step": 2670 }, { "epoch": 1.2558575445173383, "grad_norm": 2.86323618888855, "learning_rate": 9.843017806935333e-06, "loss": 0.4311, "step": 2680 }, { "epoch": 1.260543580131209, "grad_norm": 3.1539525985717773, "learning_rate": 9.8424320524836e-06, "loss": 0.381, "step": 2690 }, { "epoch": 1.2652296157450795, "grad_norm": 2.8784306049346924, "learning_rate": 9.841846298031867e-06, "loss": 0.4353, "step": 2700 }, { "epoch": 1.2699156513589505, "grad_norm": 2.426830291748047, "learning_rate": 9.841260543580132e-06, "loss": 0.387, "step": 2710 }, { "epoch": 1.274601686972821, "grad_norm": 2.350255012512207, "learning_rate": 9.840674789128399e-06, "loss": 0.3409, "step": 2720 }, { "epoch": 1.2792877225866917, "grad_norm": 2.589404344558716, "learning_rate": 9.840089034676664e-06, "loss": 0.3665, "step": 2730 }, { "epoch": 1.2839737582005624, "grad_norm": 2.6439192295074463, "learning_rate": 9.839503280224931e-06, "loss": 0.4118, "step": 2740 }, { "epoch": 1.2886597938144329, "grad_norm": 3.120351791381836, "learning_rate": 9.838917525773196e-06, "loss": 0.4425, "step": 2750 }, { "epoch": 1.2886597938144329, "eval_loss": 0.09277115762233734, "eval_pearson_cosine": 0.7605294387552419, "eval_pearson_dot": 0.5497643159360166, "eval_pearson_euclidean": 0.7462760938099731, "eval_pearson_manhattan": 0.7461382274954786, "eval_runtime": 39.1884, "eval_samples_per_second": 38.277, "eval_spearman_cosine": 0.7726058957512391, "eval_spearman_dot": 0.551187587856229, "eval_spearman_euclidean": 0.7591610142998378, "eval_spearman_manhattan": 0.7591112847270914, "eval_steps_per_second": 38.277, "step": 2750 }, { "epoch": 1.2933458294283038, "grad_norm": 2.9216575622558594, "learning_rate": 9.838331771321463e-06, "loss": 0.376, "step": 2760 }, { "epoch": 1.2980318650421743, "grad_norm": 2.9521021842956543, "learning_rate": 9.83774601686973e-06, "loss": 0.3597, "step": 2770 }, { "epoch": 1.302717900656045, "grad_norm": 3.4117023944854736, "learning_rate": 9.837160262417995e-06, "loss": 0.3833, "step": 2780 }, { "epoch": 1.3074039362699157, "grad_norm": 3.0038156509399414, "learning_rate": 9.83657450796626e-06, "loss": 0.3996, "step": 2790 }, { "epoch": 1.3120899718837864, "grad_norm": 2.500950813293457, "learning_rate": 9.835988753514527e-06, "loss": 0.3619, "step": 2800 }, { "epoch": 1.316776007497657, "grad_norm": 2.954556465148926, "learning_rate": 9.835402999062794e-06, "loss": 0.4053, "step": 2810 }, { "epoch": 1.3214620431115276, "grad_norm": 2.647721290588379, "learning_rate": 9.83481724461106e-06, "loss": 0.367, "step": 2820 }, { "epoch": 1.3261480787253983, "grad_norm": 3.3207929134368896, "learning_rate": 9.834231490159326e-06, "loss": 0.3857, "step": 2830 }, { "epoch": 1.330834114339269, "grad_norm": 2.4351987838745117, "learning_rate": 9.833645735707592e-06, "loss": 0.4087, "step": 2840 }, { "epoch": 1.3355201499531397, "grad_norm": 2.845399856567383, "learning_rate": 9.833059981255859e-06, "loss": 0.3628, "step": 2850 }, { "epoch": 1.3402061855670104, "grad_norm": 2.342569589614868, "learning_rate": 9.832474226804124e-06, "loss": 0.3656, "step": 2860 }, { "epoch": 1.344892221180881, "grad_norm": 3.218336582183838, "learning_rate": 9.83188847235239e-06, "loss": 0.3445, "step": 2870 }, { "epoch": 1.3495782567947516, "grad_norm": 3.147611141204834, "learning_rate": 9.831302717900658e-06, "loss": 0.3766, "step": 2880 }, { "epoch": 1.3542642924086223, "grad_norm": 2.8554821014404297, "learning_rate": 9.830716963448923e-06, "loss": 0.3719, "step": 2890 }, { "epoch": 1.358950328022493, "grad_norm": 3.0164778232574463, "learning_rate": 9.83013120899719e-06, "loss": 0.3625, "step": 2900 }, { "epoch": 1.3636363636363638, "grad_norm": 2.705883741378784, "learning_rate": 9.829545454545455e-06, "loss": 0.4185, "step": 2910 }, { "epoch": 1.3683223992502342, "grad_norm": 2.8652660846710205, "learning_rate": 9.828959700093722e-06, "loss": 0.3563, "step": 2920 }, { "epoch": 1.373008434864105, "grad_norm": 2.8929593563079834, "learning_rate": 9.828373945641989e-06, "loss": 0.3769, "step": 2930 }, { "epoch": 1.3776944704779757, "grad_norm": 2.8356854915618896, "learning_rate": 9.827788191190254e-06, "loss": 0.3796, "step": 2940 }, { "epoch": 1.3823805060918464, "grad_norm": 2.890596866607666, "learning_rate": 9.82720243673852e-06, "loss": 0.4299, "step": 2950 }, { "epoch": 1.387066541705717, "grad_norm": 3.0824170112609863, "learning_rate": 9.826616682286786e-06, "loss": 0.3807, "step": 2960 }, { "epoch": 1.3917525773195876, "grad_norm": 2.549909830093384, "learning_rate": 9.826030927835051e-06, "loss": 0.392, "step": 2970 }, { "epoch": 1.3964386129334583, "grad_norm": 2.3966541290283203, "learning_rate": 9.825445173383318e-06, "loss": 0.3743, "step": 2980 }, { "epoch": 1.401124648547329, "grad_norm": 2.7320258617401123, "learning_rate": 9.824859418931585e-06, "loss": 0.3664, "step": 2990 }, { "epoch": 1.4058106841611997, "grad_norm": 1.8689631223678589, "learning_rate": 9.82427366447985e-06, "loss": 0.3708, "step": 3000 }, { "epoch": 1.4058106841611997, "eval_loss": 0.08185213804244995, "eval_pearson_cosine": 0.7670467055300527, "eval_pearson_dot": 0.5834479149623917, "eval_pearson_euclidean": 0.7481055315337883, "eval_pearson_manhattan": 0.7477726537908893, "eval_runtime": 39.1851, "eval_samples_per_second": 38.28, "eval_spearman_cosine": 0.7783408589060774, "eval_spearman_dot": 0.5847220399435686, "eval_spearman_euclidean": 0.7636547096676689, "eval_spearman_manhattan": 0.7633506525118475, "eval_steps_per_second": 38.28, "step": 3000 }, { "epoch": 1.4104967197750704, "grad_norm": 3.3057873249053955, "learning_rate": 9.823687910028117e-06, "loss": 0.3923, "step": 3010 }, { "epoch": 1.415182755388941, "grad_norm": 2.955125331878662, "learning_rate": 9.823102155576383e-06, "loss": 0.3788, "step": 3020 }, { "epoch": 1.4198687910028116, "grad_norm": 2.7104790210723877, "learning_rate": 9.82251640112465e-06, "loss": 0.4158, "step": 3030 }, { "epoch": 1.4245548266166823, "grad_norm": 2.648211717605591, "learning_rate": 9.821930646672916e-06, "loss": 0.3619, "step": 3040 }, { "epoch": 1.429240862230553, "grad_norm": 2.5092930793762207, "learning_rate": 9.821344892221182e-06, "loss": 0.3995, "step": 3050 }, { "epoch": 1.4339268978444237, "grad_norm": 3.207554340362549, "learning_rate": 9.820759137769448e-06, "loss": 0.3602, "step": 3060 }, { "epoch": 1.4386129334582942, "grad_norm": 3.2078282833099365, "learning_rate": 9.820173383317714e-06, "loss": 0.3644, "step": 3070 }, { "epoch": 1.443298969072165, "grad_norm": 2.8371787071228027, "learning_rate": 9.819587628865979e-06, "loss": 0.368, "step": 3080 }, { "epoch": 1.4479850046860356, "grad_norm": 2.8893673419952393, "learning_rate": 9.819001874414248e-06, "loss": 0.3611, "step": 3090 }, { "epoch": 1.4526710402999063, "grad_norm": 3.0875115394592285, "learning_rate": 9.818416119962513e-06, "loss": 0.3809, "step": 3100 }, { "epoch": 1.457357075913777, "grad_norm": 3.2046687602996826, "learning_rate": 9.817830365510778e-06, "loss": 0.4254, "step": 3110 }, { "epoch": 1.4620431115276475, "grad_norm": 2.6490259170532227, "learning_rate": 9.817244611059045e-06, "loss": 0.3708, "step": 3120 }, { "epoch": 1.4667291471415183, "grad_norm": 2.928335189819336, "learning_rate": 9.81665885660731e-06, "loss": 0.4054, "step": 3130 }, { "epoch": 1.471415182755389, "grad_norm": 3.058704137802124, "learning_rate": 9.816073102155577e-06, "loss": 0.3795, "step": 3140 }, { "epoch": 1.4761012183692597, "grad_norm": 3.042874336242676, "learning_rate": 9.815487347703844e-06, "loss": 0.349, "step": 3150 }, { "epoch": 1.4807872539831304, "grad_norm": 3.230562210083008, "learning_rate": 9.81490159325211e-06, "loss": 0.3883, "step": 3160 }, { "epoch": 1.4854732895970009, "grad_norm": 2.735044240951538, "learning_rate": 9.814315838800376e-06, "loss": 0.3573, "step": 3170 }, { "epoch": 1.4901593252108716, "grad_norm": 3.3735740184783936, "learning_rate": 9.813730084348641e-06, "loss": 0.3969, "step": 3180 }, { "epoch": 1.4948453608247423, "grad_norm": 2.9641244411468506, "learning_rate": 9.813144329896908e-06, "loss": 0.4103, "step": 3190 }, { "epoch": 1.499531396438613, "grad_norm": 2.666172742843628, "learning_rate": 9.812558575445175e-06, "loss": 0.3739, "step": 3200 }, { "epoch": 1.5042174320524837, "grad_norm": 3.061447858810425, "learning_rate": 9.81197282099344e-06, "loss": 0.3759, "step": 3210 }, { "epoch": 1.5089034676663542, "grad_norm": 2.6585240364074707, "learning_rate": 9.811387066541707e-06, "loss": 0.3521, "step": 3220 }, { "epoch": 1.513589503280225, "grad_norm": 3.168820858001709, "learning_rate": 9.810801312089973e-06, "loss": 0.3681, "step": 3230 }, { "epoch": 1.5182755388940956, "grad_norm": 3.069749355316162, "learning_rate": 9.810215557638238e-06, "loss": 0.3696, "step": 3240 }, { "epoch": 1.522961574507966, "grad_norm": 3.588362693786621, "learning_rate": 9.809629803186505e-06, "loss": 0.3934, "step": 3250 }, { "epoch": 1.522961574507966, "eval_loss": 0.08483820408582687, "eval_pearson_cosine": 0.7708958105015995, "eval_pearson_dot": 0.5654852409557236, "eval_pearson_euclidean": 0.7542229262005833, "eval_pearson_manhattan": 0.7539007508537807, "eval_runtime": 40.1586, "eval_samples_per_second": 37.352, "eval_spearman_cosine": 0.7814200034454707, "eval_spearman_dot": 0.5667704812278335, "eval_spearman_euclidean": 0.7689048877801009, "eval_spearman_manhattan": 0.7692340606121537, "eval_steps_per_second": 37.352, "step": 3250 }, { "epoch": 1.527647610121837, "grad_norm": 2.5335776805877686, "learning_rate": 9.809044048734772e-06, "loss": 0.404, "step": 3260 }, { "epoch": 1.5323336457357075, "grad_norm": 3.6495397090911865, "learning_rate": 9.808458294283037e-06, "loss": 0.4017, "step": 3270 }, { "epoch": 1.5370196813495782, "grad_norm": 3.3083150386810303, "learning_rate": 9.807872539831304e-06, "loss": 0.3839, "step": 3280 }, { "epoch": 1.541705716963449, "grad_norm": 2.9375736713409424, "learning_rate": 9.807286785379569e-06, "loss": 0.4067, "step": 3290 }, { "epoch": 1.5463917525773194, "grad_norm": 2.8293914794921875, "learning_rate": 9.806701030927836e-06, "loss": 0.4506, "step": 3300 }, { "epoch": 1.5510777881911904, "grad_norm": 2.9611403942108154, "learning_rate": 9.806115276476103e-06, "loss": 0.3703, "step": 3310 }, { "epoch": 1.5557638238050608, "grad_norm": 2.821136713027954, "learning_rate": 9.805529522024368e-06, "loss": 0.3756, "step": 3320 }, { "epoch": 1.5604498594189316, "grad_norm": 2.8910715579986572, "learning_rate": 9.804943767572635e-06, "loss": 0.3911, "step": 3330 }, { "epoch": 1.5651358950328023, "grad_norm": 3.2398719787597656, "learning_rate": 9.8043580131209e-06, "loss": 0.4014, "step": 3340 }, { "epoch": 1.569821930646673, "grad_norm": 2.7479453086853027, "learning_rate": 9.803772258669167e-06, "loss": 0.3854, "step": 3350 }, { "epoch": 1.5745079662605437, "grad_norm": 3.2347192764282227, "learning_rate": 9.803186504217432e-06, "loss": 0.3913, "step": 3360 }, { "epoch": 1.5791940018744142, "grad_norm": 2.865525245666504, "learning_rate": 9.8026007497657e-06, "loss": 0.3685, "step": 3370 }, { "epoch": 1.5838800374882849, "grad_norm": 3.2018258571624756, "learning_rate": 9.802014995313966e-06, "loss": 0.3376, "step": 3380 }, { "epoch": 1.5885660731021556, "grad_norm": 2.9291999340057373, "learning_rate": 9.801429240862231e-06, "loss": 0.3673, "step": 3390 }, { "epoch": 1.5932521087160263, "grad_norm": 2.761934757232666, "learning_rate": 9.800843486410497e-06, "loss": 0.3885, "step": 3400 }, { "epoch": 1.597938144329897, "grad_norm": 2.509103775024414, "learning_rate": 9.800257731958763e-06, "loss": 0.3523, "step": 3410 }, { "epoch": 1.6026241799437675, "grad_norm": 2.377322196960449, "learning_rate": 9.79967197750703e-06, "loss": 0.3791, "step": 3420 }, { "epoch": 1.6073102155576382, "grad_norm": 3.3458712100982666, "learning_rate": 9.799086223055296e-06, "loss": 0.3897, "step": 3430 }, { "epoch": 1.611996251171509, "grad_norm": 2.7032053470611572, "learning_rate": 9.798500468603563e-06, "loss": 0.339, "step": 3440 }, { "epoch": 1.6166822867853796, "grad_norm": 3.09551739692688, "learning_rate": 9.797914714151828e-06, "loss": 0.384, "step": 3450 }, { "epoch": 1.6213683223992503, "grad_norm": 3.2145328521728516, "learning_rate": 9.797328959700095e-06, "loss": 0.3961, "step": 3460 }, { "epoch": 1.6260543580131208, "grad_norm": 3.6237032413482666, "learning_rate": 9.79674320524836e-06, "loss": 0.348, "step": 3470 }, { "epoch": 1.6307403936269915, "grad_norm": 2.706770420074463, "learning_rate": 9.796157450796627e-06, "loss": 0.3281, "step": 3480 }, { "epoch": 1.6354264292408622, "grad_norm": 3.071535348892212, "learning_rate": 9.795571696344894e-06, "loss": 0.3645, "step": 3490 }, { "epoch": 1.640112464854733, "grad_norm": 2.757957696914673, "learning_rate": 9.794985941893159e-06, "loss": 0.3203, "step": 3500 }, { "epoch": 1.640112464854733, "eval_loss": 0.07808861136436462, "eval_pearson_cosine": 0.7705724911864849, "eval_pearson_dot": 0.5870542264131444, "eval_pearson_euclidean": 0.7531019492466129, "eval_pearson_manhattan": 0.7529286527249113, "eval_runtime": 39.5374, "eval_samples_per_second": 37.939, "eval_spearman_cosine": 0.781003234465689, "eval_spearman_dot": 0.5891471444366018, "eval_spearman_euclidean": 0.7690560764997629, "eval_spearman_manhattan": 0.7689244287192701, "eval_steps_per_second": 37.939, "step": 3500 }, { "epoch": 1.6447985004686037, "grad_norm": 2.8622870445251465, "learning_rate": 9.794400187441426e-06, "loss": 0.361, "step": 3510 }, { "epoch": 1.6494845360824741, "grad_norm": 3.1786670684814453, "learning_rate": 9.793814432989691e-06, "loss": 0.376, "step": 3520 }, { "epoch": 1.6541705716963448, "grad_norm": 2.9098026752471924, "learning_rate": 9.793228678537958e-06, "loss": 0.383, "step": 3530 }, { "epoch": 1.6588566073102156, "grad_norm": 3.0556936264038086, "learning_rate": 9.792642924086225e-06, "loss": 0.3966, "step": 3540 }, { "epoch": 1.6635426429240863, "grad_norm": 2.920565128326416, "learning_rate": 9.79205716963449e-06, "loss": 0.3531, "step": 3550 }, { "epoch": 1.668228678537957, "grad_norm": 2.669887065887451, "learning_rate": 9.791471415182755e-06, "loss": 0.3784, "step": 3560 }, { "epoch": 1.6729147141518275, "grad_norm": 2.7581350803375244, "learning_rate": 9.790885660731022e-06, "loss": 0.3562, "step": 3570 }, { "epoch": 1.6776007497656982, "grad_norm": 3.3164896965026855, "learning_rate": 9.790299906279287e-06, "loss": 0.363, "step": 3580 }, { "epoch": 1.6822867853795689, "grad_norm": 3.1747140884399414, "learning_rate": 9.789714151827554e-06, "loss": 0.3816, "step": 3590 }, { "epoch": 1.6869728209934396, "grad_norm": 2.896433115005493, "learning_rate": 9.789128397375821e-06, "loss": 0.3727, "step": 3600 }, { "epoch": 1.6916588566073103, "grad_norm": 2.8748035430908203, "learning_rate": 9.788542642924087e-06, "loss": 0.3501, "step": 3610 }, { "epoch": 1.6963448922211808, "grad_norm": 3.0647246837615967, "learning_rate": 9.787956888472353e-06, "loss": 0.3742, "step": 3620 }, { "epoch": 1.7010309278350515, "grad_norm": 2.368314504623413, "learning_rate": 9.787371134020619e-06, "loss": 0.3509, "step": 3630 }, { "epoch": 1.7057169634489222, "grad_norm": 2.563969135284424, "learning_rate": 9.786785379568886e-06, "loss": 0.3205, "step": 3640 }, { "epoch": 1.710402999062793, "grad_norm": 3.2098007202148438, "learning_rate": 9.786199625117153e-06, "loss": 0.3607, "step": 3650 }, { "epoch": 1.7150890346766636, "grad_norm": 3.1060285568237305, "learning_rate": 9.785613870665418e-06, "loss": 0.3159, "step": 3660 }, { "epoch": 1.7197750702905341, "grad_norm": 2.471100330352783, "learning_rate": 9.785028116213685e-06, "loss": 0.375, "step": 3670 }, { "epoch": 1.7244611059044048, "grad_norm": 2.991903305053711, "learning_rate": 9.78444236176195e-06, "loss": 0.3944, "step": 3680 }, { "epoch": 1.7291471415182755, "grad_norm": 2.6646485328674316, "learning_rate": 9.783856607310215e-06, "loss": 0.4026, "step": 3690 }, { "epoch": 1.7338331771321462, "grad_norm": 2.392664670944214, "learning_rate": 9.783270852858484e-06, "loss": 0.3606, "step": 3700 }, { "epoch": 1.738519212746017, "grad_norm": 3.0139496326446533, "learning_rate": 9.782685098406749e-06, "loss": 0.3475, "step": 3710 }, { "epoch": 1.7432052483598874, "grad_norm": 2.6688876152038574, "learning_rate": 9.782099343955014e-06, "loss": 0.413, "step": 3720 }, { "epoch": 1.7478912839737581, "grad_norm": 3.372584581375122, "learning_rate": 9.781513589503281e-06, "loss": 0.3544, "step": 3730 }, { "epoch": 1.7525773195876289, "grad_norm": 2.9981796741485596, "learning_rate": 9.780927835051546e-06, "loss": 0.3747, "step": 3740 }, { "epoch": 1.7572633552014996, "grad_norm": 3.5128731727600098, "learning_rate": 9.780342080599813e-06, "loss": 0.4052, "step": 3750 }, { "epoch": 1.7572633552014996, "eval_loss": 0.08239996433258057, "eval_pearson_cosine": 0.7705098430219977, "eval_pearson_dot": 0.5909106805114561, "eval_pearson_euclidean": 0.7628273731383075, "eval_pearson_manhattan": 0.7627845021563395, "eval_runtime": 39.7472, "eval_samples_per_second": 37.738, "eval_spearman_cosine": 0.7816481068106005, "eval_spearman_dot": 0.5989182063745997, "eval_spearman_euclidean": 0.7771176907760753, "eval_spearman_manhattan": 0.7770550572795577, "eval_steps_per_second": 37.738, "step": 3750 }, { "epoch": 1.7619493908153703, "grad_norm": 3.215078353881836, "learning_rate": 9.77975632614808e-06, "loss": 0.3424, "step": 3760 }, { "epoch": 1.7666354264292408, "grad_norm": 3.310758113861084, "learning_rate": 9.779170571696345e-06, "loss": 0.3765, "step": 3770 }, { "epoch": 1.7713214620431117, "grad_norm": 2.7431821823120117, "learning_rate": 9.778584817244612e-06, "loss": 0.3545, "step": 3780 }, { "epoch": 1.7760074976569822, "grad_norm": 3.1174018383026123, "learning_rate": 9.777999062792877e-06, "loss": 0.3495, "step": 3790 }, { "epoch": 1.780693533270853, "grad_norm": 2.911485195159912, "learning_rate": 9.777413308341144e-06, "loss": 0.3684, "step": 3800 }, { "epoch": 1.7853795688847236, "grad_norm": 3.4018588066101074, "learning_rate": 9.776827553889411e-06, "loss": 0.3702, "step": 3810 }, { "epoch": 1.790065604498594, "grad_norm": 2.8226757049560547, "learning_rate": 9.776241799437677e-06, "loss": 0.3506, "step": 3820 }, { "epoch": 1.794751640112465, "grad_norm": 3.5894434452056885, "learning_rate": 9.775656044985943e-06, "loss": 0.3928, "step": 3830 }, { "epoch": 1.7994376757263355, "grad_norm": 2.49806547164917, "learning_rate": 9.775070290534209e-06, "loss": 0.3162, "step": 3840 }, { "epoch": 1.8041237113402062, "grad_norm": 3.0954976081848145, "learning_rate": 9.774484536082474e-06, "loss": 0.3657, "step": 3850 }, { "epoch": 1.808809746954077, "grad_norm": 2.629826307296753, "learning_rate": 9.77389878163074e-06, "loss": 0.3017, "step": 3860 }, { "epoch": 1.8134957825679474, "grad_norm": 3.1166768074035645, "learning_rate": 9.773313027179008e-06, "loss": 0.3778, "step": 3870 }, { "epoch": 1.8181818181818183, "grad_norm": 3.0856707096099854, "learning_rate": 9.772727272727273e-06, "loss": 0.3949, "step": 3880 }, { "epoch": 1.8228678537956888, "grad_norm": 2.7912850379943848, "learning_rate": 9.77214151827554e-06, "loss": 0.377, "step": 3890 }, { "epoch": 1.8275538894095595, "grad_norm": 2.9028005599975586, "learning_rate": 9.771555763823805e-06, "loss": 0.361, "step": 3900 }, { "epoch": 1.8322399250234302, "grad_norm": 3.034365177154541, "learning_rate": 9.770970009372072e-06, "loss": 0.3776, "step": 3910 }, { "epoch": 1.8369259606373007, "grad_norm": 2.7740864753723145, "learning_rate": 9.770384254920339e-06, "loss": 0.3322, "step": 3920 }, { "epoch": 1.8416119962511717, "grad_norm": 3.775836944580078, "learning_rate": 9.769798500468604e-06, "loss": 0.3668, "step": 3930 }, { "epoch": 1.8462980318650422, "grad_norm": 3.1334052085876465, "learning_rate": 9.769212746016871e-06, "loss": 0.3633, "step": 3940 }, { "epoch": 1.8509840674789129, "grad_norm": 3.311575174331665, "learning_rate": 9.768626991565136e-06, "loss": 0.3507, "step": 3950 }, { "epoch": 1.8556701030927836, "grad_norm": 3.3283636569976807, "learning_rate": 9.768041237113403e-06, "loss": 0.3393, "step": 3960 }, { "epoch": 1.860356138706654, "grad_norm": 2.8960700035095215, "learning_rate": 9.767455482661668e-06, "loss": 0.4164, "step": 3970 }, { "epoch": 1.865042174320525, "grad_norm": 2.63865327835083, "learning_rate": 9.766869728209935e-06, "loss": 0.3561, "step": 3980 }, { "epoch": 1.8697282099343955, "grad_norm": 2.750246286392212, "learning_rate": 9.766283973758202e-06, "loss": 0.3668, "step": 3990 }, { "epoch": 1.8744142455482662, "grad_norm": 2.736009359359741, "learning_rate": 9.765698219306467e-06, "loss": 0.3723, "step": 4000 }, { "epoch": 1.8744142455482662, "eval_loss": 0.0818546786904335, "eval_pearson_cosine": 0.7719873880440176, "eval_pearson_dot": 0.5710773246097212, "eval_pearson_euclidean": 0.7519982934890663, "eval_pearson_manhattan": 0.7515239555531821, "eval_runtime": 39.1942, "eval_samples_per_second": 38.271, "eval_spearman_cosine": 0.7840046796615474, "eval_spearman_dot": 0.5713176227902256, "eval_spearman_euclidean": 0.7685352754174982, "eval_spearman_manhattan": 0.76785681763944, "eval_steps_per_second": 38.271, "step": 4000 }, { "epoch": 1.879100281162137, "grad_norm": 3.1088342666625977, "learning_rate": 9.765112464854733e-06, "loss": 0.3162, "step": 4010 }, { "epoch": 1.8837863167760074, "grad_norm": 2.2759673595428467, "learning_rate": 9.764526710403e-06, "loss": 0.3647, "step": 4020 }, { "epoch": 1.8884723523898783, "grad_norm": 2.4756667613983154, "learning_rate": 9.763940955951267e-06, "loss": 0.3741, "step": 4030 }, { "epoch": 1.8931583880037488, "grad_norm": 2.7309701442718506, "learning_rate": 9.763355201499532e-06, "loss": 0.3732, "step": 4040 }, { "epoch": 1.8978444236176195, "grad_norm": 2.3570773601531982, "learning_rate": 9.762769447047799e-06, "loss": 0.3593, "step": 4050 }, { "epoch": 1.9025304592314902, "grad_norm": 3.3677115440368652, "learning_rate": 9.762183692596064e-06, "loss": 0.3859, "step": 4060 }, { "epoch": 1.9072164948453607, "grad_norm": 3.787653684616089, "learning_rate": 9.76159793814433e-06, "loss": 0.3775, "step": 4070 }, { "epoch": 1.9119025304592316, "grad_norm": 2.686065196990967, "learning_rate": 9.761012183692596e-06, "loss": 0.3531, "step": 4080 }, { "epoch": 1.9165885660731021, "grad_norm": 2.230189323425293, "learning_rate": 9.760426429240863e-06, "loss": 0.3291, "step": 4090 }, { "epoch": 1.9212746016869728, "grad_norm": 2.815934896469116, "learning_rate": 9.75984067478913e-06, "loss": 0.3644, "step": 4100 }, { "epoch": 1.9259606373008435, "grad_norm": 3.123044967651367, "learning_rate": 9.759254920337395e-06, "loss": 0.377, "step": 4110 }, { "epoch": 1.930646672914714, "grad_norm": 2.2790846824645996, "learning_rate": 9.758669165885662e-06, "loss": 0.3623, "step": 4120 }, { "epoch": 1.935332708528585, "grad_norm": 2.658747434616089, "learning_rate": 9.758083411433927e-06, "loss": 0.3289, "step": 4130 }, { "epoch": 1.9400187441424555, "grad_norm": 2.783327102661133, "learning_rate": 9.757497656982194e-06, "loss": 0.3903, "step": 4140 }, { "epoch": 1.9447047797563262, "grad_norm": 2.442927598953247, "learning_rate": 9.756911902530461e-06, "loss": 0.3539, "step": 4150 }, { "epoch": 1.9493908153701969, "grad_norm": 3.6274266242980957, "learning_rate": 9.756326148078726e-06, "loss": 0.3407, "step": 4160 }, { "epoch": 1.9540768509840674, "grad_norm": 3.194626808166504, "learning_rate": 9.755740393626991e-06, "loss": 0.3585, "step": 4170 }, { "epoch": 1.9587628865979383, "grad_norm": 2.9573676586151123, "learning_rate": 9.755154639175258e-06, "loss": 0.343, "step": 4180 }, { "epoch": 1.9634489222118088, "grad_norm": 2.608351707458496, "learning_rate": 9.754568884723524e-06, "loss": 0.3632, "step": 4190 }, { "epoch": 1.9681349578256795, "grad_norm": 2.9371848106384277, "learning_rate": 9.75398313027179e-06, "loss": 0.3341, "step": 4200 }, { "epoch": 1.9728209934395502, "grad_norm": 3.698261022567749, "learning_rate": 9.753397375820057e-06, "loss": 0.3903, "step": 4210 }, { "epoch": 1.9775070290534207, "grad_norm": 2.6606619358062744, "learning_rate": 9.752811621368323e-06, "loss": 0.3365, "step": 4220 }, { "epoch": 1.9821930646672916, "grad_norm": 3.170403480529785, "learning_rate": 9.75222586691659e-06, "loss": 0.3577, "step": 4230 }, { "epoch": 1.986879100281162, "grad_norm": 2.7663040161132812, "learning_rate": 9.751640112464855e-06, "loss": 0.3687, "step": 4240 }, { "epoch": 1.9915651358950328, "grad_norm": 2.217230796813965, "learning_rate": 9.751054358013122e-06, "loss": 0.3645, "step": 4250 }, { "epoch": 1.9915651358950328, "eval_loss": 0.08020295202732086, "eval_pearson_cosine": 0.7676372739184956, "eval_pearson_dot": 0.5685492783866799, "eval_pearson_euclidean": 0.7560020323373777, "eval_pearson_manhattan": 0.7559508884315278, "eval_runtime": 39.1696, "eval_samples_per_second": 38.295, "eval_spearman_cosine": 0.7804455454568248, "eval_spearman_dot": 0.570131474662444, "eval_spearman_euclidean": 0.770297760657173, "eval_spearman_manhattan": 0.7703714920263548, "eval_steps_per_second": 38.295, "step": 4250 }, { "epoch": 1.9962511715089035, "grad_norm": 2.7864959239959717, "learning_rate": 9.750468603561389e-06, "loss": 0.3761, "step": 4260 }, { "epoch": 2.000937207122774, "grad_norm": 2.399378776550293, "learning_rate": 9.749882849109654e-06, "loss": 0.3253, "step": 4270 }, { "epoch": 2.005623242736645, "grad_norm": 3.282205820083618, "learning_rate": 9.74929709465792e-06, "loss": 0.2838, "step": 4280 }, { "epoch": 2.0103092783505154, "grad_norm": 2.5611684322357178, "learning_rate": 9.748711340206186e-06, "loss": 0.2921, "step": 4290 }, { "epoch": 2.014995313964386, "grad_norm": 2.7141940593719482, "learning_rate": 9.748125585754453e-06, "loss": 0.274, "step": 4300 }, { "epoch": 2.019681349578257, "grad_norm": 2.566196918487549, "learning_rate": 9.74753983130272e-06, "loss": 0.2701, "step": 4310 }, { "epoch": 2.0243673851921273, "grad_norm": 2.6438400745391846, "learning_rate": 9.746954076850985e-06, "loss": 0.2898, "step": 4320 }, { "epoch": 2.0290534208059983, "grad_norm": 2.603959560394287, "learning_rate": 9.74636832239925e-06, "loss": 0.2588, "step": 4330 }, { "epoch": 2.0337394564198688, "grad_norm": 3.4404914379119873, "learning_rate": 9.745782567947517e-06, "loss": 0.3006, "step": 4340 }, { "epoch": 2.0384254920337392, "grad_norm": 2.573943853378296, "learning_rate": 9.745196813495782e-06, "loss": 0.2826, "step": 4350 }, { "epoch": 2.04311152764761, "grad_norm": 2.327101945877075, "learning_rate": 9.74461105904405e-06, "loss": 0.2842, "step": 4360 }, { "epoch": 2.0477975632614807, "grad_norm": 3.299482583999634, "learning_rate": 9.744025304592316e-06, "loss": 0.2649, "step": 4370 }, { "epoch": 2.0524835988753516, "grad_norm": 2.5444982051849365, "learning_rate": 9.743439550140581e-06, "loss": 0.293, "step": 4380 }, { "epoch": 2.057169634489222, "grad_norm": 2.7384984493255615, "learning_rate": 9.742853795688848e-06, "loss": 0.3027, "step": 4390 }, { "epoch": 2.0618556701030926, "grad_norm": 2.323399782180786, "learning_rate": 9.742268041237114e-06, "loss": 0.3082, "step": 4400 }, { "epoch": 2.0665417057169635, "grad_norm": 2.821531057357788, "learning_rate": 9.74168228678538e-06, "loss": 0.3044, "step": 4410 }, { "epoch": 2.071227741330834, "grad_norm": 3.8503706455230713, "learning_rate": 9.741096532333647e-06, "loss": 0.2886, "step": 4420 }, { "epoch": 2.075913776944705, "grad_norm": 3.2392382621765137, "learning_rate": 9.740510777881913e-06, "loss": 0.2589, "step": 4430 }, { "epoch": 2.0805998125585754, "grad_norm": 2.823723316192627, "learning_rate": 9.73992502343018e-06, "loss": 0.2924, "step": 4440 }, { "epoch": 2.085285848172446, "grad_norm": 2.604548931121826, "learning_rate": 9.739339268978445e-06, "loss": 0.3004, "step": 4450 }, { "epoch": 2.089971883786317, "grad_norm": 2.7885937690734863, "learning_rate": 9.738753514526712e-06, "loss": 0.2967, "step": 4460 }, { "epoch": 2.0946579194001873, "grad_norm": 2.909656524658203, "learning_rate": 9.738167760074977e-06, "loss": 0.2602, "step": 4470 }, { "epoch": 2.0993439550140582, "grad_norm": 3.450695514678955, "learning_rate": 9.737582005623244e-06, "loss": 0.2841, "step": 4480 }, { "epoch": 2.1040299906279287, "grad_norm": 2.1142079830169678, "learning_rate": 9.736996251171509e-06, "loss": 0.2492, "step": 4490 }, { "epoch": 2.108716026241799, "grad_norm": 3.1369121074676514, "learning_rate": 9.736410496719776e-06, "loss": 0.3007, "step": 4500 }, { "epoch": 2.108716026241799, "eval_loss": 0.06621846556663513, "eval_pearson_cosine": 0.768161993291983, "eval_pearson_dot": 0.5972572426991363, "eval_pearson_euclidean": 0.7574105370032385, "eval_pearson_manhattan": 0.7571659262987636, "eval_runtime": 40.2217, "eval_samples_per_second": 37.293, "eval_spearman_cosine": 0.7799039169576075, "eval_spearman_dot": 0.5981151115114701, "eval_spearman_euclidean": 0.7720521764960889, "eval_spearman_manhattan": 0.772146767310716, "eval_steps_per_second": 37.293, "step": 4500 }, { "epoch": 2.11340206185567, "grad_norm": 2.972123146057129, "learning_rate": 9.735824742268041e-06, "loss": 0.2515, "step": 4510 }, { "epoch": 2.1180880974695406, "grad_norm": 2.9807615280151367, "learning_rate": 9.735238987816308e-06, "loss": 0.2773, "step": 4520 }, { "epoch": 2.1227741330834116, "grad_norm": 2.88916015625, "learning_rate": 9.734653233364575e-06, "loss": 0.3021, "step": 4530 }, { "epoch": 2.127460168697282, "grad_norm": 2.4502129554748535, "learning_rate": 9.73406747891284e-06, "loss": 0.2657, "step": 4540 }, { "epoch": 2.1321462043111525, "grad_norm": 3.6442370414733887, "learning_rate": 9.733481724461107e-06, "loss": 0.2737, "step": 4550 }, { "epoch": 2.1368322399250235, "grad_norm": 3.181819438934326, "learning_rate": 9.732895970009372e-06, "loss": 0.3209, "step": 4560 }, { "epoch": 2.141518275538894, "grad_norm": 2.9747514724731445, "learning_rate": 9.73231021555764e-06, "loss": 0.2731, "step": 4570 }, { "epoch": 2.146204311152765, "grad_norm": 3.7340550422668457, "learning_rate": 9.731724461105905e-06, "loss": 0.3319, "step": 4580 }, { "epoch": 2.1508903467666354, "grad_norm": 2.3309686183929443, "learning_rate": 9.731138706654171e-06, "loss": 0.2854, "step": 4590 }, { "epoch": 2.155576382380506, "grad_norm": 2.801131010055542, "learning_rate": 9.730552952202438e-06, "loss": 0.2759, "step": 4600 }, { "epoch": 2.160262417994377, "grad_norm": 2.5506978034973145, "learning_rate": 9.729967197750704e-06, "loss": 0.2425, "step": 4610 }, { "epoch": 2.1649484536082473, "grad_norm": 2.3702712059020996, "learning_rate": 9.72938144329897e-06, "loss": 0.2645, "step": 4620 }, { "epoch": 2.169634489222118, "grad_norm": 2.560842752456665, "learning_rate": 9.728795688847236e-06, "loss": 0.28, "step": 4630 }, { "epoch": 2.1743205248359887, "grad_norm": 2.8994758129119873, "learning_rate": 9.728209934395501e-06, "loss": 0.2764, "step": 4640 }, { "epoch": 2.179006560449859, "grad_norm": 3.6292624473571777, "learning_rate": 9.727624179943768e-06, "loss": 0.2827, "step": 4650 }, { "epoch": 2.18369259606373, "grad_norm": 2.619485378265381, "learning_rate": 9.727038425492035e-06, "loss": 0.2869, "step": 4660 }, { "epoch": 2.1883786316776006, "grad_norm": 2.676844358444214, "learning_rate": 9.7264526710403e-06, "loss": 0.2862, "step": 4670 }, { "epoch": 2.1930646672914715, "grad_norm": 2.7725651264190674, "learning_rate": 9.725866916588567e-06, "loss": 0.2965, "step": 4680 }, { "epoch": 2.197750702905342, "grad_norm": 2.34023380279541, "learning_rate": 9.725281162136832e-06, "loss": 0.2483, "step": 4690 }, { "epoch": 2.2024367385192125, "grad_norm": 2.652027130126953, "learning_rate": 9.724695407685099e-06, "loss": 0.2901, "step": 4700 }, { "epoch": 2.2071227741330834, "grad_norm": 2.3790388107299805, "learning_rate": 9.724109653233366e-06, "loss": 0.2797, "step": 4710 }, { "epoch": 2.211808809746954, "grad_norm": 2.6680283546447754, "learning_rate": 9.723523898781631e-06, "loss": 0.2446, "step": 4720 }, { "epoch": 2.216494845360825, "grad_norm": 3.196193218231201, "learning_rate": 9.722938144329898e-06, "loss": 0.3073, "step": 4730 }, { "epoch": 2.2211808809746953, "grad_norm": 2.9742684364318848, "learning_rate": 9.722352389878163e-06, "loss": 0.2641, "step": 4740 }, { "epoch": 2.2258669165885663, "grad_norm": 1.8688490390777588, "learning_rate": 9.72176663542643e-06, "loss": 0.2397, "step": 4750 }, { "epoch": 2.2258669165885663, "eval_loss": 0.06173858791589737, "eval_pearson_cosine": 0.7692502208827889, "eval_pearson_dot": 0.5854569446239672, "eval_pearson_euclidean": 0.7502268637430376, "eval_pearson_manhattan": 0.7501450224167883, "eval_runtime": 40.4876, "eval_samples_per_second": 37.048, "eval_spearman_cosine": 0.7781711434828433, "eval_spearman_dot": 0.5898324527411585, "eval_spearman_euclidean": 0.7652357962017762, "eval_spearman_manhattan": 0.765505305290314, "eval_steps_per_second": 37.048, "step": 4750 }, { "epoch": 2.2305529522024368, "grad_norm": 3.1260733604431152, "learning_rate": 9.721180880974697e-06, "loss": 0.2686, "step": 4760 }, { "epoch": 2.2352389878163073, "grad_norm": 2.8164772987365723, "learning_rate": 9.720595126522962e-06, "loss": 0.2664, "step": 4770 }, { "epoch": 2.239925023430178, "grad_norm": 2.9061129093170166, "learning_rate": 9.72000937207123e-06, "loss": 0.2894, "step": 4780 }, { "epoch": 2.2446110590440487, "grad_norm": 2.427224636077881, "learning_rate": 9.719423617619495e-06, "loss": 0.2952, "step": 4790 }, { "epoch": 2.2492970946579196, "grad_norm": 3.4472455978393555, "learning_rate": 9.71883786316776e-06, "loss": 0.3036, "step": 4800 }, { "epoch": 2.25398313027179, "grad_norm": 3.032599925994873, "learning_rate": 9.718252108716027e-06, "loss": 0.2702, "step": 4810 }, { "epoch": 2.2586691658856606, "grad_norm": 2.9868416786193848, "learning_rate": 9.717666354264294e-06, "loss": 0.2547, "step": 4820 }, { "epoch": 2.2633552014995315, "grad_norm": 3.0330114364624023, "learning_rate": 9.717080599812559e-06, "loss": 0.2701, "step": 4830 }, { "epoch": 2.268041237113402, "grad_norm": 3.1494383811950684, "learning_rate": 9.716494845360826e-06, "loss": 0.2474, "step": 4840 }, { "epoch": 2.2727272727272725, "grad_norm": 2.392869472503662, "learning_rate": 9.715909090909091e-06, "loss": 0.2571, "step": 4850 }, { "epoch": 2.2774133083411434, "grad_norm": 2.711582660675049, "learning_rate": 9.715323336457358e-06, "loss": 0.2778, "step": 4860 }, { "epoch": 2.282099343955014, "grad_norm": 3.1134321689605713, "learning_rate": 9.714737582005625e-06, "loss": 0.2702, "step": 4870 }, { "epoch": 2.286785379568885, "grad_norm": 3.022188901901245, "learning_rate": 9.71415182755389e-06, "loss": 0.248, "step": 4880 }, { "epoch": 2.2914714151827553, "grad_norm": 2.618468999862671, "learning_rate": 9.713566073102157e-06, "loss": 0.2878, "step": 4890 }, { "epoch": 2.296157450796626, "grad_norm": 2.559990882873535, "learning_rate": 9.712980318650422e-06, "loss": 0.259, "step": 4900 }, { "epoch": 2.3008434864104967, "grad_norm": 3.009366750717163, "learning_rate": 9.712394564198689e-06, "loss": 0.2662, "step": 4910 }, { "epoch": 2.3055295220243672, "grad_norm": 2.6885673999786377, "learning_rate": 9.711808809746956e-06, "loss": 0.2488, "step": 4920 }, { "epoch": 2.310215557638238, "grad_norm": 2.8503615856170654, "learning_rate": 9.711223055295221e-06, "loss": 0.2508, "step": 4930 }, { "epoch": 2.3149015932521086, "grad_norm": 2.469139575958252, "learning_rate": 9.710637300843488e-06, "loss": 0.3025, "step": 4940 }, { "epoch": 2.319587628865979, "grad_norm": 3.6701242923736572, "learning_rate": 9.710051546391753e-06, "loss": 0.3029, "step": 4950 }, { "epoch": 2.32427366447985, "grad_norm": 2.860112428665161, "learning_rate": 9.709465791940019e-06, "loss": 0.2924, "step": 4960 }, { "epoch": 2.3289597000937206, "grad_norm": 1.8148601055145264, "learning_rate": 9.708880037488286e-06, "loss": 0.3011, "step": 4970 }, { "epoch": 2.3336457357075915, "grad_norm": 3.073507070541382, "learning_rate": 9.708294283036552e-06, "loss": 0.2664, "step": 4980 }, { "epoch": 2.338331771321462, "grad_norm": 3.759744882583618, "learning_rate": 9.707708528584818e-06, "loss": 0.2977, "step": 4990 }, { "epoch": 2.3430178069353325, "grad_norm": 2.8136818408966064, "learning_rate": 9.707122774133085e-06, "loss": 0.28, "step": 5000 }, { "epoch": 2.3430178069353325, "eval_loss": 0.0644521713256836, "eval_pearson_cosine": 0.7653835203754937, "eval_pearson_dot": 0.5924636721600756, "eval_pearson_euclidean": 0.7568919402450831, "eval_pearson_manhattan": 0.7567361379039284, "eval_runtime": 40.0042, "eval_samples_per_second": 37.496, "eval_spearman_cosine": 0.7760073163206256, "eval_spearman_dot": 0.5970251712536361, "eval_spearman_euclidean": 0.7705144458647132, "eval_spearman_manhattan": 0.7705196006726509, "eval_steps_per_second": 37.496, "step": 5000 }, { "epoch": 2.3477038425492034, "grad_norm": 2.8381240367889404, "learning_rate": 9.70653701968135e-06, "loss": 0.2584, "step": 5010 }, { "epoch": 2.352389878163074, "grad_norm": 2.48789381980896, "learning_rate": 9.705951265229617e-06, "loss": 0.2518, "step": 5020 }, { "epoch": 2.357075913776945, "grad_norm": 2.8016576766967773, "learning_rate": 9.705365510777884e-06, "loss": 0.2839, "step": 5030 }, { "epoch": 2.3617619493908153, "grad_norm": 3.750737428665161, "learning_rate": 9.704779756326149e-06, "loss": 0.2757, "step": 5040 }, { "epoch": 2.3664479850046862, "grad_norm": 3.028477668762207, "learning_rate": 9.704194001874416e-06, "loss": 0.2542, "step": 5050 }, { "epoch": 2.3711340206185567, "grad_norm": 2.5787017345428467, "learning_rate": 9.703608247422681e-06, "loss": 0.2623, "step": 5060 }, { "epoch": 2.375820056232427, "grad_norm": 3.54349422454834, "learning_rate": 9.703022492970948e-06, "loss": 0.2872, "step": 5070 }, { "epoch": 2.380506091846298, "grad_norm": 3.924848794937134, "learning_rate": 9.702436738519213e-06, "loss": 0.2641, "step": 5080 }, { "epoch": 2.3851921274601686, "grad_norm": 2.8141496181488037, "learning_rate": 9.70185098406748e-06, "loss": 0.2549, "step": 5090 }, { "epoch": 2.3898781630740396, "grad_norm": 3.0236399173736572, "learning_rate": 9.701265229615747e-06, "loss": 0.2715, "step": 5100 }, { "epoch": 2.39456419868791, "grad_norm": 2.611743688583374, "learning_rate": 9.700679475164012e-06, "loss": 0.2572, "step": 5110 }, { "epoch": 2.3992502343017805, "grad_norm": 2.905355930328369, "learning_rate": 9.700093720712277e-06, "loss": 0.2772, "step": 5120 }, { "epoch": 2.4039362699156515, "grad_norm": 1.6498035192489624, "learning_rate": 9.699507966260544e-06, "loss": 0.2776, "step": 5130 }, { "epoch": 2.408622305529522, "grad_norm": 4.166587829589844, "learning_rate": 9.69892221180881e-06, "loss": 0.283, "step": 5140 }, { "epoch": 2.413308341143393, "grad_norm": 2.960628032684326, "learning_rate": 9.698336457357076e-06, "loss": 0.2562, "step": 5150 }, { "epoch": 2.4179943767572634, "grad_norm": 3.179250955581665, "learning_rate": 9.697750702905343e-06, "loss": 0.2654, "step": 5160 }, { "epoch": 2.422680412371134, "grad_norm": 2.3080592155456543, "learning_rate": 9.697164948453609e-06, "loss": 0.2728, "step": 5170 }, { "epoch": 2.427366447985005, "grad_norm": 2.4087467193603516, "learning_rate": 9.696579194001876e-06, "loss": 0.3017, "step": 5180 }, { "epoch": 2.4320524835988753, "grad_norm": 2.0054640769958496, "learning_rate": 9.69599343955014e-06, "loss": 0.2592, "step": 5190 }, { "epoch": 2.436738519212746, "grad_norm": 3.7607529163360596, "learning_rate": 9.695407685098408e-06, "loss": 0.2501, "step": 5200 }, { "epoch": 2.4414245548266167, "grad_norm": 2.0960841178894043, "learning_rate": 9.694821930646675e-06, "loss": 0.2937, "step": 5210 }, { "epoch": 2.446110590440487, "grad_norm": 2.9174606800079346, "learning_rate": 9.69423617619494e-06, "loss": 0.2509, "step": 5220 }, { "epoch": 2.450796626054358, "grad_norm": 2.7972617149353027, "learning_rate": 9.693650421743207e-06, "loss": 0.2648, "step": 5230 }, { "epoch": 2.4554826616682286, "grad_norm": 2.465677261352539, "learning_rate": 9.693064667291472e-06, "loss": 0.2888, "step": 5240 }, { "epoch": 2.4601686972820995, "grad_norm": 2.6547045707702637, "learning_rate": 9.692478912839737e-06, "loss": 0.2631, "step": 5250 }, { "epoch": 2.4601686972820995, "eval_loss": 0.06392496824264526, "eval_pearson_cosine": 0.771214841842891, "eval_pearson_dot": 0.5715473025039159, "eval_pearson_euclidean": 0.75617638488007, "eval_pearson_manhattan": 0.7560991988119383, "eval_runtime": 40.3523, "eval_samples_per_second": 37.173, "eval_spearman_cosine": 0.7797840912495824, "eval_spearman_dot": 0.5730858147900617, "eval_spearman_euclidean": 0.7705235550970289, "eval_spearman_manhattan": 0.7705309049511561, "eval_steps_per_second": 37.173, "step": 5250 }, { "epoch": 2.46485473289597, "grad_norm": 3.2558162212371826, "learning_rate": 9.691893158388006e-06, "loss": 0.2874, "step": 5260 }, { "epoch": 2.4695407685098405, "grad_norm": 2.307089328765869, "learning_rate": 9.691307403936271e-06, "loss": 0.2681, "step": 5270 }, { "epoch": 2.4742268041237114, "grad_norm": 3.001068115234375, "learning_rate": 9.690721649484536e-06, "loss": 0.2815, "step": 5280 }, { "epoch": 2.478912839737582, "grad_norm": 2.5946176052093506, "learning_rate": 9.690135895032803e-06, "loss": 0.2639, "step": 5290 }, { "epoch": 2.483598875351453, "grad_norm": 2.6081533432006836, "learning_rate": 9.689550140581068e-06, "loss": 0.2707, "step": 5300 }, { "epoch": 2.4882849109653233, "grad_norm": 3.037405490875244, "learning_rate": 9.688964386129335e-06, "loss": 0.2844, "step": 5310 }, { "epoch": 2.492970946579194, "grad_norm": 2.944249391555786, "learning_rate": 9.688378631677602e-06, "loss": 0.2728, "step": 5320 }, { "epoch": 2.4976569821930648, "grad_norm": 2.7798855304718018, "learning_rate": 9.687792877225867e-06, "loss": 0.3013, "step": 5330 }, { "epoch": 2.5023430178069352, "grad_norm": 2.005322217941284, "learning_rate": 9.687207122774134e-06, "loss": 0.2523, "step": 5340 }, { "epoch": 2.5070290534208057, "grad_norm": 2.831803321838379, "learning_rate": 9.6866213683224e-06, "loss": 0.2542, "step": 5350 }, { "epoch": 2.5117150890346767, "grad_norm": 3.1902036666870117, "learning_rate": 9.686035613870666e-06, "loss": 0.241, "step": 5360 }, { "epoch": 2.5164011246485476, "grad_norm": 2.875300884246826, "learning_rate": 9.685449859418933e-06, "loss": 0.264, "step": 5370 }, { "epoch": 2.521087160262418, "grad_norm": 3.253399133682251, "learning_rate": 9.684864104967199e-06, "loss": 0.3013, "step": 5380 }, { "epoch": 2.5257731958762886, "grad_norm": 2.7400455474853516, "learning_rate": 9.684278350515465e-06, "loss": 0.3005, "step": 5390 }, { "epoch": 2.530459231490159, "grad_norm": 2.604724884033203, "learning_rate": 9.68369259606373e-06, "loss": 0.2805, "step": 5400 }, { "epoch": 2.53514526710403, "grad_norm": 2.5421054363250732, "learning_rate": 9.683106841611996e-06, "loss": 0.256, "step": 5410 }, { "epoch": 2.539831302717901, "grad_norm": 3.7680563926696777, "learning_rate": 9.682521087160265e-06, "loss": 0.2685, "step": 5420 }, { "epoch": 2.5445173383317714, "grad_norm": 3.140620708465576, "learning_rate": 9.68193533270853e-06, "loss": 0.288, "step": 5430 }, { "epoch": 2.549203373945642, "grad_norm": 2.77986478805542, "learning_rate": 9.681349578256795e-06, "loss": 0.2827, "step": 5440 }, { "epoch": 2.5538894095595124, "grad_norm": 3.1461918354034424, "learning_rate": 9.680763823805062e-06, "loss": 0.2907, "step": 5450 }, { "epoch": 2.5585754451733833, "grad_norm": 2.403411626815796, "learning_rate": 9.680178069353327e-06, "loss": 0.2997, "step": 5460 }, { "epoch": 2.5632614807872542, "grad_norm": 2.744910955429077, "learning_rate": 9.679592314901594e-06, "loss": 0.2615, "step": 5470 }, { "epoch": 2.5679475164011247, "grad_norm": 3.292695999145508, "learning_rate": 9.679006560449861e-06, "loss": 0.2679, "step": 5480 }, { "epoch": 2.572633552014995, "grad_norm": 2.8364174365997314, "learning_rate": 9.678420805998126e-06, "loss": 0.2763, "step": 5490 }, { "epoch": 2.5773195876288657, "grad_norm": 2.456524610519409, "learning_rate": 9.677835051546393e-06, "loss": 0.2488, "step": 5500 }, { "epoch": 2.5773195876288657, "eval_loss": 0.06359264999628067, "eval_pearson_cosine": 0.7736336142524749, "eval_pearson_dot": 0.5835126580557244, "eval_pearson_euclidean": 0.7537697181391394, "eval_pearson_manhattan": 0.7536913116270512, "eval_runtime": 39.7446, "eval_samples_per_second": 37.741, "eval_spearman_cosine": 0.7838309998226812, "eval_spearman_dot": 0.5860915978330865, "eval_spearman_euclidean": 0.76851014194766, "eval_spearman_manhattan": 0.7687268040235111, "eval_steps_per_second": 37.741, "step": 5500 }, { "epoch": 2.5820056232427366, "grad_norm": 2.6862993240356445, "learning_rate": 9.677249297094658e-06, "loss": 0.3049, "step": 5510 }, { "epoch": 2.5866916588566076, "grad_norm": 2.146784782409668, "learning_rate": 9.676663542642925e-06, "loss": 0.2844, "step": 5520 }, { "epoch": 2.591377694470478, "grad_norm": 2.9161078929901123, "learning_rate": 9.676077788191192e-06, "loss": 0.2505, "step": 5530 }, { "epoch": 2.5960637300843485, "grad_norm": 2.771800994873047, "learning_rate": 9.675492033739457e-06, "loss": 0.2741, "step": 5540 }, { "epoch": 2.600749765698219, "grad_norm": 2.6023566722869873, "learning_rate": 9.674906279287724e-06, "loss": 0.271, "step": 5550 }, { "epoch": 2.60543580131209, "grad_norm": 3.007667064666748, "learning_rate": 9.67432052483599e-06, "loss": 0.2451, "step": 5560 }, { "epoch": 2.610121836925961, "grad_norm": 3.014564275741577, "learning_rate": 9.673734770384255e-06, "loss": 0.3036, "step": 5570 }, { "epoch": 2.6148078725398314, "grad_norm": 2.6932876110076904, "learning_rate": 9.673149015932522e-06, "loss": 0.2642, "step": 5580 }, { "epoch": 2.619493908153702, "grad_norm": 3.396259307861328, "learning_rate": 9.672563261480789e-06, "loss": 0.26, "step": 5590 }, { "epoch": 2.624179943767573, "grad_norm": 3.203933000564575, "learning_rate": 9.671977507029054e-06, "loss": 0.2629, "step": 5600 }, { "epoch": 2.6288659793814433, "grad_norm": 3.077299118041992, "learning_rate": 9.67139175257732e-06, "loss": 0.2566, "step": 5610 }, { "epoch": 2.633552014995314, "grad_norm": 2.445908308029175, "learning_rate": 9.670805998125586e-06, "loss": 0.2751, "step": 5620 }, { "epoch": 2.6382380506091847, "grad_norm": 2.4680237770080566, "learning_rate": 9.670220243673853e-06, "loss": 0.2725, "step": 5630 }, { "epoch": 2.642924086223055, "grad_norm": 3.320460081100464, "learning_rate": 9.669634489222118e-06, "loss": 0.3, "step": 5640 }, { "epoch": 2.647610121836926, "grad_norm": 2.6988685131073, "learning_rate": 9.669048734770385e-06, "loss": 0.2754, "step": 5650 }, { "epoch": 2.6522961574507966, "grad_norm": 3.1843039989471436, "learning_rate": 9.668462980318652e-06, "loss": 0.2711, "step": 5660 }, { "epoch": 2.6569821930646675, "grad_norm": 2.5606629848480225, "learning_rate": 9.667877225866917e-06, "loss": 0.2772, "step": 5670 }, { "epoch": 2.661668228678538, "grad_norm": 2.2161924839019775, "learning_rate": 9.667291471415184e-06, "loss": 0.2501, "step": 5680 }, { "epoch": 2.6663542642924085, "grad_norm": 2.6044790744781494, "learning_rate": 9.66670571696345e-06, "loss": 0.2859, "step": 5690 }, { "epoch": 2.6710402999062794, "grad_norm": 2.9149224758148193, "learning_rate": 9.666119962511716e-06, "loss": 0.2834, "step": 5700 }, { "epoch": 2.67572633552015, "grad_norm": 3.0715954303741455, "learning_rate": 9.665534208059983e-06, "loss": 0.2802, "step": 5710 }, { "epoch": 2.680412371134021, "grad_norm": 2.8845441341400146, "learning_rate": 9.664948453608248e-06, "loss": 0.2594, "step": 5720 }, { "epoch": 2.6850984067478914, "grad_norm": 3.3252668380737305, "learning_rate": 9.664362699156514e-06, "loss": 0.2625, "step": 5730 }, { "epoch": 2.689784442361762, "grad_norm": 2.8618457317352295, "learning_rate": 9.66377694470478e-06, "loss": 0.2769, "step": 5740 }, { "epoch": 2.6944704779756328, "grad_norm": 3.196368455886841, "learning_rate": 9.663191190253046e-06, "loss": 0.2557, "step": 5750 }, { "epoch": 2.6944704779756328, "eval_loss": 0.06136869639158249, "eval_pearson_cosine": 0.7739160712003912, "eval_pearson_dot": 0.6008471845975407, "eval_pearson_euclidean": 0.7570902529490127, "eval_pearson_manhattan": 0.7569554961897609, "eval_runtime": 39.4871, "eval_samples_per_second": 37.987, "eval_spearman_cosine": 0.7830112643640406, "eval_spearman_dot": 0.6040995035629967, "eval_spearman_euclidean": 0.7716512847518021, "eval_spearman_manhattan": 0.7716299413202475, "eval_steps_per_second": 37.987, "step": 5750 }, { "epoch": 2.6991565135895033, "grad_norm": 2.449201822280884, "learning_rate": 9.662605435801313e-06, "loss": 0.2807, "step": 5760 }, { "epoch": 2.703842549203374, "grad_norm": 3.0767366886138916, "learning_rate": 9.66201968134958e-06, "loss": 0.2842, "step": 5770 }, { "epoch": 2.7085285848172447, "grad_norm": 2.7501325607299805, "learning_rate": 9.661433926897845e-06, "loss": 0.281, "step": 5780 }, { "epoch": 2.713214620431115, "grad_norm": 3.0382957458496094, "learning_rate": 9.660848172446112e-06, "loss": 0.2435, "step": 5790 }, { "epoch": 2.717900656044986, "grad_norm": 3.02397084236145, "learning_rate": 9.660262417994377e-06, "loss": 0.2797, "step": 5800 }, { "epoch": 2.7225866916588566, "grad_norm": 2.8019211292266846, "learning_rate": 9.659676663542644e-06, "loss": 0.2728, "step": 5810 }, { "epoch": 2.7272727272727275, "grad_norm": 2.9764068126678467, "learning_rate": 9.65909090909091e-06, "loss": 0.2796, "step": 5820 }, { "epoch": 2.731958762886598, "grad_norm": 3.254289150238037, "learning_rate": 9.658505154639176e-06, "loss": 0.2967, "step": 5830 }, { "epoch": 2.7366447985004685, "grad_norm": 3.0329315662384033, "learning_rate": 9.657919400187443e-06, "loss": 0.2631, "step": 5840 }, { "epoch": 2.7413308341143394, "grad_norm": 2.720784902572632, "learning_rate": 9.657333645735708e-06, "loss": 0.273, "step": 5850 }, { "epoch": 2.74601686972821, "grad_norm": 2.6841161251068115, "learning_rate": 9.656747891283973e-06, "loss": 0.2729, "step": 5860 }, { "epoch": 2.750702905342081, "grad_norm": 3.0265884399414062, "learning_rate": 9.656162136832242e-06, "loss": 0.2762, "step": 5870 }, { "epoch": 2.7553889409559513, "grad_norm": 2.6341817378997803, "learning_rate": 9.655576382380507e-06, "loss": 0.2918, "step": 5880 }, { "epoch": 2.760074976569822, "grad_norm": 3.1059679985046387, "learning_rate": 9.654990627928772e-06, "loss": 0.2448, "step": 5890 }, { "epoch": 2.7647610121836927, "grad_norm": 3.105215311050415, "learning_rate": 9.65440487347704e-06, "loss": 0.3158, "step": 5900 }, { "epoch": 2.7694470477975632, "grad_norm": 2.5665364265441895, "learning_rate": 9.653819119025304e-06, "loss": 0.2742, "step": 5910 }, { "epoch": 2.774133083411434, "grad_norm": 3.4417858123779297, "learning_rate": 9.653233364573571e-06, "loss": 0.2666, "step": 5920 }, { "epoch": 2.7788191190253047, "grad_norm": 3.3958632946014404, "learning_rate": 9.652647610121838e-06, "loss": 0.2861, "step": 5930 }, { "epoch": 2.783505154639175, "grad_norm": 2.7881741523742676, "learning_rate": 9.652061855670104e-06, "loss": 0.2979, "step": 5940 }, { "epoch": 2.788191190253046, "grad_norm": 3.3900156021118164, "learning_rate": 9.65147610121837e-06, "loss": 0.2919, "step": 5950 }, { "epoch": 2.7928772258669166, "grad_norm": 2.6520965099334717, "learning_rate": 9.650890346766636e-06, "loss": 0.3384, "step": 5960 }, { "epoch": 2.7975632614807875, "grad_norm": 2.6379964351654053, "learning_rate": 9.650304592314903e-06, "loss": 0.2553, "step": 5970 }, { "epoch": 2.802249297094658, "grad_norm": 3.325380802154541, "learning_rate": 9.64971883786317e-06, "loss": 0.2599, "step": 5980 }, { "epoch": 2.8069353327085285, "grad_norm": 2.9354546070098877, "learning_rate": 9.649133083411435e-06, "loss": 0.2981, "step": 5990 }, { "epoch": 2.8116213683223994, "grad_norm": 3.915391445159912, "learning_rate": 9.648547328959702e-06, "loss": 0.2699, "step": 6000 }, { "epoch": 2.8116213683223994, "eval_loss": 0.063628189265728, "eval_pearson_cosine": 0.7721771737399621, "eval_pearson_dot": 0.5844319866585721, "eval_pearson_euclidean": 0.7572362286473506, "eval_pearson_manhattan": 0.7569847240428516, "eval_runtime": 39.6806, "eval_samples_per_second": 37.802, "eval_spearman_cosine": 0.7795493971324328, "eval_spearman_dot": 0.586396480606851, "eval_spearman_euclidean": 0.7700741181493094, "eval_spearman_manhattan": 0.7699220379699384, "eval_steps_per_second": 37.802, "step": 6000 }, { "epoch": 2.81630740393627, "grad_norm": 2.758107900619507, "learning_rate": 9.647961574507967e-06, "loss": 0.277, "step": 6010 }, { "epoch": 2.820993439550141, "grad_norm": 2.5489895343780518, "learning_rate": 9.647375820056232e-06, "loss": 0.2776, "step": 6020 }, { "epoch": 2.8256794751640113, "grad_norm": 2.8726046085357666, "learning_rate": 9.6467900656045e-06, "loss": 0.2719, "step": 6030 }, { "epoch": 2.830365510777882, "grad_norm": 2.856821298599243, "learning_rate": 9.646204311152766e-06, "loss": 0.2717, "step": 6040 }, { "epoch": 2.8350515463917527, "grad_norm": 3.395746946334839, "learning_rate": 9.645618556701031e-06, "loss": 0.2657, "step": 6050 }, { "epoch": 2.839737582005623, "grad_norm": 2.8443150520324707, "learning_rate": 9.645032802249298e-06, "loss": 0.2889, "step": 6060 }, { "epoch": 2.844423617619494, "grad_norm": 2.680279016494751, "learning_rate": 9.644447047797563e-06, "loss": 0.3128, "step": 6070 }, { "epoch": 2.8491096532333646, "grad_norm": 3.375882148742676, "learning_rate": 9.64386129334583e-06, "loss": 0.2784, "step": 6080 }, { "epoch": 2.853795688847235, "grad_norm": 3.4931676387786865, "learning_rate": 9.643275538894097e-06, "loss": 0.2906, "step": 6090 }, { "epoch": 2.858481724461106, "grad_norm": 3.0119099617004395, "learning_rate": 9.642689784442362e-06, "loss": 0.3064, "step": 6100 }, { "epoch": 2.8631677600749765, "grad_norm": 2.9946563243865967, "learning_rate": 9.64210402999063e-06, "loss": 0.2708, "step": 6110 }, { "epoch": 2.8678537956888475, "grad_norm": 2.687286853790283, "learning_rate": 9.641518275538894e-06, "loss": 0.2827, "step": 6120 }, { "epoch": 2.872539831302718, "grad_norm": 2.7718350887298584, "learning_rate": 9.640932521087161e-06, "loss": 0.2815, "step": 6130 }, { "epoch": 2.8772258669165884, "grad_norm": 2.4918980598449707, "learning_rate": 9.640346766635427e-06, "loss": 0.2383, "step": 6140 }, { "epoch": 2.8819119025304594, "grad_norm": 3.1328234672546387, "learning_rate": 9.639761012183694e-06, "loss": 0.2917, "step": 6150 }, { "epoch": 2.88659793814433, "grad_norm": 3.1907870769500732, "learning_rate": 9.63917525773196e-06, "loss": 0.2682, "step": 6160 }, { "epoch": 2.891283973758201, "grad_norm": 2.851372241973877, "learning_rate": 9.638589503280226e-06, "loss": 0.2523, "step": 6170 }, { "epoch": 2.8959700093720713, "grad_norm": 2.8064658641815186, "learning_rate": 9.638003748828491e-06, "loss": 0.276, "step": 6180 }, { "epoch": 2.9006560449859418, "grad_norm": 3.0457820892333984, "learning_rate": 9.637417994376758e-06, "loss": 0.2713, "step": 6190 }, { "epoch": 2.9053420805998127, "grad_norm": 2.8154349327087402, "learning_rate": 9.636832239925025e-06, "loss": 0.2757, "step": 6200 }, { "epoch": 2.910028116213683, "grad_norm": 2.6084420680999756, "learning_rate": 9.63624648547329e-06, "loss": 0.2786, "step": 6210 }, { "epoch": 2.914714151827554, "grad_norm": 3.3972620964050293, "learning_rate": 9.635660731021557e-06, "loss": 0.284, "step": 6220 }, { "epoch": 2.9194001874414246, "grad_norm": 2.3089513778686523, "learning_rate": 9.635074976569822e-06, "loss": 0.2912, "step": 6230 }, { "epoch": 2.924086223055295, "grad_norm": 2.8329966068267822, "learning_rate": 9.634489222118089e-06, "loss": 0.2872, "step": 6240 }, { "epoch": 2.928772258669166, "grad_norm": 2.8177921772003174, "learning_rate": 9.633903467666354e-06, "loss": 0.2794, "step": 6250 }, { "epoch": 2.928772258669166, "eval_loss": 0.06385794281959534, "eval_pearson_cosine": 0.7704242421671665, "eval_pearson_dot": 0.5817298961264266, "eval_pearson_euclidean": 0.7580713117672779, "eval_pearson_manhattan": 0.7582345366438972, "eval_runtime": 39.8682, "eval_samples_per_second": 37.624, "eval_spearman_cosine": 0.7799516394114882, "eval_spearman_dot": 0.5793232888676765, "eval_spearman_euclidean": 0.7745810839892152, "eval_spearman_manhattan": 0.7744580162860011, "eval_steps_per_second": 37.624, "step": 6250 }, { "epoch": 2.9334582942830365, "grad_norm": 3.0042576789855957, "learning_rate": 9.633317713214621e-06, "loss": 0.3072, "step": 6260 }, { "epoch": 2.9381443298969074, "grad_norm": 2.3798370361328125, "learning_rate": 9.632731958762888e-06, "loss": 0.2472, "step": 6270 }, { "epoch": 2.942830365510778, "grad_norm": 3.12076735496521, "learning_rate": 9.632146204311153e-06, "loss": 0.2649, "step": 6280 }, { "epoch": 2.9475164011246484, "grad_norm": 2.4176595211029053, "learning_rate": 9.63156044985942e-06, "loss": 0.3011, "step": 6290 }, { "epoch": 2.9522024367385193, "grad_norm": 4.061246871948242, "learning_rate": 9.630974695407685e-06, "loss": 0.2905, "step": 6300 }, { "epoch": 2.95688847235239, "grad_norm": 3.4516801834106445, "learning_rate": 9.630388940955952e-06, "loss": 0.2757, "step": 6310 }, { "epoch": 2.9615745079662608, "grad_norm": 2.8717644214630127, "learning_rate": 9.62980318650422e-06, "loss": 0.2566, "step": 6320 }, { "epoch": 2.9662605435801312, "grad_norm": 2.948293685913086, "learning_rate": 9.629217432052484e-06, "loss": 0.2602, "step": 6330 }, { "epoch": 2.9709465791940017, "grad_norm": 2.5591063499450684, "learning_rate": 9.62863167760075e-06, "loss": 0.2637, "step": 6340 }, { "epoch": 2.9756326148078727, "grad_norm": 3.3883564472198486, "learning_rate": 9.628045923149017e-06, "loss": 0.3073, "step": 6350 }, { "epoch": 2.980318650421743, "grad_norm": 2.641995668411255, "learning_rate": 9.627460168697282e-06, "loss": 0.2711, "step": 6360 }, { "epoch": 2.985004686035614, "grad_norm": 2.4701485633850098, "learning_rate": 9.626874414245549e-06, "loss": 0.2458, "step": 6370 }, { "epoch": 2.9896907216494846, "grad_norm": 3.103329658508301, "learning_rate": 9.626288659793816e-06, "loss": 0.2854, "step": 6380 }, { "epoch": 2.994376757263355, "grad_norm": 2.9376931190490723, "learning_rate": 9.625702905342081e-06, "loss": 0.3098, "step": 6390 }, { "epoch": 2.999062792877226, "grad_norm": 2.806793212890625, "learning_rate": 9.625117150890348e-06, "loss": 0.2574, "step": 6400 }, { "epoch": 3.0037488284910965, "grad_norm": 2.9033491611480713, "learning_rate": 9.624531396438613e-06, "loss": 0.2101, "step": 6410 }, { "epoch": 3.0084348641049674, "grad_norm": 2.3773772716522217, "learning_rate": 9.62394564198688e-06, "loss": 0.1983, "step": 6420 }, { "epoch": 3.013120899718838, "grad_norm": 2.7549703121185303, "learning_rate": 9.623359887535147e-06, "loss": 0.1925, "step": 6430 }, { "epoch": 3.0178069353327084, "grad_norm": 2.121821880340576, "learning_rate": 9.622774133083412e-06, "loss": 0.1893, "step": 6440 }, { "epoch": 3.0224929709465793, "grad_norm": 1.9096688032150269, "learning_rate": 9.622188378631679e-06, "loss": 0.1832, "step": 6450 }, { "epoch": 3.02717900656045, "grad_norm": 1.793131947517395, "learning_rate": 9.621602624179944e-06, "loss": 0.1903, "step": 6460 }, { "epoch": 3.0318650421743207, "grad_norm": 2.6187257766723633, "learning_rate": 9.62101686972821e-06, "loss": 0.1846, "step": 6470 }, { "epoch": 3.036551077788191, "grad_norm": 3.1435282230377197, "learning_rate": 9.620431115276478e-06, "loss": 0.2057, "step": 6480 }, { "epoch": 3.0412371134020617, "grad_norm": 1.9570343494415283, "learning_rate": 9.619845360824743e-06, "loss": 0.2094, "step": 6490 }, { "epoch": 3.0459231490159326, "grad_norm": 1.9423924684524536, "learning_rate": 9.619259606373008e-06, "loss": 0.1778, "step": 6500 }, { "epoch": 3.0459231490159326, "eval_loss": 0.05262026935815811, "eval_pearson_cosine": 0.7738324758503552, "eval_pearson_dot": 0.6192771918549269, "eval_pearson_euclidean": 0.7572697799605308, "eval_pearson_manhattan": 0.7574249862527225, "eval_runtime": 39.2621, "eval_samples_per_second": 38.205, "eval_spearman_cosine": 0.7811305074925675, "eval_spearman_dot": 0.6255090522219068, "eval_spearman_euclidean": 0.7739030959623808, "eval_spearman_manhattan": 0.7738816457691842, "eval_steps_per_second": 38.205, "step": 6500 }, { "epoch": 3.050609184629803, "grad_norm": 2.544872760772705, "learning_rate": 9.618673851921275e-06, "loss": 0.2161, "step": 6510 }, { "epoch": 3.055295220243674, "grad_norm": 3.1003711223602295, "learning_rate": 9.61808809746954e-06, "loss": 0.1964, "step": 6520 }, { "epoch": 3.0599812558575445, "grad_norm": 2.2699685096740723, "learning_rate": 9.617502343017808e-06, "loss": 0.1992, "step": 6530 }, { "epoch": 3.064667291471415, "grad_norm": 1.655086636543274, "learning_rate": 9.616916588566074e-06, "loss": 0.1834, "step": 6540 }, { "epoch": 3.069353327085286, "grad_norm": 2.532160758972168, "learning_rate": 9.61633083411434e-06, "loss": 0.1978, "step": 6550 }, { "epoch": 3.0740393626991565, "grad_norm": 3.1178247928619385, "learning_rate": 9.615745079662607e-06, "loss": 0.2249, "step": 6560 }, { "epoch": 3.0787253983130274, "grad_norm": 3.530034303665161, "learning_rate": 9.615159325210872e-06, "loss": 0.214, "step": 6570 }, { "epoch": 3.083411433926898, "grad_norm": 2.2964463233947754, "learning_rate": 9.614573570759139e-06, "loss": 0.1695, "step": 6580 }, { "epoch": 3.0880974695407684, "grad_norm": 2.613321304321289, "learning_rate": 9.613987816307406e-06, "loss": 0.1888, "step": 6590 }, { "epoch": 3.0927835051546393, "grad_norm": 3.2321712970733643, "learning_rate": 9.613402061855671e-06, "loss": 0.1721, "step": 6600 }, { "epoch": 3.0974695407685098, "grad_norm": 2.779963731765747, "learning_rate": 9.612816307403938e-06, "loss": 0.2098, "step": 6610 }, { "epoch": 3.1021555763823807, "grad_norm": 2.531707763671875, "learning_rate": 9.612230552952203e-06, "loss": 0.1906, "step": 6620 }, { "epoch": 3.106841611996251, "grad_norm": 3.4784843921661377, "learning_rate": 9.611644798500468e-06, "loss": 0.1856, "step": 6630 }, { "epoch": 3.1115276476101217, "grad_norm": 2.682420253753662, "learning_rate": 9.611059044048735e-06, "loss": 0.2047, "step": 6640 }, { "epoch": 3.1162136832239926, "grad_norm": 2.4793782234191895, "learning_rate": 9.610473289597002e-06, "loss": 0.2062, "step": 6650 }, { "epoch": 3.120899718837863, "grad_norm": 2.4194202423095703, "learning_rate": 9.609887535145267e-06, "loss": 0.184, "step": 6660 }, { "epoch": 3.125585754451734, "grad_norm": 2.717632293701172, "learning_rate": 9.609301780693534e-06, "loss": 0.2148, "step": 6670 }, { "epoch": 3.1302717900656045, "grad_norm": 2.401958703994751, "learning_rate": 9.6087160262418e-06, "loss": 0.1912, "step": 6680 }, { "epoch": 3.134957825679475, "grad_norm": 2.585925340652466, "learning_rate": 9.608130271790066e-06, "loss": 0.1784, "step": 6690 }, { "epoch": 3.139643861293346, "grad_norm": 3.0962131023406982, "learning_rate": 9.607544517338333e-06, "loss": 0.2255, "step": 6700 }, { "epoch": 3.1443298969072164, "grad_norm": 2.8932714462280273, "learning_rate": 9.606958762886598e-06, "loss": 0.2022, "step": 6710 }, { "epoch": 3.1490159325210874, "grad_norm": 3.122772693634033, "learning_rate": 9.606373008434865e-06, "loss": 0.224, "step": 6720 }, { "epoch": 3.153701968134958, "grad_norm": 2.3057713508605957, "learning_rate": 9.60578725398313e-06, "loss": 0.1793, "step": 6730 }, { "epoch": 3.1583880037488283, "grad_norm": 2.4614179134368896, "learning_rate": 9.605201499531398e-06, "loss": 0.2021, "step": 6740 }, { "epoch": 3.1630740393626993, "grad_norm": 2.207731008529663, "learning_rate": 9.604615745079663e-06, "loss": 0.1791, "step": 6750 }, { "epoch": 3.1630740393626993, "eval_loss": 0.051918212324380875, "eval_pearson_cosine": 0.7727540079103647, "eval_pearson_dot": 0.6115735313924304, "eval_pearson_euclidean": 0.7538142936820122, "eval_pearson_manhattan": 0.7540359717423257, "eval_runtime": 40.2509, "eval_samples_per_second": 37.266, "eval_spearman_cosine": 0.7783352094358659, "eval_spearman_dot": 0.6182056058746139, "eval_spearman_euclidean": 0.7700146198863392, "eval_spearman_manhattan": 0.7703753603565089, "eval_steps_per_second": 37.266, "step": 6750 }, { "epoch": 3.1677600749765698, "grad_norm": 3.1614291667938232, "learning_rate": 9.60402999062793e-06, "loss": 0.2023, "step": 6760 }, { "epoch": 3.1724461105904407, "grad_norm": 2.1920371055603027, "learning_rate": 9.603444236176197e-06, "loss": 0.2102, "step": 6770 }, { "epoch": 3.177132146204311, "grad_norm": 2.640230894088745, "learning_rate": 9.602858481724462e-06, "loss": 0.1945, "step": 6780 }, { "epoch": 3.1818181818181817, "grad_norm": 2.749765396118164, "learning_rate": 9.602272727272727e-06, "loss": 0.1929, "step": 6790 }, { "epoch": 3.1865042174320526, "grad_norm": 2.674724578857422, "learning_rate": 9.601686972820994e-06, "loss": 0.1615, "step": 6800 }, { "epoch": 3.191190253045923, "grad_norm": 2.891061782836914, "learning_rate": 9.601101218369261e-06, "loss": 0.1957, "step": 6810 }, { "epoch": 3.195876288659794, "grad_norm": 2.452010154724121, "learning_rate": 9.600515463917526e-06, "loss": 0.1728, "step": 6820 }, { "epoch": 3.2005623242736645, "grad_norm": 2.2987475395202637, "learning_rate": 9.599929709465793e-06, "loss": 0.2001, "step": 6830 }, { "epoch": 3.205248359887535, "grad_norm": 1.8266733884811401, "learning_rate": 9.599343955014058e-06, "loss": 0.178, "step": 6840 }, { "epoch": 3.209934395501406, "grad_norm": 2.5674402713775635, "learning_rate": 9.598758200562325e-06, "loss": 0.1998, "step": 6850 }, { "epoch": 3.2146204311152764, "grad_norm": 2.6671390533447266, "learning_rate": 9.59817244611059e-06, "loss": 0.1953, "step": 6860 }, { "epoch": 3.2193064667291473, "grad_norm": 2.2907798290252686, "learning_rate": 9.597586691658857e-06, "loss": 0.2099, "step": 6870 }, { "epoch": 3.223992502343018, "grad_norm": 3.427845001220703, "learning_rate": 9.597000937207124e-06, "loss": 0.2197, "step": 6880 }, { "epoch": 3.2286785379568883, "grad_norm": 2.7697179317474365, "learning_rate": 9.59641518275539e-06, "loss": 0.2015, "step": 6890 }, { "epoch": 3.2333645735707592, "grad_norm": 2.18623423576355, "learning_rate": 9.595829428303656e-06, "loss": 0.186, "step": 6900 }, { "epoch": 3.2380506091846297, "grad_norm": 3.5410492420196533, "learning_rate": 9.595243673851922e-06, "loss": 0.1952, "step": 6910 }, { "epoch": 3.2427366447985007, "grad_norm": 2.631042242050171, "learning_rate": 9.594657919400188e-06, "loss": 0.1987, "step": 6920 }, { "epoch": 3.247422680412371, "grad_norm": 2.416839122772217, "learning_rate": 9.594072164948455e-06, "loss": 0.1882, "step": 6930 }, { "epoch": 3.2521087160262416, "grad_norm": 1.9796010255813599, "learning_rate": 9.59348641049672e-06, "loss": 0.2133, "step": 6940 }, { "epoch": 3.2567947516401126, "grad_norm": 2.4925854206085205, "learning_rate": 9.592900656044986e-06, "loss": 0.1915, "step": 6950 }, { "epoch": 3.261480787253983, "grad_norm": 2.323317766189575, "learning_rate": 9.592314901593253e-06, "loss": 0.1951, "step": 6960 }, { "epoch": 3.266166822867854, "grad_norm": 2.2892370223999023, "learning_rate": 9.591729147141518e-06, "loss": 0.1938, "step": 6970 }, { "epoch": 3.2708528584817245, "grad_norm": 2.5155346393585205, "learning_rate": 9.591143392689785e-06, "loss": 0.2233, "step": 6980 }, { "epoch": 3.275538894095595, "grad_norm": 2.344982624053955, "learning_rate": 9.590557638238052e-06, "loss": 0.2029, "step": 6990 }, { "epoch": 3.280224929709466, "grad_norm": 2.588749408721924, "learning_rate": 9.589971883786317e-06, "loss": 0.201, "step": 7000 }, { "epoch": 3.280224929709466, "eval_loss": 0.05114726722240448, "eval_pearson_cosine": 0.7755109260376774, "eval_pearson_dot": 0.6038894215473363, "eval_pearson_euclidean": 0.7503042403542959, "eval_pearson_manhattan": 0.7506062835400229, "eval_runtime": 39.6185, "eval_samples_per_second": 37.861, "eval_spearman_cosine": 0.782480390320873, "eval_spearman_dot": 0.6071370021112881, "eval_spearman_euclidean": 0.7669617369314659, "eval_spearman_manhattan": 0.767145329588772, "eval_steps_per_second": 37.861, "step": 7000 }, { "epoch": 3.2849109653233364, "grad_norm": 2.2868146896362305, "learning_rate": 9.589386129334584e-06, "loss": 0.1909, "step": 7010 }, { "epoch": 3.2895970009372073, "grad_norm": 2.2928576469421387, "learning_rate": 9.58880037488285e-06, "loss": 0.1867, "step": 7020 }, { "epoch": 3.294283036551078, "grad_norm": 2.36120343208313, "learning_rate": 9.588214620431116e-06, "loss": 0.2003, "step": 7030 }, { "epoch": 3.2989690721649483, "grad_norm": 2.88966965675354, "learning_rate": 9.587628865979383e-06, "loss": 0.1916, "step": 7040 }, { "epoch": 3.303655107778819, "grad_norm": 3.188649892807007, "learning_rate": 9.587043111527648e-06, "loss": 0.1839, "step": 7050 }, { "epoch": 3.3083411433926897, "grad_norm": 2.4474985599517822, "learning_rate": 9.586457357075915e-06, "loss": 0.2007, "step": 7060 }, { "epoch": 3.3130271790065606, "grad_norm": 3.1308703422546387, "learning_rate": 9.58587160262418e-06, "loss": 0.2049, "step": 7070 }, { "epoch": 3.317713214620431, "grad_norm": 2.200005292892456, "learning_rate": 9.585285848172447e-06, "loss": 0.1926, "step": 7080 }, { "epoch": 3.3223992502343016, "grad_norm": 3.012788772583008, "learning_rate": 9.584700093720714e-06, "loss": 0.1753, "step": 7090 }, { "epoch": 3.3270852858481725, "grad_norm": 2.8640036582946777, "learning_rate": 9.58411433926898e-06, "loss": 0.2004, "step": 7100 }, { "epoch": 3.331771321462043, "grad_norm": 2.8414249420166016, "learning_rate": 9.583528584817245e-06, "loss": 0.22, "step": 7110 }, { "epoch": 3.336457357075914, "grad_norm": 3.137575149536133, "learning_rate": 9.582942830365512e-06, "loss": 0.1985, "step": 7120 }, { "epoch": 3.3411433926897844, "grad_norm": 3.0193729400634766, "learning_rate": 9.582357075913777e-06, "loss": 0.1918, "step": 7130 }, { "epoch": 3.345829428303655, "grad_norm": 2.8278656005859375, "learning_rate": 9.581771321462044e-06, "loss": 0.2042, "step": 7140 }, { "epoch": 3.350515463917526, "grad_norm": 3.6021783351898193, "learning_rate": 9.58118556701031e-06, "loss": 0.2013, "step": 7150 }, { "epoch": 3.3552014995313963, "grad_norm": 2.698171615600586, "learning_rate": 9.580599812558576e-06, "loss": 0.1776, "step": 7160 }, { "epoch": 3.3598875351452673, "grad_norm": 2.7866642475128174, "learning_rate": 9.580014058106843e-06, "loss": 0.2219, "step": 7170 }, { "epoch": 3.3645735707591378, "grad_norm": 2.216646671295166, "learning_rate": 9.579428303655108e-06, "loss": 0.1971, "step": 7180 }, { "epoch": 3.3692596063730083, "grad_norm": 2.300733804702759, "learning_rate": 9.578842549203375e-06, "loss": 0.1866, "step": 7190 }, { "epoch": 3.373945641986879, "grad_norm": 2.985243320465088, "learning_rate": 9.578256794751642e-06, "loss": 0.1884, "step": 7200 }, { "epoch": 3.3786316776007497, "grad_norm": 2.4142348766326904, "learning_rate": 9.577671040299907e-06, "loss": 0.2, "step": 7210 }, { "epoch": 3.3833177132146206, "grad_norm": 3.3411073684692383, "learning_rate": 9.577085285848174e-06, "loss": 0.1961, "step": 7220 }, { "epoch": 3.388003748828491, "grad_norm": 2.240095376968384, "learning_rate": 9.57649953139644e-06, "loss": 0.1759, "step": 7230 }, { "epoch": 3.3926897844423616, "grad_norm": 2.9961328506469727, "learning_rate": 9.575913776944706e-06, "loss": 0.2184, "step": 7240 }, { "epoch": 3.3973758200562325, "grad_norm": 3.188340663909912, "learning_rate": 9.575328022492971e-06, "loss": 0.225, "step": 7250 }, { "epoch": 3.3973758200562325, "eval_loss": 0.05125829204916954, "eval_pearson_cosine": 0.7683713936958014, "eval_pearson_dot": 0.5867459849479424, "eval_pearson_euclidean": 0.7513948436830518, "eval_pearson_manhattan": 0.7514791526214619, "eval_runtime": 39.3707, "eval_samples_per_second": 38.099, "eval_spearman_cosine": 0.7749219401036014, "eval_spearman_dot": 0.589399728241704, "eval_spearman_euclidean": 0.7692196211001016, "eval_spearman_manhattan": 0.7689230301417627, "eval_steps_per_second": 38.099, "step": 7250 }, { "epoch": 3.402061855670103, "grad_norm": 2.632519006729126, "learning_rate": 9.574742268041238e-06, "loss": 0.2081, "step": 7260 }, { "epoch": 3.406747891283974, "grad_norm": 2.775071620941162, "learning_rate": 9.574156513589503e-06, "loss": 0.2054, "step": 7270 }, { "epoch": 3.4114339268978444, "grad_norm": 2.4009063243865967, "learning_rate": 9.57357075913777e-06, "loss": 0.188, "step": 7280 }, { "epoch": 3.416119962511715, "grad_norm": 3.2851004600524902, "learning_rate": 9.572985004686036e-06, "loss": 0.1901, "step": 7290 }, { "epoch": 3.420805998125586, "grad_norm": 2.978980541229248, "learning_rate": 9.572399250234303e-06, "loss": 0.1906, "step": 7300 }, { "epoch": 3.4254920337394563, "grad_norm": 3.4765729904174805, "learning_rate": 9.57181349578257e-06, "loss": 0.163, "step": 7310 }, { "epoch": 3.4301780693533273, "grad_norm": 2.3053479194641113, "learning_rate": 9.571227741330835e-06, "loss": 0.2051, "step": 7320 }, { "epoch": 3.4348641049671977, "grad_norm": 2.520573139190674, "learning_rate": 9.570641986879102e-06, "loss": 0.2171, "step": 7330 }, { "epoch": 3.4395501405810682, "grad_norm": 2.7145869731903076, "learning_rate": 9.570056232427367e-06, "loss": 0.1946, "step": 7340 }, { "epoch": 3.444236176194939, "grad_norm": 2.9998998641967773, "learning_rate": 9.569470477975634e-06, "loss": 0.1942, "step": 7350 }, { "epoch": 3.4489222118088096, "grad_norm": 3.165696620941162, "learning_rate": 9.568884723523899e-06, "loss": 0.2204, "step": 7360 }, { "epoch": 3.4536082474226806, "grad_norm": 2.271026849746704, "learning_rate": 9.568298969072166e-06, "loss": 0.1974, "step": 7370 }, { "epoch": 3.458294283036551, "grad_norm": 2.50876522064209, "learning_rate": 9.567713214620433e-06, "loss": 0.1998, "step": 7380 }, { "epoch": 3.4629803186504216, "grad_norm": 2.734961986541748, "learning_rate": 9.567127460168698e-06, "loss": 0.1918, "step": 7390 }, { "epoch": 3.4676663542642925, "grad_norm": 2.7971320152282715, "learning_rate": 9.566541705716965e-06, "loss": 0.2187, "step": 7400 }, { "epoch": 3.472352389878163, "grad_norm": 2.5854151248931885, "learning_rate": 9.56595595126523e-06, "loss": 0.1825, "step": 7410 }, { "epoch": 3.477038425492034, "grad_norm": 3.7421298027038574, "learning_rate": 9.565370196813497e-06, "loss": 0.2332, "step": 7420 }, { "epoch": 3.4817244611059044, "grad_norm": 3.0620522499084473, "learning_rate": 9.564784442361762e-06, "loss": 0.1976, "step": 7430 }, { "epoch": 3.486410496719775, "grad_norm": 2.0397024154663086, "learning_rate": 9.56419868791003e-06, "loss": 0.1765, "step": 7440 }, { "epoch": 3.491096532333646, "grad_norm": 3.286588191986084, "learning_rate": 9.563612933458294e-06, "loss": 0.2395, "step": 7450 }, { "epoch": 3.4957825679475163, "grad_norm": 2.812507390975952, "learning_rate": 9.563027179006561e-06, "loss": 0.1873, "step": 7460 }, { "epoch": 3.5004686035613872, "grad_norm": 3.213799476623535, "learning_rate": 9.562441424554827e-06, "loss": 0.2309, "step": 7470 }, { "epoch": 3.5051546391752577, "grad_norm": 2.9520084857940674, "learning_rate": 9.561855670103093e-06, "loss": 0.1993, "step": 7480 }, { "epoch": 3.509840674789128, "grad_norm": 2.5035924911499023, "learning_rate": 9.56126991565136e-06, "loss": 0.1745, "step": 7490 }, { "epoch": 3.514526710402999, "grad_norm": 1.8526318073272705, "learning_rate": 9.560684161199626e-06, "loss": 0.1748, "step": 7500 }, { "epoch": 3.514526710402999, "eval_loss": 0.050157614052295685, "eval_pearson_cosine": 0.7752481977984473, "eval_pearson_dot": 0.5877283656585206, "eval_pearson_euclidean": 0.7460582363929404, "eval_pearson_manhattan": 0.7459100147594686, "eval_runtime": 42.2146, "eval_samples_per_second": 35.533, "eval_spearman_cosine": 0.7800582604837842, "eval_spearman_dot": 0.5948918375623936, "eval_spearman_euclidean": 0.7635782236371852, "eval_spearman_manhattan": 0.7629943017245161, "eval_steps_per_second": 35.533, "step": 7500 }, { "epoch": 3.5192127460168696, "grad_norm": 2.638265371322632, "learning_rate": 9.560098406747893e-06, "loss": 0.1919, "step": 7510 }, { "epoch": 3.5238987816307406, "grad_norm": 2.9258508682250977, "learning_rate": 9.559512652296158e-06, "loss": 0.2036, "step": 7520 }, { "epoch": 3.528584817244611, "grad_norm": 3.6239101886749268, "learning_rate": 9.558926897844425e-06, "loss": 0.1855, "step": 7530 }, { "epoch": 3.5332708528584815, "grad_norm": 3.1037397384643555, "learning_rate": 9.558341143392692e-06, "loss": 0.2081, "step": 7540 }, { "epoch": 3.5379568884723525, "grad_norm": 2.8824028968811035, "learning_rate": 9.557755388940957e-06, "loss": 0.2028, "step": 7550 }, { "epoch": 3.542642924086223, "grad_norm": 2.8066060543060303, "learning_rate": 9.557169634489224e-06, "loss": 0.2031, "step": 7560 }, { "epoch": 3.547328959700094, "grad_norm": 3.691577911376953, "learning_rate": 9.556583880037489e-06, "loss": 0.2095, "step": 7570 }, { "epoch": 3.5520149953139644, "grad_norm": 1.769102692604065, "learning_rate": 9.555998125585754e-06, "loss": 0.1927, "step": 7580 }, { "epoch": 3.556701030927835, "grad_norm": 3.4198482036590576, "learning_rate": 9.555412371134021e-06, "loss": 0.217, "step": 7590 }, { "epoch": 3.561387066541706, "grad_norm": 2.0379226207733154, "learning_rate": 9.554826616682288e-06, "loss": 0.1977, "step": 7600 }, { "epoch": 3.5660731021555763, "grad_norm": 2.7821481227874756, "learning_rate": 9.554240862230553e-06, "loss": 0.2084, "step": 7610 }, { "epoch": 3.570759137769447, "grad_norm": 2.9597930908203125, "learning_rate": 9.55365510777882e-06, "loss": 0.1858, "step": 7620 }, { "epoch": 3.5754451733833177, "grad_norm": 3.193235397338867, "learning_rate": 9.553069353327085e-06, "loss": 0.1936, "step": 7630 }, { "epoch": 3.580131208997188, "grad_norm": 2.8011372089385986, "learning_rate": 9.552483598875352e-06, "loss": 0.1958, "step": 7640 }, { "epoch": 3.584817244611059, "grad_norm": 2.2420766353607178, "learning_rate": 9.55189784442362e-06, "loss": 0.1988, "step": 7650 }, { "epoch": 3.5895032802249296, "grad_norm": 2.6297037601470947, "learning_rate": 9.551312089971884e-06, "loss": 0.1809, "step": 7660 }, { "epoch": 3.5941893158388005, "grad_norm": 2.1531927585601807, "learning_rate": 9.550726335520151e-06, "loss": 0.197, "step": 7670 }, { "epoch": 3.598875351452671, "grad_norm": 3.1594254970550537, "learning_rate": 9.550140581068417e-06, "loss": 0.2049, "step": 7680 }, { "epoch": 3.6035613870665415, "grad_norm": 3.0111396312713623, "learning_rate": 9.549554826616683e-06, "loss": 0.2213, "step": 7690 }, { "epoch": 3.6082474226804124, "grad_norm": 2.9145541191101074, "learning_rate": 9.54896907216495e-06, "loss": 0.2027, "step": 7700 }, { "epoch": 3.612933458294283, "grad_norm": 2.465318441390991, "learning_rate": 9.548383317713216e-06, "loss": 0.1937, "step": 7710 }, { "epoch": 3.617619493908154, "grad_norm": 2.6440296173095703, "learning_rate": 9.547797563261482e-06, "loss": 0.2379, "step": 7720 }, { "epoch": 3.6223055295220243, "grad_norm": 3.0767295360565186, "learning_rate": 9.547211808809748e-06, "loss": 0.2245, "step": 7730 }, { "epoch": 3.626991565135895, "grad_norm": 2.5648956298828125, "learning_rate": 9.546626054358013e-06, "loss": 0.2085, "step": 7740 }, { "epoch": 3.6316776007497658, "grad_norm": 2.750375509262085, "learning_rate": 9.54604029990628e-06, "loss": 0.2045, "step": 7750 }, { "epoch": 3.6316776007497658, "eval_loss": 0.0512368269264698, "eval_pearson_cosine": 0.7786877204785156, "eval_pearson_dot": 0.6112588074907244, "eval_pearson_euclidean": 0.7460156578487087, "eval_pearson_manhattan": 0.7457383323528859, "eval_runtime": 40.2911, "eval_samples_per_second": 37.229, "eval_spearman_cosine": 0.7855704741784334, "eval_spearman_dot": 0.615574294440821, "eval_spearman_euclidean": 0.764218948503257, "eval_spearman_manhattan": 0.7636335157024128, "eval_steps_per_second": 37.229, "step": 7750 }, { "epoch": 3.6363636363636362, "grad_norm": 2.703327178955078, "learning_rate": 9.545454545454547e-06, "loss": 0.2241, "step": 7760 }, { "epoch": 3.641049671977507, "grad_norm": 2.124490976333618, "learning_rate": 9.544868791002812e-06, "loss": 0.1919, "step": 7770 }, { "epoch": 3.6457357075913777, "grad_norm": 2.9296751022338867, "learning_rate": 9.544283036551079e-06, "loss": 0.2316, "step": 7780 }, { "epoch": 3.650421743205248, "grad_norm": 2.1380982398986816, "learning_rate": 9.543697282099344e-06, "loss": 0.2119, "step": 7790 }, { "epoch": 3.655107778819119, "grad_norm": 2.6718966960906982, "learning_rate": 9.543111527647611e-06, "loss": 0.1828, "step": 7800 }, { "epoch": 3.6597938144329896, "grad_norm": 2.4571399688720703, "learning_rate": 9.542525773195878e-06, "loss": 0.2349, "step": 7810 }, { "epoch": 3.6644798500468605, "grad_norm": 2.056894540786743, "learning_rate": 9.541940018744143e-06, "loss": 0.1866, "step": 7820 }, { "epoch": 3.669165885660731, "grad_norm": 2.6094396114349365, "learning_rate": 9.54135426429241e-06, "loss": 0.1974, "step": 7830 }, { "epoch": 3.6738519212746015, "grad_norm": 2.887286901473999, "learning_rate": 9.540768509840675e-06, "loss": 0.2095, "step": 7840 }, { "epoch": 3.6785379568884724, "grad_norm": 2.494635581970215, "learning_rate": 9.540182755388942e-06, "loss": 0.2114, "step": 7850 }, { "epoch": 3.683223992502343, "grad_norm": 3.544060707092285, "learning_rate": 9.539597000937207e-06, "loss": 0.2046, "step": 7860 }, { "epoch": 3.687910028116214, "grad_norm": 2.7348105907440186, "learning_rate": 9.539011246485474e-06, "loss": 0.1959, "step": 7870 }, { "epoch": 3.6925960637300843, "grad_norm": 2.1513376235961914, "learning_rate": 9.538425492033741e-06, "loss": 0.1886, "step": 7880 }, { "epoch": 3.697282099343955, "grad_norm": 3.102924346923828, "learning_rate": 9.537839737582007e-06, "loss": 0.1918, "step": 7890 }, { "epoch": 3.7019681349578257, "grad_norm": 3.166285276412964, "learning_rate": 9.537253983130272e-06, "loss": 0.241, "step": 7900 }, { "epoch": 3.706654170571696, "grad_norm": 2.2208495140075684, "learning_rate": 9.536668228678539e-06, "loss": 0.2227, "step": 7910 }, { "epoch": 3.711340206185567, "grad_norm": 2.3546998500823975, "learning_rate": 9.536082474226806e-06, "loss": 0.1939, "step": 7920 }, { "epoch": 3.7160262417994376, "grad_norm": 2.840576171875, "learning_rate": 9.53549671977507e-06, "loss": 0.1999, "step": 7930 }, { "epoch": 3.720712277413308, "grad_norm": 2.5227537155151367, "learning_rate": 9.534910965323338e-06, "loss": 0.1939, "step": 7940 }, { "epoch": 3.725398313027179, "grad_norm": 2.9870660305023193, "learning_rate": 9.534325210871603e-06, "loss": 0.1905, "step": 7950 }, { "epoch": 3.7300843486410495, "grad_norm": 2.755246877670288, "learning_rate": 9.53373945641987e-06, "loss": 0.2267, "step": 7960 }, { "epoch": 3.7347703842549205, "grad_norm": 2.6030502319335938, "learning_rate": 9.533153701968135e-06, "loss": 0.2079, "step": 7970 }, { "epoch": 3.739456419868791, "grad_norm": 3.440239906311035, "learning_rate": 9.532567947516402e-06, "loss": 0.2159, "step": 7980 }, { "epoch": 3.7441424554826614, "grad_norm": 2.320068359375, "learning_rate": 9.531982193064669e-06, "loss": 0.204, "step": 7990 }, { "epoch": 3.7488284910965324, "grad_norm": 1.8982993364334106, "learning_rate": 9.531396438612934e-06, "loss": 0.1821, "step": 8000 }, { "epoch": 3.7488284910965324, "eval_loss": 0.05020766332745552, "eval_pearson_cosine": 0.7782144788553325, "eval_pearson_dot": 0.6044765942520343, "eval_pearson_euclidean": 0.7544531777073331, "eval_pearson_manhattan": 0.7542794078830823, "eval_runtime": 40.4619, "eval_samples_per_second": 37.072, "eval_spearman_cosine": 0.784196959157774, "eval_spearman_dot": 0.6068889930424554, "eval_spearman_euclidean": 0.7710302682957489, "eval_spearman_manhattan": 0.7707235385401614, "eval_steps_per_second": 37.072, "step": 8000 }, { "epoch": 3.753514526710403, "grad_norm": 3.368011713027954, "learning_rate": 9.530810684161201e-06, "loss": 0.2193, "step": 8010 }, { "epoch": 3.758200562324274, "grad_norm": 2.9850523471832275, "learning_rate": 9.530224929709466e-06, "loss": 0.2016, "step": 8020 }, { "epoch": 3.7628865979381443, "grad_norm": 2.8147449493408203, "learning_rate": 9.529639175257731e-06, "loss": 0.2215, "step": 8030 }, { "epoch": 3.7675726335520148, "grad_norm": 2.6306607723236084, "learning_rate": 9.529053420806e-06, "loss": 0.2015, "step": 8040 }, { "epoch": 3.7722586691658857, "grad_norm": 2.9150195121765137, "learning_rate": 9.528467666354265e-06, "loss": 0.2181, "step": 8050 }, { "epoch": 3.776944704779756, "grad_norm": 2.7011146545410156, "learning_rate": 9.52788191190253e-06, "loss": 0.2087, "step": 8060 }, { "epoch": 3.781630740393627, "grad_norm": 2.658383846282959, "learning_rate": 9.527296157450797e-06, "loss": 0.2274, "step": 8070 }, { "epoch": 3.7863167760074976, "grad_norm": 3.214911460876465, "learning_rate": 9.526710402999063e-06, "loss": 0.2062, "step": 8080 }, { "epoch": 3.791002811621368, "grad_norm": 2.258542537689209, "learning_rate": 9.52612464854733e-06, "loss": 0.1993, "step": 8090 }, { "epoch": 3.795688847235239, "grad_norm": 2.8734090328216553, "learning_rate": 9.525538894095597e-06, "loss": 0.1992, "step": 8100 }, { "epoch": 3.8003748828491095, "grad_norm": 2.079141616821289, "learning_rate": 9.524953139643862e-06, "loss": 0.1942, "step": 8110 }, { "epoch": 3.8050609184629804, "grad_norm": 2.8030195236206055, "learning_rate": 9.524367385192129e-06, "loss": 0.2289, "step": 8120 }, { "epoch": 3.809746954076851, "grad_norm": 3.562373399734497, "learning_rate": 9.523781630740394e-06, "loss": 0.2177, "step": 8130 }, { "epoch": 3.8144329896907214, "grad_norm": 3.045858144760132, "learning_rate": 9.52319587628866e-06, "loss": 0.1953, "step": 8140 }, { "epoch": 3.8191190253045924, "grad_norm": 1.9901701211929321, "learning_rate": 9.522610121836928e-06, "loss": 0.1873, "step": 8150 }, { "epoch": 3.823805060918463, "grad_norm": 3.8275070190429688, "learning_rate": 9.522024367385193e-06, "loss": 0.2081, "step": 8160 }, { "epoch": 3.8284910965323338, "grad_norm": 3.9077577590942383, "learning_rate": 9.52143861293346e-06, "loss": 0.2145, "step": 8170 }, { "epoch": 3.8331771321462043, "grad_norm": 2.799581289291382, "learning_rate": 9.520852858481725e-06, "loss": 0.2139, "step": 8180 }, { "epoch": 3.8378631677600747, "grad_norm": 2.7552547454833984, "learning_rate": 9.52026710402999e-06, "loss": 0.236, "step": 8190 }, { "epoch": 3.8425492033739457, "grad_norm": 2.6394495964050293, "learning_rate": 9.519681349578259e-06, "loss": 0.1724, "step": 8200 }, { "epoch": 3.847235238987816, "grad_norm": 2.702173948287964, "learning_rate": 9.519095595126524e-06, "loss": 0.1896, "step": 8210 }, { "epoch": 3.851921274601687, "grad_norm": 2.436673402786255, "learning_rate": 9.51850984067479e-06, "loss": 0.2018, "step": 8220 }, { "epoch": 3.8566073102155576, "grad_norm": 2.3034989833831787, "learning_rate": 9.517924086223056e-06, "loss": 0.1939, "step": 8230 }, { "epoch": 3.861293345829428, "grad_norm": 3.2450108528137207, "learning_rate": 9.517338331771321e-06, "loss": 0.209, "step": 8240 }, { "epoch": 3.865979381443299, "grad_norm": 2.502262830734253, "learning_rate": 9.516752577319588e-06, "loss": 0.1783, "step": 8250 }, { "epoch": 3.865979381443299, "eval_loss": 0.049117717891931534, "eval_pearson_cosine": 0.7772195489479836, "eval_pearson_dot": 0.5914535101164233, "eval_pearson_euclidean": 0.7459002868961946, "eval_pearson_manhattan": 0.7454618646316931, "eval_runtime": 39.5982, "eval_samples_per_second": 37.881, "eval_spearman_cosine": 0.7829045810141201, "eval_spearman_dot": 0.5984134337824631, "eval_spearman_euclidean": 0.7637144641585795, "eval_spearman_manhattan": 0.7630402590525859, "eval_steps_per_second": 37.881, "step": 8250 }, { "epoch": 3.8706654170571695, "grad_norm": 2.68709397315979, "learning_rate": 9.516166822867855e-06, "loss": 0.2099, "step": 8260 }, { "epoch": 3.8753514526710404, "grad_norm": 2.731020927429199, "learning_rate": 9.51558106841612e-06, "loss": 0.2032, "step": 8270 }, { "epoch": 3.880037488284911, "grad_norm": 2.9886014461517334, "learning_rate": 9.514995313964387e-06, "loss": 0.2206, "step": 8280 }, { "epoch": 3.8847235238987814, "grad_norm": 3.2847719192504883, "learning_rate": 9.514409559512653e-06, "loss": 0.2006, "step": 8290 }, { "epoch": 3.8894095595126523, "grad_norm": 2.6451122760772705, "learning_rate": 9.51382380506092e-06, "loss": 0.197, "step": 8300 }, { "epoch": 3.894095595126523, "grad_norm": 2.8909411430358887, "learning_rate": 9.513238050609187e-06, "loss": 0.2122, "step": 8310 }, { "epoch": 3.8987816307403937, "grad_norm": 2.6058146953582764, "learning_rate": 9.512652296157452e-06, "loss": 0.1812, "step": 8320 }, { "epoch": 3.9034676663542642, "grad_norm": 2.27011775970459, "learning_rate": 9.512066541705719e-06, "loss": 0.2184, "step": 8330 }, { "epoch": 3.9081537019681347, "grad_norm": 3.1832330226898193, "learning_rate": 9.511480787253984e-06, "loss": 0.2016, "step": 8340 }, { "epoch": 3.9128397375820057, "grad_norm": 2.791018009185791, "learning_rate": 9.510895032802249e-06, "loss": 0.215, "step": 8350 }, { "epoch": 3.917525773195876, "grad_norm": 2.7976248264312744, "learning_rate": 9.510309278350516e-06, "loss": 0.1905, "step": 8360 }, { "epoch": 3.922211808809747, "grad_norm": 3.055813789367676, "learning_rate": 9.509723523898783e-06, "loss": 0.1749, "step": 8370 }, { "epoch": 3.9268978444236176, "grad_norm": 2.684330940246582, "learning_rate": 9.509137769447048e-06, "loss": 0.2205, "step": 8380 }, { "epoch": 3.931583880037488, "grad_norm": 2.509272336959839, "learning_rate": 9.508552014995315e-06, "loss": 0.1996, "step": 8390 }, { "epoch": 3.936269915651359, "grad_norm": 3.171687126159668, "learning_rate": 9.50796626054358e-06, "loss": 0.2153, "step": 8400 }, { "epoch": 3.9409559512652295, "grad_norm": 2.5855712890625, "learning_rate": 9.507380506091847e-06, "loss": 0.1948, "step": 8410 }, { "epoch": 3.9456419868791004, "grad_norm": 2.6451563835144043, "learning_rate": 9.506794751640114e-06, "loss": 0.2016, "step": 8420 }, { "epoch": 3.950328022492971, "grad_norm": 2.763460159301758, "learning_rate": 9.50620899718838e-06, "loss": 0.1993, "step": 8430 }, { "epoch": 3.9550140581068414, "grad_norm": 2.799182891845703, "learning_rate": 9.505623242736646e-06, "loss": 0.2042, "step": 8440 }, { "epoch": 3.9597000937207123, "grad_norm": 3.084116220474243, "learning_rate": 9.505037488284911e-06, "loss": 0.1866, "step": 8450 }, { "epoch": 3.964386129334583, "grad_norm": 3.3838253021240234, "learning_rate": 9.504451733833178e-06, "loss": 0.1707, "step": 8460 }, { "epoch": 3.9690721649484537, "grad_norm": 2.19954776763916, "learning_rate": 9.503865979381444e-06, "loss": 0.2248, "step": 8470 }, { "epoch": 3.973758200562324, "grad_norm": 2.2361807823181152, "learning_rate": 9.50328022492971e-06, "loss": 0.1799, "step": 8480 }, { "epoch": 3.9784442361761947, "grad_norm": 2.890557050704956, "learning_rate": 9.502694470477977e-06, "loss": 0.1973, "step": 8490 }, { "epoch": 3.9831302717900656, "grad_norm": 2.7707271575927734, "learning_rate": 9.502108716026243e-06, "loss": 0.2055, "step": 8500 }, { "epoch": 3.9831302717900656, "eval_loss": 0.05037970840930939, "eval_pearson_cosine": 0.7775524083612311, "eval_pearson_dot": 0.5959320587932737, "eval_pearson_euclidean": 0.7480333124520797, "eval_pearson_manhattan": 0.7476197144949026, "eval_runtime": 40.1455, "eval_samples_per_second": 37.364, "eval_spearman_cosine": 0.7831727093187573, "eval_spearman_dot": 0.6016926673738828, "eval_spearman_euclidean": 0.7661681011355215, "eval_spearman_manhattan": 0.7658464354262474, "eval_steps_per_second": 37.364, "step": 8500 }, { "epoch": 3.987816307403936, "grad_norm": 2.6210579872131348, "learning_rate": 9.501522961574508e-06, "loss": 0.2097, "step": 8510 }, { "epoch": 3.992502343017807, "grad_norm": 2.6655242443084717, "learning_rate": 9.500937207122775e-06, "loss": 0.2149, "step": 8520 }, { "epoch": 3.9971883786316775, "grad_norm": 2.6789777278900146, "learning_rate": 9.50035145267104e-06, "loss": 0.2045, "step": 8530 }, { "epoch": 4.001874414245548, "grad_norm": 2.4452877044677734, "learning_rate": 9.499765698219307e-06, "loss": 0.1679, "step": 8540 }, { "epoch": 4.0065604498594185, "grad_norm": 2.454794406890869, "learning_rate": 9.499179943767574e-06, "loss": 0.1334, "step": 8550 }, { "epoch": 4.01124648547329, "grad_norm": 2.799318790435791, "learning_rate": 9.498594189315839e-06, "loss": 0.1677, "step": 8560 }, { "epoch": 4.01593252108716, "grad_norm": 2.66549015045166, "learning_rate": 9.498008434864106e-06, "loss": 0.1337, "step": 8570 }, { "epoch": 4.020618556701031, "grad_norm": 2.5295207500457764, "learning_rate": 9.497422680412371e-06, "loss": 0.1396, "step": 8580 }, { "epoch": 4.025304592314901, "grad_norm": 1.6832513809204102, "learning_rate": 9.496836925960638e-06, "loss": 0.1628, "step": 8590 }, { "epoch": 4.029990627928772, "grad_norm": 2.8461272716522217, "learning_rate": 9.496251171508905e-06, "loss": 0.1558, "step": 8600 }, { "epoch": 4.034676663542643, "grad_norm": 1.970751404762268, "learning_rate": 9.49566541705717e-06, "loss": 0.1166, "step": 8610 }, { "epoch": 4.039362699156514, "grad_norm": 2.8301377296447754, "learning_rate": 9.495079662605437e-06, "loss": 0.1616, "step": 8620 }, { "epoch": 4.044048734770384, "grad_norm": 2.252061367034912, "learning_rate": 9.494493908153702e-06, "loss": 0.1331, "step": 8630 }, { "epoch": 4.048734770384255, "grad_norm": 2.675797700881958, "learning_rate": 9.493908153701968e-06, "loss": 0.1256, "step": 8640 }, { "epoch": 4.053420805998125, "grad_norm": 2.7920572757720947, "learning_rate": 9.493322399250236e-06, "loss": 0.1327, "step": 8650 }, { "epoch": 4.0581068416119965, "grad_norm": 2.8503355979919434, "learning_rate": 9.492736644798501e-06, "loss": 0.141, "step": 8660 }, { "epoch": 4.062792877225867, "grad_norm": 3.100353956222534, "learning_rate": 9.492150890346767e-06, "loss": 0.1501, "step": 8670 }, { "epoch": 4.0674789128397375, "grad_norm": 2.2261886596679688, "learning_rate": 9.491565135895034e-06, "loss": 0.1352, "step": 8680 }, { "epoch": 4.072164948453608, "grad_norm": 2.125128984451294, "learning_rate": 9.490979381443299e-06, "loss": 0.1541, "step": 8690 }, { "epoch": 4.0768509840674785, "grad_norm": 2.3368821144104004, "learning_rate": 9.490393626991566e-06, "loss": 0.144, "step": 8700 }, { "epoch": 4.08153701968135, "grad_norm": 2.3971216678619385, "learning_rate": 9.489807872539833e-06, "loss": 0.1461, "step": 8710 }, { "epoch": 4.08622305529522, "grad_norm": 2.2589287757873535, "learning_rate": 9.489222118088098e-06, "loss": 0.1758, "step": 8720 }, { "epoch": 4.090909090909091, "grad_norm": 2.8072731494903564, "learning_rate": 9.488636363636365e-06, "loss": 0.1454, "step": 8730 }, { "epoch": 4.095595126522961, "grad_norm": 1.921200156211853, "learning_rate": 9.48805060918463e-06, "loss": 0.1404, "step": 8740 }, { "epoch": 4.100281162136832, "grad_norm": 2.808347225189209, "learning_rate": 9.487464854732897e-06, "loss": 0.1345, "step": 8750 }, { "epoch": 4.100281162136832, "eval_loss": 0.0466812327504158, "eval_pearson_cosine": 0.7762485382429531, "eval_pearson_dot": 0.6205844136544023, "eval_pearson_euclidean": 0.7434752403993485, "eval_pearson_manhattan": 0.7429057889387991, "eval_runtime": 39.0625, "eval_samples_per_second": 38.4, "eval_spearman_cosine": 0.7802221022373591, "eval_spearman_dot": 0.6303325253186769, "eval_spearman_euclidean": 0.7611130904217269, "eval_spearman_manhattan": 0.7606144833054299, "eval_steps_per_second": 38.4, "step": 8750 }, { "epoch": 4.104967197750703, "grad_norm": 1.7242465019226074, "learning_rate": 9.486879100281164e-06, "loss": 0.1288, "step": 8760 }, { "epoch": 4.109653233364574, "grad_norm": 2.2536211013793945, "learning_rate": 9.486293345829429e-06, "loss": 0.1313, "step": 8770 }, { "epoch": 4.114339268978444, "grad_norm": 2.289796829223633, "learning_rate": 9.485707591377696e-06, "loss": 0.1689, "step": 8780 }, { "epoch": 4.119025304592315, "grad_norm": 1.961803913116455, "learning_rate": 9.485121836925961e-06, "loss": 0.127, "step": 8790 }, { "epoch": 4.123711340206185, "grad_norm": 1.667372465133667, "learning_rate": 9.484536082474226e-06, "loss": 0.1267, "step": 8800 }, { "epoch": 4.1283973758200565, "grad_norm": 1.8963170051574707, "learning_rate": 9.483950328022495e-06, "loss": 0.1556, "step": 8810 }, { "epoch": 4.133083411433927, "grad_norm": 2.9656922817230225, "learning_rate": 9.48336457357076e-06, "loss": 0.1486, "step": 8820 }, { "epoch": 4.1377694470477975, "grad_norm": 1.9788974523544312, "learning_rate": 9.482778819119025e-06, "loss": 0.1542, "step": 8830 }, { "epoch": 4.142455482661668, "grad_norm": 2.4442574977874756, "learning_rate": 9.482193064667292e-06, "loss": 0.1318, "step": 8840 }, { "epoch": 4.147141518275539, "grad_norm": 2.5937938690185547, "learning_rate": 9.481607310215558e-06, "loss": 0.1451, "step": 8850 }, { "epoch": 4.15182755388941, "grad_norm": 2.443493366241455, "learning_rate": 9.481021555763825e-06, "loss": 0.1569, "step": 8860 }, { "epoch": 4.15651358950328, "grad_norm": 2.4432411193847656, "learning_rate": 9.480435801312091e-06, "loss": 0.1371, "step": 8870 }, { "epoch": 4.161199625117151, "grad_norm": 2.259110689163208, "learning_rate": 9.479850046860357e-06, "loss": 0.1492, "step": 8880 }, { "epoch": 4.165885660731021, "grad_norm": 2.7767279148101807, "learning_rate": 9.479264292408624e-06, "loss": 0.1248, "step": 8890 }, { "epoch": 4.170571696344892, "grad_norm": 2.309516191482544, "learning_rate": 9.478678537956889e-06, "loss": 0.14, "step": 8900 }, { "epoch": 4.175257731958763, "grad_norm": 1.9368172883987427, "learning_rate": 9.478092783505156e-06, "loss": 0.1343, "step": 8910 }, { "epoch": 4.179943767572634, "grad_norm": 3.124490976333618, "learning_rate": 9.477507029053423e-06, "loss": 0.1358, "step": 8920 }, { "epoch": 4.184629803186504, "grad_norm": 2.3121657371520996, "learning_rate": 9.476921274601688e-06, "loss": 0.1516, "step": 8930 }, { "epoch": 4.189315838800375, "grad_norm": 2.1902709007263184, "learning_rate": 9.476335520149955e-06, "loss": 0.1253, "step": 8940 }, { "epoch": 4.194001874414246, "grad_norm": 3.0760653018951416, "learning_rate": 9.47574976569822e-06, "loss": 0.1678, "step": 8950 }, { "epoch": 4.1986879100281165, "grad_norm": 2.8899242877960205, "learning_rate": 9.475164011246485e-06, "loss": 0.137, "step": 8960 }, { "epoch": 4.203373945641987, "grad_norm": 2.4512765407562256, "learning_rate": 9.474578256794752e-06, "loss": 0.1598, "step": 8970 }, { "epoch": 4.2080599812558575, "grad_norm": 2.9443392753601074, "learning_rate": 9.473992502343019e-06, "loss": 0.143, "step": 8980 }, { "epoch": 4.212746016869728, "grad_norm": 3.5128183364868164, "learning_rate": 9.473406747891284e-06, "loss": 0.1556, "step": 8990 }, { "epoch": 4.217432052483598, "grad_norm": 2.6914291381835938, "learning_rate": 9.472820993439551e-06, "loss": 0.1506, "step": 9000 }, { "epoch": 4.217432052483598, "eval_loss": 0.04765741899609566, "eval_pearson_cosine": 0.771092218670848, "eval_pearson_dot": 0.5978116743652393, "eval_pearson_euclidean": 0.7473151449481621, "eval_pearson_manhattan": 0.7466335380915066, "eval_runtime": 39.8559, "eval_samples_per_second": 37.636, "eval_spearman_cosine": 0.7759072486337746, "eval_spearman_dot": 0.6025471021302765, "eval_spearman_euclidean": 0.7630945437887154, "eval_spearman_manhattan": 0.7624729408213863, "eval_steps_per_second": 37.636, "step": 9000 }, { "epoch": 4.22211808809747, "grad_norm": 1.666392207145691, "learning_rate": 9.472235238987816e-06, "loss": 0.1418, "step": 9010 }, { "epoch": 4.22680412371134, "grad_norm": 2.7508490085601807, "learning_rate": 9.471649484536083e-06, "loss": 0.1448, "step": 9020 }, { "epoch": 4.231490159325211, "grad_norm": 2.883143663406372, "learning_rate": 9.471063730084349e-06, "loss": 0.166, "step": 9030 }, { "epoch": 4.236176194939081, "grad_norm": 2.5505452156066895, "learning_rate": 9.470477975632615e-06, "loss": 0.1417, "step": 9040 }, { "epoch": 4.240862230552953, "grad_norm": 2.3343942165374756, "learning_rate": 9.469892221180882e-06, "loss": 0.1569, "step": 9050 }, { "epoch": 4.245548266166823, "grad_norm": 2.2727818489074707, "learning_rate": 9.469306466729148e-06, "loss": 0.1346, "step": 9060 }, { "epoch": 4.250234301780694, "grad_norm": 2.8398542404174805, "learning_rate": 9.468720712277415e-06, "loss": 0.151, "step": 9070 }, { "epoch": 4.254920337394564, "grad_norm": 2.0750465393066406, "learning_rate": 9.46813495782568e-06, "loss": 0.1555, "step": 9080 }, { "epoch": 4.259606373008435, "grad_norm": 2.1490373611450195, "learning_rate": 9.467549203373947e-06, "loss": 0.1719, "step": 9090 }, { "epoch": 4.264292408622305, "grad_norm": 1.9958566427230835, "learning_rate": 9.466963448922214e-06, "loss": 0.1332, "step": 9100 }, { "epoch": 4.2689784442361765, "grad_norm": 2.586869478225708, "learning_rate": 9.466377694470479e-06, "loss": 0.1545, "step": 9110 }, { "epoch": 4.273664479850047, "grad_norm": 1.7282586097717285, "learning_rate": 9.465791940018744e-06, "loss": 0.1608, "step": 9120 }, { "epoch": 4.278350515463917, "grad_norm": 2.7652742862701416, "learning_rate": 9.465206185567011e-06, "loss": 0.1319, "step": 9130 }, { "epoch": 4.283036551077788, "grad_norm": 2.3922176361083984, "learning_rate": 9.464620431115276e-06, "loss": 0.1569, "step": 9140 }, { "epoch": 4.287722586691659, "grad_norm": 2.223822832107544, "learning_rate": 9.464034676663543e-06, "loss": 0.1494, "step": 9150 }, { "epoch": 4.29240862230553, "grad_norm": 2.0714290142059326, "learning_rate": 9.46344892221181e-06, "loss": 0.1506, "step": 9160 }, { "epoch": 4.2970946579194, "grad_norm": 2.4368040561676025, "learning_rate": 9.462863167760075e-06, "loss": 0.1413, "step": 9170 }, { "epoch": 4.301780693533271, "grad_norm": 2.760032892227173, "learning_rate": 9.462277413308342e-06, "loss": 0.1599, "step": 9180 }, { "epoch": 4.306466729147141, "grad_norm": 2.8202686309814453, "learning_rate": 9.461691658856607e-06, "loss": 0.1444, "step": 9190 }, { "epoch": 4.311152764761012, "grad_norm": 2.5001349449157715, "learning_rate": 9.461105904404874e-06, "loss": 0.1322, "step": 9200 }, { "epoch": 4.315838800374883, "grad_norm": 2.5955960750579834, "learning_rate": 9.460520149953141e-06, "loss": 0.1522, "step": 9210 }, { "epoch": 4.320524835988754, "grad_norm": 1.966848611831665, "learning_rate": 9.459934395501406e-06, "loss": 0.133, "step": 9220 }, { "epoch": 4.325210871602624, "grad_norm": 2.4600915908813477, "learning_rate": 9.459348641049673e-06, "loss": 0.1381, "step": 9230 }, { "epoch": 4.329896907216495, "grad_norm": 2.6355576515197754, "learning_rate": 9.458762886597939e-06, "loss": 0.1671, "step": 9240 }, { "epoch": 4.334582942830366, "grad_norm": 1.9884010553359985, "learning_rate": 9.458177132146204e-06, "loss": 0.1565, "step": 9250 }, { "epoch": 4.334582942830366, "eval_loss": 0.04769788682460785, "eval_pearson_cosine": 0.7716762907463419, "eval_pearson_dot": 0.6025868220654615, "eval_pearson_euclidean": 0.7486154713041202, "eval_pearson_manhattan": 0.7480947777024909, "eval_runtime": 39.7245, "eval_samples_per_second": 37.76, "eval_spearman_cosine": 0.7767828328865971, "eval_spearman_dot": 0.6102486537359278, "eval_spearman_euclidean": 0.764536418480009, "eval_spearman_manhattan": 0.7641318665907693, "eval_steps_per_second": 37.76, "step": 9250 }, { "epoch": 4.339268978444236, "grad_norm": 2.462095260620117, "learning_rate": 9.457591377694472e-06, "loss": 0.1574, "step": 9260 }, { "epoch": 4.343955014058107, "grad_norm": 2.8646531105041504, "learning_rate": 9.457005623242738e-06, "loss": 0.1447, "step": 9270 }, { "epoch": 4.348641049671977, "grad_norm": 2.456422805786133, "learning_rate": 9.456419868791003e-06, "loss": 0.1481, "step": 9280 }, { "epoch": 4.353327085285848, "grad_norm": 3.1123046875, "learning_rate": 9.45583411433927e-06, "loss": 0.1465, "step": 9290 }, { "epoch": 4.358013120899718, "grad_norm": 3.3074183464050293, "learning_rate": 9.455248359887535e-06, "loss": 0.1642, "step": 9300 }, { "epoch": 4.36269915651359, "grad_norm": 1.6989983320236206, "learning_rate": 9.454662605435802e-06, "loss": 0.1282, "step": 9310 }, { "epoch": 4.36738519212746, "grad_norm": 1.7987381219863892, "learning_rate": 9.454076850984069e-06, "loss": 0.1456, "step": 9320 }, { "epoch": 4.372071227741331, "grad_norm": 1.6666337251663208, "learning_rate": 9.453491096532334e-06, "loss": 0.1618, "step": 9330 }, { "epoch": 4.376757263355201, "grad_norm": 2.1631813049316406, "learning_rate": 9.452905342080601e-06, "loss": 0.142, "step": 9340 }, { "epoch": 4.381443298969073, "grad_norm": 2.5329623222351074, "learning_rate": 9.452319587628866e-06, "loss": 0.1436, "step": 9350 }, { "epoch": 4.386129334582943, "grad_norm": 1.9239972829818726, "learning_rate": 9.451733833177133e-06, "loss": 0.1361, "step": 9360 }, { "epoch": 4.390815370196814, "grad_norm": 2.091343641281128, "learning_rate": 9.4511480787254e-06, "loss": 0.1525, "step": 9370 }, { "epoch": 4.395501405810684, "grad_norm": 2.544008255004883, "learning_rate": 9.450562324273665e-06, "loss": 0.1454, "step": 9380 }, { "epoch": 4.4001874414245545, "grad_norm": 2.5722694396972656, "learning_rate": 9.449976569821932e-06, "loss": 0.1619, "step": 9390 }, { "epoch": 4.404873477038425, "grad_norm": 2.6824848651885986, "learning_rate": 9.449390815370197e-06, "loss": 0.1529, "step": 9400 }, { "epoch": 4.409559512652296, "grad_norm": 2.406972885131836, "learning_rate": 9.448805060918463e-06, "loss": 0.153, "step": 9410 }, { "epoch": 4.414245548266167, "grad_norm": 2.7198727130889893, "learning_rate": 9.448219306466731e-06, "loss": 0.1657, "step": 9420 }, { "epoch": 4.418931583880037, "grad_norm": 2.511366844177246, "learning_rate": 9.447633552014996e-06, "loss": 0.1409, "step": 9430 }, { "epoch": 4.423617619493908, "grad_norm": 2.4278454780578613, "learning_rate": 9.447047797563262e-06, "loss": 0.1392, "step": 9440 }, { "epoch": 4.428303655107779, "grad_norm": 3.2954795360565186, "learning_rate": 9.446462043111529e-06, "loss": 0.142, "step": 9450 }, { "epoch": 4.43298969072165, "grad_norm": 2.079050302505493, "learning_rate": 9.445876288659794e-06, "loss": 0.1444, "step": 9460 }, { "epoch": 4.43767572633552, "grad_norm": 2.284108877182007, "learning_rate": 9.44529053420806e-06, "loss": 0.1549, "step": 9470 }, { "epoch": 4.442361761949391, "grad_norm": 1.7953790426254272, "learning_rate": 9.444704779756328e-06, "loss": 0.1632, "step": 9480 }, { "epoch": 4.447047797563261, "grad_norm": 2.6434693336486816, "learning_rate": 9.444119025304593e-06, "loss": 0.1453, "step": 9490 }, { "epoch": 4.451733833177133, "grad_norm": 2.6489696502685547, "learning_rate": 9.44353327085286e-06, "loss": 0.1577, "step": 9500 }, { "epoch": 4.451733833177133, "eval_loss": 0.0442158505320549, "eval_pearson_cosine": 0.7793668842918748, "eval_pearson_dot": 0.6181762937224704, "eval_pearson_euclidean": 0.7443899044656206, "eval_pearson_manhattan": 0.7439473885249299, "eval_runtime": 39.5492, "eval_samples_per_second": 37.927, "eval_spearman_cosine": 0.7823666516115477, "eval_spearman_dot": 0.6290855072192552, "eval_spearman_euclidean": 0.7629748815703942, "eval_spearman_manhattan": 0.7626889580909112, "eval_steps_per_second": 37.927, "step": 9500 }, { "epoch": 4.456419868791003, "grad_norm": 2.769876003265381, "learning_rate": 9.442947516401125e-06, "loss": 0.1474, "step": 9510 }, { "epoch": 4.4611059044048735, "grad_norm": 2.8958747386932373, "learning_rate": 9.442361761949392e-06, "loss": 0.1479, "step": 9520 }, { "epoch": 4.465791940018744, "grad_norm": 3.219896078109741, "learning_rate": 9.441776007497657e-06, "loss": 0.1621, "step": 9530 }, { "epoch": 4.4704779756326145, "grad_norm": 2.0421993732452393, "learning_rate": 9.441190253045924e-06, "loss": 0.1473, "step": 9540 }, { "epoch": 4.475164011246486, "grad_norm": 2.189121961593628, "learning_rate": 9.440604498594191e-06, "loss": 0.1751, "step": 9550 }, { "epoch": 4.479850046860356, "grad_norm": 2.7012038230895996, "learning_rate": 9.440018744142456e-06, "loss": 0.1598, "step": 9560 }, { "epoch": 4.484536082474227, "grad_norm": 2.8359875679016113, "learning_rate": 9.439432989690721e-06, "loss": 0.1217, "step": 9570 }, { "epoch": 4.489222118088097, "grad_norm": 2.1257011890411377, "learning_rate": 9.438847235238988e-06, "loss": 0.1395, "step": 9580 }, { "epoch": 4.493908153701968, "grad_norm": 2.8070883750915527, "learning_rate": 9.438261480787255e-06, "loss": 0.1654, "step": 9590 }, { "epoch": 4.498594189315839, "grad_norm": 2.672773838043213, "learning_rate": 9.43767572633552e-06, "loss": 0.133, "step": 9600 }, { "epoch": 4.50328022492971, "grad_norm": 2.5079360008239746, "learning_rate": 9.437089971883787e-06, "loss": 0.131, "step": 9610 }, { "epoch": 4.50796626054358, "grad_norm": 3.078028678894043, "learning_rate": 9.436504217432053e-06, "loss": 0.1634, "step": 9620 }, { "epoch": 4.512652296157451, "grad_norm": 2.3852665424346924, "learning_rate": 9.43591846298032e-06, "loss": 0.1382, "step": 9630 }, { "epoch": 4.517338331771321, "grad_norm": 2.917783260345459, "learning_rate": 9.435332708528585e-06, "loss": 0.1573, "step": 9640 }, { "epoch": 4.5220243673851925, "grad_norm": 2.0304133892059326, "learning_rate": 9.434746954076852e-06, "loss": 0.1261, "step": 9650 }, { "epoch": 4.526710402999063, "grad_norm": 2.4420013427734375, "learning_rate": 9.434161199625119e-06, "loss": 0.1529, "step": 9660 }, { "epoch": 4.5313964386129335, "grad_norm": 3.5100250244140625, "learning_rate": 9.433575445173384e-06, "loss": 0.1415, "step": 9670 }, { "epoch": 4.536082474226804, "grad_norm": 2.250225782394409, "learning_rate": 9.43298969072165e-06, "loss": 0.1389, "step": 9680 }, { "epoch": 4.5407685098406745, "grad_norm": 2.866528272628784, "learning_rate": 9.432403936269916e-06, "loss": 0.1366, "step": 9690 }, { "epoch": 4.545454545454545, "grad_norm": 2.196103096008301, "learning_rate": 9.431818181818183e-06, "loss": 0.139, "step": 9700 }, { "epoch": 4.550140581068416, "grad_norm": 2.383646249771118, "learning_rate": 9.43123242736645e-06, "loss": 0.1503, "step": 9710 }, { "epoch": 4.554826616682287, "grad_norm": 2.5517141819000244, "learning_rate": 9.430646672914715e-06, "loss": 0.1588, "step": 9720 }, { "epoch": 4.559512652296157, "grad_norm": 2.985891103744507, "learning_rate": 9.43006091846298e-06, "loss": 0.1476, "step": 9730 }, { "epoch": 4.564198687910028, "grad_norm": 2.9456253051757812, "learning_rate": 9.429475164011247e-06, "loss": 0.1698, "step": 9740 }, { "epoch": 4.568884723523899, "grad_norm": 2.052727699279785, "learning_rate": 9.428889409559512e-06, "loss": 0.1463, "step": 9750 }, { "epoch": 4.568884723523899, "eval_loss": 0.0455799400806427, "eval_pearson_cosine": 0.7764385842406938, "eval_pearson_dot": 0.5941173098885884, "eval_pearson_euclidean": 0.7405048981360327, "eval_pearson_manhattan": 0.740129834669768, "eval_runtime": 40.4889, "eval_samples_per_second": 37.047, "eval_spearman_cosine": 0.782117792198136, "eval_spearman_dot": 0.599132023248896, "eval_spearman_euclidean": 0.7603847963092804, "eval_spearman_manhattan": 0.7601623940158222, "eval_steps_per_second": 37.047, "step": 9750 }, { "epoch": 4.57357075913777, "grad_norm": 2.1978442668914795, "learning_rate": 9.42830365510778e-06, "loss": 0.1537, "step": 9760 }, { "epoch": 4.57825679475164, "grad_norm": 2.976311206817627, "learning_rate": 9.427717900656046e-06, "loss": 0.1506, "step": 9770 }, { "epoch": 4.582942830365511, "grad_norm": 1.1756877899169922, "learning_rate": 9.427132146204311e-06, "loss": 0.127, "step": 9780 }, { "epoch": 4.587628865979381, "grad_norm": 2.0622363090515137, "learning_rate": 9.426546391752578e-06, "loss": 0.1508, "step": 9790 }, { "epoch": 4.592314901593252, "grad_norm": 2.6205925941467285, "learning_rate": 9.425960637300844e-06, "loss": 0.142, "step": 9800 }, { "epoch": 4.597000937207123, "grad_norm": 1.8698289394378662, "learning_rate": 9.42537488284911e-06, "loss": 0.1375, "step": 9810 }, { "epoch": 4.6016869728209935, "grad_norm": 1.9030426740646362, "learning_rate": 9.424789128397377e-06, "loss": 0.1626, "step": 9820 }, { "epoch": 4.606373008434864, "grad_norm": 2.2842605113983154, "learning_rate": 9.424203373945643e-06, "loss": 0.1518, "step": 9830 }, { "epoch": 4.6110590440487345, "grad_norm": 1.5332591533660889, "learning_rate": 9.42361761949391e-06, "loss": 0.1612, "step": 9840 }, { "epoch": 4.615745079662606, "grad_norm": 2.012329339981079, "learning_rate": 9.423031865042175e-06, "loss": 0.159, "step": 9850 }, { "epoch": 4.620431115276476, "grad_norm": 1.955610752105713, "learning_rate": 9.42244611059044e-06, "loss": 0.1672, "step": 9860 }, { "epoch": 4.625117150890347, "grad_norm": 2.124481439590454, "learning_rate": 9.421860356138709e-06, "loss": 0.1325, "step": 9870 }, { "epoch": 4.629803186504217, "grad_norm": 2.279585599899292, "learning_rate": 9.421274601686974e-06, "loss": 0.1611, "step": 9880 }, { "epoch": 4.634489222118088, "grad_norm": 1.9664572477340698, "learning_rate": 9.420688847235239e-06, "loss": 0.1299, "step": 9890 }, { "epoch": 4.639175257731958, "grad_norm": 2.385551691055298, "learning_rate": 9.420103092783506e-06, "loss": 0.1631, "step": 9900 }, { "epoch": 4.64386129334583, "grad_norm": 1.469383955001831, "learning_rate": 9.419517338331771e-06, "loss": 0.1205, "step": 9910 }, { "epoch": 4.6485473289597, "grad_norm": 2.6736183166503906, "learning_rate": 9.418931583880038e-06, "loss": 0.1675, "step": 9920 }, { "epoch": 4.653233364573571, "grad_norm": 3.122366189956665, "learning_rate": 9.418345829428305e-06, "loss": 0.135, "step": 9930 }, { "epoch": 4.657919400187441, "grad_norm": 2.8057267665863037, "learning_rate": 9.41776007497657e-06, "loss": 0.1318, "step": 9940 }, { "epoch": 4.6626054358013125, "grad_norm": 3.2085843086242676, "learning_rate": 9.417174320524837e-06, "loss": 0.163, "step": 9950 }, { "epoch": 4.667291471415183, "grad_norm": 3.1496477127075195, "learning_rate": 9.416588566073102e-06, "loss": 0.1655, "step": 9960 }, { "epoch": 4.6719775070290535, "grad_norm": 2.3873279094696045, "learning_rate": 9.41600281162137e-06, "loss": 0.1381, "step": 9970 }, { "epoch": 4.676663542642924, "grad_norm": 2.0492842197418213, "learning_rate": 9.415417057169636e-06, "loss": 0.1462, "step": 9980 }, { "epoch": 4.681349578256794, "grad_norm": 3.0173768997192383, "learning_rate": 9.414831302717901e-06, "loss": 0.1711, "step": 9990 }, { "epoch": 4.686035613870665, "grad_norm": 2.1712629795074463, "learning_rate": 9.414245548266168e-06, "loss": 0.16, "step": 10000 }, { "epoch": 4.686035613870665, "eval_loss": 0.0459674596786499, "eval_pearson_cosine": 0.7749308103807095, "eval_pearson_dot": 0.6140435552912393, "eval_pearson_euclidean": 0.7497870505171651, "eval_pearson_manhattan": 0.7494772460672863, "eval_runtime": 40.3087, "eval_samples_per_second": 37.213, "eval_spearman_cosine": 0.7793432902242333, "eval_spearman_dot": 0.6191753058355182, "eval_spearman_euclidean": 0.7659596686028919, "eval_spearman_manhattan": 0.7658321542772971, "eval_steps_per_second": 37.213, "step": 10000 }, { "epoch": 4.690721649484536, "grad_norm": 2.401972532272339, "learning_rate": 9.413659793814434e-06, "loss": 0.1564, "step": 10010 }, { "epoch": 4.695407685098407, "grad_norm": 1.4866012334823608, "learning_rate": 9.413074039362699e-06, "loss": 0.1573, "step": 10020 }, { "epoch": 4.700093720712277, "grad_norm": 2.4179933071136475, "learning_rate": 9.412488284910966e-06, "loss": 0.1613, "step": 10030 }, { "epoch": 4.704779756326148, "grad_norm": 2.4758639335632324, "learning_rate": 9.411902530459233e-06, "loss": 0.1621, "step": 10040 }, { "epoch": 4.709465791940019, "grad_norm": 2.450495481491089, "learning_rate": 9.411316776007498e-06, "loss": 0.1355, "step": 10050 }, { "epoch": 4.71415182755389, "grad_norm": 2.5910768508911133, "learning_rate": 9.410731021555765e-06, "loss": 0.1382, "step": 10060 }, { "epoch": 4.71883786316776, "grad_norm": 3.6344025135040283, "learning_rate": 9.41014526710403e-06, "loss": 0.1609, "step": 10070 }, { "epoch": 4.723523898781631, "grad_norm": 2.099355697631836, "learning_rate": 9.409559512652297e-06, "loss": 0.1476, "step": 10080 }, { "epoch": 4.728209934395501, "grad_norm": 2.334122896194458, "learning_rate": 9.408973758200564e-06, "loss": 0.1444, "step": 10090 }, { "epoch": 4.7328959700093725, "grad_norm": 2.061877965927124, "learning_rate": 9.408388003748829e-06, "loss": 0.1507, "step": 10100 }, { "epoch": 4.737582005623243, "grad_norm": 1.8726963996887207, "learning_rate": 9.407802249297096e-06, "loss": 0.1546, "step": 10110 }, { "epoch": 4.742268041237113, "grad_norm": 2.147475004196167, "learning_rate": 9.407216494845361e-06, "loss": 0.1378, "step": 10120 }, { "epoch": 4.746954076850984, "grad_norm": 2.2211480140686035, "learning_rate": 9.406630740393628e-06, "loss": 0.1636, "step": 10130 }, { "epoch": 4.751640112464854, "grad_norm": 2.4492108821868896, "learning_rate": 9.406044985941893e-06, "loss": 0.1485, "step": 10140 }, { "epoch": 4.756326148078726, "grad_norm": 2.647960662841797, "learning_rate": 9.40545923149016e-06, "loss": 0.1518, "step": 10150 }, { "epoch": 4.761012183692596, "grad_norm": 2.5408220291137695, "learning_rate": 9.404873477038427e-06, "loss": 0.1088, "step": 10160 }, { "epoch": 4.765698219306467, "grad_norm": 2.3026010990142822, "learning_rate": 9.404287722586692e-06, "loss": 0.1414, "step": 10170 }, { "epoch": 4.770384254920337, "grad_norm": 2.1535961627960205, "learning_rate": 9.403701968134958e-06, "loss": 0.1438, "step": 10180 }, { "epoch": 4.775070290534208, "grad_norm": 2.3178975582122803, "learning_rate": 9.403116213683224e-06, "loss": 0.1579, "step": 10190 }, { "epoch": 4.779756326148079, "grad_norm": 1.7218929529190063, "learning_rate": 9.402530459231491e-06, "loss": 0.1565, "step": 10200 }, { "epoch": 4.78444236176195, "grad_norm": 1.7112232446670532, "learning_rate": 9.401944704779757e-06, "loss": 0.1576, "step": 10210 }, { "epoch": 4.78912839737582, "grad_norm": 2.9099996089935303, "learning_rate": 9.401358950328024e-06, "loss": 0.1404, "step": 10220 }, { "epoch": 4.793814432989691, "grad_norm": 2.4656243324279785, "learning_rate": 9.400773195876289e-06, "loss": 0.1537, "step": 10230 }, { "epoch": 4.798500468603561, "grad_norm": 2.836191415786743, "learning_rate": 9.400187441424556e-06, "loss": 0.1444, "step": 10240 }, { "epoch": 4.803186504217432, "grad_norm": 1.7375587224960327, "learning_rate": 9.399601686972821e-06, "loss": 0.148, "step": 10250 }, { "epoch": 4.803186504217432, "eval_loss": 0.04357453063130379, "eval_pearson_cosine": 0.7816648988028874, "eval_pearson_dot": 0.617140121100384, "eval_pearson_euclidean": 0.7424818050017805, "eval_pearson_manhattan": 0.74207686822308, "eval_runtime": 40.6143, "eval_samples_per_second": 36.933, "eval_spearman_cosine": 0.7855171487091146, "eval_spearman_dot": 0.623924985286794, "eval_spearman_euclidean": 0.7600678672581802, "eval_spearman_manhattan": 0.7596066338339531, "eval_steps_per_second": 36.933, "step": 10250 }, { "epoch": 4.807872539831303, "grad_norm": 2.1630876064300537, "learning_rate": 9.399015932521088e-06, "loss": 0.1382, "step": 10260 }, { "epoch": 4.812558575445173, "grad_norm": 2.035651445388794, "learning_rate": 9.398430178069355e-06, "loss": 0.1449, "step": 10270 }, { "epoch": 4.817244611059044, "grad_norm": 3.047879219055176, "learning_rate": 9.39784442361762e-06, "loss": 0.1438, "step": 10280 }, { "epoch": 4.821930646672914, "grad_norm": 3.29184889793396, "learning_rate": 9.397258669165887e-06, "loss": 0.1794, "step": 10290 }, { "epoch": 4.826616682286786, "grad_norm": 2.301718235015869, "learning_rate": 9.396672914714152e-06, "loss": 0.1511, "step": 10300 }, { "epoch": 4.831302717900656, "grad_norm": 3.2951526641845703, "learning_rate": 9.396087160262419e-06, "loss": 0.1624, "step": 10310 }, { "epoch": 4.835988753514527, "grad_norm": 2.597886800765991, "learning_rate": 9.395501405810686e-06, "loss": 0.1464, "step": 10320 }, { "epoch": 4.840674789128397, "grad_norm": 2.1214399337768555, "learning_rate": 9.394915651358951e-06, "loss": 0.1434, "step": 10330 }, { "epoch": 4.845360824742268, "grad_norm": 2.453712224960327, "learning_rate": 9.394329896907216e-06, "loss": 0.145, "step": 10340 }, { "epoch": 4.850046860356139, "grad_norm": 1.6721562147140503, "learning_rate": 9.393744142455483e-06, "loss": 0.1481, "step": 10350 }, { "epoch": 4.85473289597001, "grad_norm": 3.1657016277313232, "learning_rate": 9.393158388003748e-06, "loss": 0.1661, "step": 10360 }, { "epoch": 4.85941893158388, "grad_norm": 2.637096881866455, "learning_rate": 9.392572633552015e-06, "loss": 0.1445, "step": 10370 }, { "epoch": 4.8641049671977505, "grad_norm": 2.9260575771331787, "learning_rate": 9.391986879100282e-06, "loss": 0.1446, "step": 10380 }, { "epoch": 4.868791002811621, "grad_norm": 2.740488052368164, "learning_rate": 9.391401124648548e-06, "loss": 0.1233, "step": 10390 }, { "epoch": 4.873477038425492, "grad_norm": 3.028416156768799, "learning_rate": 9.390815370196814e-06, "loss": 0.1496, "step": 10400 }, { "epoch": 4.878163074039363, "grad_norm": 2.666135787963867, "learning_rate": 9.39022961574508e-06, "loss": 0.1509, "step": 10410 }, { "epoch": 4.882849109653233, "grad_norm": 1.6811872720718384, "learning_rate": 9.389643861293347e-06, "loss": 0.1288, "step": 10420 }, { "epoch": 4.887535145267104, "grad_norm": 3.225088357925415, "learning_rate": 9.389058106841614e-06, "loss": 0.1674, "step": 10430 }, { "epoch": 4.892221180880974, "grad_norm": 2.038783311843872, "learning_rate": 9.388472352389879e-06, "loss": 0.1628, "step": 10440 }, { "epoch": 4.896907216494846, "grad_norm": 2.5650861263275146, "learning_rate": 9.387886597938146e-06, "loss": 0.1423, "step": 10450 }, { "epoch": 4.901593252108716, "grad_norm": 2.9860751628875732, "learning_rate": 9.387300843486411e-06, "loss": 0.14, "step": 10460 }, { "epoch": 4.906279287722587, "grad_norm": 2.6722230911254883, "learning_rate": 9.386715089034678e-06, "loss": 0.1416, "step": 10470 }, { "epoch": 4.910965323336457, "grad_norm": 2.631300926208496, "learning_rate": 9.386129334582945e-06, "loss": 0.1482, "step": 10480 }, { "epoch": 4.915651358950328, "grad_norm": 2.792668104171753, "learning_rate": 9.38554358013121e-06, "loss": 0.1639, "step": 10490 }, { "epoch": 4.920337394564199, "grad_norm": 2.3472137451171875, "learning_rate": 9.384957825679475e-06, "loss": 0.1382, "step": 10500 }, { "epoch": 4.920337394564199, "eval_loss": 0.04462406784296036, "eval_pearson_cosine": 0.7824275239321139, "eval_pearson_dot": 0.6329557539240795, "eval_pearson_euclidean": 0.7443049370608668, "eval_pearson_manhattan": 0.7436570805807037, "eval_runtime": 39.5855, "eval_samples_per_second": 37.893, "eval_spearman_cosine": 0.7871549274543737, "eval_spearman_dot": 0.6424130372572389, "eval_spearman_euclidean": 0.7624881768092765, "eval_spearman_manhattan": 0.7619956515478402, "eval_steps_per_second": 37.893, "step": 10500 }, { "epoch": 4.9250234301780695, "grad_norm": 2.011869192123413, "learning_rate": 9.384372071227742e-06, "loss": 0.1682, "step": 10510 }, { "epoch": 4.92970946579194, "grad_norm": 2.2601213455200195, "learning_rate": 9.383786316776007e-06, "loss": 0.1386, "step": 10520 }, { "epoch": 4.9343955014058105, "grad_norm": 3.00738263130188, "learning_rate": 9.383200562324274e-06, "loss": 0.1712, "step": 10530 }, { "epoch": 4.939081537019681, "grad_norm": 2.7528035640716553, "learning_rate": 9.382614807872541e-06, "loss": 0.163, "step": 10540 }, { "epoch": 4.943767572633552, "grad_norm": 2.2604238986968994, "learning_rate": 9.382029053420806e-06, "loss": 0.1638, "step": 10550 }, { "epoch": 4.948453608247423, "grad_norm": 2.3817455768585205, "learning_rate": 9.381443298969073e-06, "loss": 0.1478, "step": 10560 }, { "epoch": 4.953139643861293, "grad_norm": 2.392261028289795, "learning_rate": 9.380857544517338e-06, "loss": 0.1521, "step": 10570 }, { "epoch": 4.957825679475164, "grad_norm": 2.8620009422302246, "learning_rate": 9.380271790065605e-06, "loss": 0.1477, "step": 10580 }, { "epoch": 4.962511715089034, "grad_norm": 2.658115863800049, "learning_rate": 9.379686035613872e-06, "loss": 0.1251, "step": 10590 }, { "epoch": 4.967197750702906, "grad_norm": 2.8949685096740723, "learning_rate": 9.379100281162138e-06, "loss": 0.1595, "step": 10600 }, { "epoch": 4.971883786316776, "grad_norm": 2.472933292388916, "learning_rate": 9.378514526710404e-06, "loss": 0.1611, "step": 10610 }, { "epoch": 4.976569821930647, "grad_norm": 2.650047540664673, "learning_rate": 9.37792877225867e-06, "loss": 0.1483, "step": 10620 }, { "epoch": 4.981255857544517, "grad_norm": 2.293938398361206, "learning_rate": 9.377343017806937e-06, "loss": 0.1535, "step": 10630 }, { "epoch": 4.985941893158388, "grad_norm": 2.1445188522338867, "learning_rate": 9.376757263355202e-06, "loss": 0.1562, "step": 10640 }, { "epoch": 4.990627928772259, "grad_norm": 1.898871660232544, "learning_rate": 9.376171508903469e-06, "loss": 0.16, "step": 10650 }, { "epoch": 4.9953139643861295, "grad_norm": 1.8987932205200195, "learning_rate": 9.375585754451734e-06, "loss": 0.1348, "step": 10660 }, { "epoch": 5.0, "grad_norm": 1.9247934818267822, "learning_rate": 9.375000000000001e-06, "loss": 0.1357, "step": 10670 }, { "epoch": 5.0046860356138705, "grad_norm": 2.254408836364746, "learning_rate": 9.374414245548266e-06, "loss": 0.0979, "step": 10680 }, { "epoch": 5.009372071227741, "grad_norm": 2.336634874343872, "learning_rate": 9.373828491096533e-06, "loss": 0.1008, "step": 10690 }, { "epoch": 5.014058106841612, "grad_norm": 2.0158193111419678, "learning_rate": 9.3732427366448e-06, "loss": 0.1206, "step": 10700 }, { "epoch": 5.018744142455483, "grad_norm": 2.12335205078125, "learning_rate": 9.372656982193065e-06, "loss": 0.1153, "step": 10710 }, { "epoch": 5.023430178069353, "grad_norm": 1.9908068180084229, "learning_rate": 9.372071227741332e-06, "loss": 0.096, "step": 10720 }, { "epoch": 5.028116213683224, "grad_norm": 2.713801145553589, "learning_rate": 9.371485473289597e-06, "loss": 0.1183, "step": 10730 }, { "epoch": 5.032802249297094, "grad_norm": 1.922958493232727, "learning_rate": 9.370899718837864e-06, "loss": 0.0975, "step": 10740 }, { "epoch": 5.037488284910966, "grad_norm": 1.787152647972107, "learning_rate": 9.37031396438613e-06, "loss": 0.1109, "step": 10750 }, { "epoch": 5.037488284910966, "eval_loss": 0.042590245604515076, "eval_pearson_cosine": 0.7796460620464813, "eval_pearson_dot": 0.6195124617279077, "eval_pearson_euclidean": 0.7433733944090761, "eval_pearson_manhattan": 0.7430945401829945, "eval_runtime": 39.7087, "eval_samples_per_second": 37.775, "eval_spearman_cosine": 0.7845566035417548, "eval_spearman_dot": 0.6248763613915714, "eval_spearman_euclidean": 0.7601630737316083, "eval_spearman_manhattan": 0.7599814027838542, "eval_steps_per_second": 37.775, "step": 10750 }, { "epoch": 5.042174320524836, "grad_norm": 2.3772122859954834, "learning_rate": 9.369728209934396e-06, "loss": 0.1063, "step": 10760 }, { "epoch": 5.046860356138707, "grad_norm": 2.4249303340911865, "learning_rate": 9.369142455482663e-06, "loss": 0.0909, "step": 10770 }, { "epoch": 5.051546391752577, "grad_norm": 2.354619264602661, "learning_rate": 9.368556701030928e-06, "loss": 0.1121, "step": 10780 }, { "epoch": 5.056232427366448, "grad_norm": 2.3761093616485596, "learning_rate": 9.367970946579195e-06, "loss": 0.0962, "step": 10790 }, { "epoch": 5.060918462980319, "grad_norm": 1.9781490564346313, "learning_rate": 9.36738519212746e-06, "loss": 0.1159, "step": 10800 }, { "epoch": 5.0656044985941895, "grad_norm": 2.9498939514160156, "learning_rate": 9.366799437675728e-06, "loss": 0.1046, "step": 10810 }, { "epoch": 5.07029053420806, "grad_norm": 1.618909478187561, "learning_rate": 9.366213683223993e-06, "loss": 0.1145, "step": 10820 }, { "epoch": 5.0749765698219305, "grad_norm": 1.5350381135940552, "learning_rate": 9.36562792877226e-06, "loss": 0.125, "step": 10830 }, { "epoch": 5.079662605435801, "grad_norm": 2.1376562118530273, "learning_rate": 9.365042174320525e-06, "loss": 0.11, "step": 10840 }, { "epoch": 5.084348641049672, "grad_norm": 2.638848066329956, "learning_rate": 9.364456419868792e-06, "loss": 0.1257, "step": 10850 }, { "epoch": 5.089034676663543, "grad_norm": 2.5730180740356445, "learning_rate": 9.363870665417057e-06, "loss": 0.1131, "step": 10860 }, { "epoch": 5.093720712277413, "grad_norm": 1.658488392829895, "learning_rate": 9.363284910965324e-06, "loss": 0.1095, "step": 10870 }, { "epoch": 5.098406747891284, "grad_norm": 2.5831501483917236, "learning_rate": 9.362699156513591e-06, "loss": 0.1159, "step": 10880 }, { "epoch": 5.103092783505154, "grad_norm": 1.3760308027267456, "learning_rate": 9.362113402061856e-06, "loss": 0.1052, "step": 10890 }, { "epoch": 5.107778819119026, "grad_norm": 1.529236078262329, "learning_rate": 9.361527647610123e-06, "loss": 0.1132, "step": 10900 }, { "epoch": 5.112464854732896, "grad_norm": 1.9166224002838135, "learning_rate": 9.360941893158388e-06, "loss": 0.1142, "step": 10910 }, { "epoch": 5.117150890346767, "grad_norm": 2.521615505218506, "learning_rate": 9.360356138706655e-06, "loss": 0.1222, "step": 10920 }, { "epoch": 5.121836925960637, "grad_norm": 2.1560580730438232, "learning_rate": 9.359770384254922e-06, "loss": 0.1081, "step": 10930 }, { "epoch": 5.126522961574508, "grad_norm": 1.3674089908599854, "learning_rate": 9.359184629803187e-06, "loss": 0.1198, "step": 10940 }, { "epoch": 5.131208997188379, "grad_norm": 2.396967649459839, "learning_rate": 9.358598875351454e-06, "loss": 0.1101, "step": 10950 }, { "epoch": 5.1358950328022495, "grad_norm": 1.6446019411087036, "learning_rate": 9.35801312089972e-06, "loss": 0.096, "step": 10960 }, { "epoch": 5.14058106841612, "grad_norm": 2.565040349960327, "learning_rate": 9.357427366447985e-06, "loss": 0.1047, "step": 10970 }, { "epoch": 5.14526710402999, "grad_norm": 1.8515427112579346, "learning_rate": 9.356841611996252e-06, "loss": 0.0955, "step": 10980 }, { "epoch": 5.149953139643861, "grad_norm": 2.294940233230591, "learning_rate": 9.356255857544518e-06, "loss": 0.1212, "step": 10990 }, { "epoch": 5.154639175257732, "grad_norm": 1.995937466621399, "learning_rate": 9.355670103092784e-06, "loss": 0.1009, "step": 11000 }, { "epoch": 5.154639175257732, "eval_loss": 0.04305338114500046, "eval_pearson_cosine": 0.780686355713101, "eval_pearson_dot": 0.6237317182336994, "eval_pearson_euclidean": 0.7427541562137776, "eval_pearson_manhattan": 0.7423336637021691, "eval_runtime": 40.3116, "eval_samples_per_second": 37.21, "eval_spearman_cosine": 0.7834710778584321, "eval_spearman_dot": 0.637744309819412, "eval_spearman_euclidean": 0.7591336014689473, "eval_spearman_manhattan": 0.7590724893258253, "eval_steps_per_second": 37.21, "step": 11000 }, { "epoch": 5.159325210871603, "grad_norm": 2.7338879108428955, "learning_rate": 9.35508434864105e-06, "loss": 0.1157, "step": 11010 }, { "epoch": 5.164011246485473, "grad_norm": 2.1284737586975098, "learning_rate": 9.354498594189316e-06, "loss": 0.1073, "step": 11020 }, { "epoch": 5.168697282099344, "grad_norm": 1.506453037261963, "learning_rate": 9.353912839737583e-06, "loss": 0.1119, "step": 11030 }, { "epoch": 5.173383317713214, "grad_norm": 2.2100448608398438, "learning_rate": 9.35332708528585e-06, "loss": 0.1091, "step": 11040 }, { "epoch": 5.178069353327086, "grad_norm": 2.5225071907043457, "learning_rate": 9.352741330834115e-06, "loss": 0.1089, "step": 11050 }, { "epoch": 5.182755388940956, "grad_norm": 2.282289981842041, "learning_rate": 9.352155576382382e-06, "loss": 0.1165, "step": 11060 }, { "epoch": 5.187441424554827, "grad_norm": 2.467607259750366, "learning_rate": 9.351569821930647e-06, "loss": 0.1104, "step": 11070 }, { "epoch": 5.192127460168697, "grad_norm": 2.524582624435425, "learning_rate": 9.350984067478914e-06, "loss": 0.1316, "step": 11080 }, { "epoch": 5.196813495782568, "grad_norm": 2.6294236183166504, "learning_rate": 9.350398313027181e-06, "loss": 0.1068, "step": 11090 }, { "epoch": 5.201499531396439, "grad_norm": 2.033003091812134, "learning_rate": 9.349812558575446e-06, "loss": 0.1025, "step": 11100 }, { "epoch": 5.206185567010309, "grad_norm": 1.909003734588623, "learning_rate": 9.349226804123713e-06, "loss": 0.0948, "step": 11110 }, { "epoch": 5.21087160262418, "grad_norm": 2.284494400024414, "learning_rate": 9.348641049671978e-06, "loss": 0.1456, "step": 11120 }, { "epoch": 5.21555763823805, "grad_norm": 2.152364492416382, "learning_rate": 9.348055295220243e-06, "loss": 0.1067, "step": 11130 }, { "epoch": 5.220243673851921, "grad_norm": 1.9327460527420044, "learning_rate": 9.34746954076851e-06, "loss": 0.1041, "step": 11140 }, { "epoch": 5.224929709465792, "grad_norm": 1.4854366779327393, "learning_rate": 9.346883786316777e-06, "loss": 0.1014, "step": 11150 }, { "epoch": 5.229615745079663, "grad_norm": 2.438124418258667, "learning_rate": 9.346298031865043e-06, "loss": 0.1009, "step": 11160 }, { "epoch": 5.234301780693533, "grad_norm": 2.2534475326538086, "learning_rate": 9.34571227741331e-06, "loss": 0.1204, "step": 11170 }, { "epoch": 5.238987816307404, "grad_norm": 1.7294119596481323, "learning_rate": 9.345126522961575e-06, "loss": 0.1156, "step": 11180 }, { "epoch": 5.243673851921274, "grad_norm": 2.046323299407959, "learning_rate": 9.344540768509842e-06, "loss": 0.1052, "step": 11190 }, { "epoch": 5.248359887535146, "grad_norm": 1.4913642406463623, "learning_rate": 9.343955014058108e-06, "loss": 0.1108, "step": 11200 }, { "epoch": 5.253045923149016, "grad_norm": 2.705307722091675, "learning_rate": 9.343369259606374e-06, "loss": 0.1196, "step": 11210 }, { "epoch": 5.257731958762887, "grad_norm": 2.288198709487915, "learning_rate": 9.34278350515464e-06, "loss": 0.1111, "step": 11220 }, { "epoch": 5.262417994376757, "grad_norm": 1.7562745809555054, "learning_rate": 9.342197750702906e-06, "loss": 0.1091, "step": 11230 }, { "epoch": 5.2671040299906275, "grad_norm": 1.7820425033569336, "learning_rate": 9.341611996251173e-06, "loss": 0.1274, "step": 11240 }, { "epoch": 5.271790065604499, "grad_norm": 1.3509507179260254, "learning_rate": 9.341026241799438e-06, "loss": 0.1082, "step": 11250 }, { "epoch": 5.271790065604499, "eval_loss": 0.04377983510494232, "eval_pearson_cosine": 0.777409933262625, "eval_pearson_dot": 0.6039188474436514, "eval_pearson_euclidean": 0.7432758495109439, "eval_pearson_manhattan": 0.742989414571646, "eval_runtime": 40.4868, "eval_samples_per_second": 37.049, "eval_spearman_cosine": 0.7818314076527526, "eval_spearman_dot": 0.6129138744459114, "eval_spearman_euclidean": 0.7593037528355521, "eval_spearman_manhattan": 0.7591440996970567, "eval_steps_per_second": 37.049, "step": 11250 }, { "epoch": 5.276476101218369, "grad_norm": 1.9400215148925781, "learning_rate": 9.340440487347705e-06, "loss": 0.1033, "step": 11260 }, { "epoch": 5.28116213683224, "grad_norm": 2.5327563285827637, "learning_rate": 9.339854732895972e-06, "loss": 0.1006, "step": 11270 }, { "epoch": 5.28584817244611, "grad_norm": 2.5133285522460938, "learning_rate": 9.339268978444237e-06, "loss": 0.1092, "step": 11280 }, { "epoch": 5.290534208059981, "grad_norm": 2.192230224609375, "learning_rate": 9.338683223992502e-06, "loss": 0.0995, "step": 11290 }, { "epoch": 5.295220243673852, "grad_norm": 2.7413835525512695, "learning_rate": 9.33809746954077e-06, "loss": 0.1253, "step": 11300 }, { "epoch": 5.299906279287723, "grad_norm": 2.4897637367248535, "learning_rate": 9.337511715089036e-06, "loss": 0.1231, "step": 11310 }, { "epoch": 5.304592314901593, "grad_norm": 2.837437629699707, "learning_rate": 9.336925960637301e-06, "loss": 0.1061, "step": 11320 }, { "epoch": 5.309278350515464, "grad_norm": 2.932845115661621, "learning_rate": 9.336340206185568e-06, "loss": 0.1128, "step": 11330 }, { "epoch": 5.313964386129334, "grad_norm": 2.7689220905303955, "learning_rate": 9.335754451733833e-06, "loss": 0.106, "step": 11340 }, { "epoch": 5.318650421743206, "grad_norm": 2.0174484252929688, "learning_rate": 9.3351686972821e-06, "loss": 0.1068, "step": 11350 }, { "epoch": 5.323336457357076, "grad_norm": 1.9986820220947266, "learning_rate": 9.334582942830366e-06, "loss": 0.1138, "step": 11360 }, { "epoch": 5.3280224929709465, "grad_norm": 2.263935089111328, "learning_rate": 9.333997188378632e-06, "loss": 0.1215, "step": 11370 }, { "epoch": 5.332708528584817, "grad_norm": 2.540949821472168, "learning_rate": 9.3334114339269e-06, "loss": 0.1139, "step": 11380 }, { "epoch": 5.3373945641986875, "grad_norm": 2.293947219848633, "learning_rate": 9.332825679475165e-06, "loss": 0.1113, "step": 11390 }, { "epoch": 5.342080599812559, "grad_norm": 1.2586168050765991, "learning_rate": 9.332239925023432e-06, "loss": 0.1072, "step": 11400 }, { "epoch": 5.346766635426429, "grad_norm": 2.605865955352783, "learning_rate": 9.331654170571697e-06, "loss": 0.1032, "step": 11410 }, { "epoch": 5.3514526710403, "grad_norm": 2.003467321395874, "learning_rate": 9.331068416119962e-06, "loss": 0.1107, "step": 11420 }, { "epoch": 5.35613870665417, "grad_norm": 2.439173460006714, "learning_rate": 9.33048266166823e-06, "loss": 0.1055, "step": 11430 }, { "epoch": 5.360824742268041, "grad_norm": 2.154588460922241, "learning_rate": 9.329896907216496e-06, "loss": 0.1008, "step": 11440 }, { "epoch": 5.365510777881912, "grad_norm": 2.530766725540161, "learning_rate": 9.329311152764761e-06, "loss": 0.1062, "step": 11450 }, { "epoch": 5.370196813495783, "grad_norm": 1.994178056716919, "learning_rate": 9.328725398313028e-06, "loss": 0.1472, "step": 11460 }, { "epoch": 5.374882849109653, "grad_norm": 2.8370258808135986, "learning_rate": 9.328139643861293e-06, "loss": 0.1098, "step": 11470 }, { "epoch": 5.379568884723524, "grad_norm": 1.487667441368103, "learning_rate": 9.32755388940956e-06, "loss": 0.1124, "step": 11480 }, { "epoch": 5.384254920337394, "grad_norm": 2.180283308029175, "learning_rate": 9.326968134957827e-06, "loss": 0.1181, "step": 11490 }, { "epoch": 5.3889409559512655, "grad_norm": 2.2690398693084717, "learning_rate": 9.326382380506092e-06, "loss": 0.1138, "step": 11500 }, { "epoch": 5.3889409559512655, "eval_loss": 0.04145639017224312, "eval_pearson_cosine": 0.782895609059949, "eval_pearson_dot": 0.63470693002958, "eval_pearson_euclidean": 0.740956945446257, "eval_pearson_manhattan": 0.740521959356709, "eval_runtime": 40.2743, "eval_samples_per_second": 37.245, "eval_spearman_cosine": 0.7869693899558259, "eval_spearman_dot": 0.6463692784357133, "eval_spearman_euclidean": 0.7560888074878002, "eval_spearman_manhattan": 0.7559675920990712, "eval_steps_per_second": 37.245, "step": 11500 }, { "epoch": 5.393626991565136, "grad_norm": 1.9818650484085083, "learning_rate": 9.32579662605436e-06, "loss": 0.1057, "step": 11510 }, { "epoch": 5.3983130271790065, "grad_norm": 2.286259651184082, "learning_rate": 9.325210871602624e-06, "loss": 0.1091, "step": 11520 }, { "epoch": 5.402999062792877, "grad_norm": 1.2215278148651123, "learning_rate": 9.324625117150891e-06, "loss": 0.0859, "step": 11530 }, { "epoch": 5.4076850984067475, "grad_norm": 2.2619011402130127, "learning_rate": 9.324039362699158e-06, "loss": 0.1281, "step": 11540 }, { "epoch": 5.412371134020619, "grad_norm": 1.313723087310791, "learning_rate": 9.323453608247423e-06, "loss": 0.0844, "step": 11550 }, { "epoch": 5.417057169634489, "grad_norm": 1.5897408723831177, "learning_rate": 9.32286785379569e-06, "loss": 0.1006, "step": 11560 }, { "epoch": 5.42174320524836, "grad_norm": 2.7910852432250977, "learning_rate": 9.322282099343956e-06, "loss": 0.1199, "step": 11570 }, { "epoch": 5.42642924086223, "grad_norm": 1.7807790040969849, "learning_rate": 9.32169634489222e-06, "loss": 0.1063, "step": 11580 }, { "epoch": 5.431115276476101, "grad_norm": 2.412628650665283, "learning_rate": 9.32111059044049e-06, "loss": 0.103, "step": 11590 }, { "epoch": 5.435801312089972, "grad_norm": 1.029317855834961, "learning_rate": 9.320524835988755e-06, "loss": 0.1025, "step": 11600 }, { "epoch": 5.440487347703843, "grad_norm": 2.206700563430786, "learning_rate": 9.31993908153702e-06, "loss": 0.114, "step": 11610 }, { "epoch": 5.445173383317713, "grad_norm": 1.643839716911316, "learning_rate": 9.319353327085287e-06, "loss": 0.1155, "step": 11620 }, { "epoch": 5.449859418931584, "grad_norm": 2.211933135986328, "learning_rate": 9.318767572633552e-06, "loss": 0.1096, "step": 11630 }, { "epoch": 5.454545454545454, "grad_norm": 2.2456345558166504, "learning_rate": 9.318181818181819e-06, "loss": 0.1153, "step": 11640 }, { "epoch": 5.4592314901593255, "grad_norm": 2.144434928894043, "learning_rate": 9.317596063730086e-06, "loss": 0.1245, "step": 11650 }, { "epoch": 5.463917525773196, "grad_norm": 1.8856313228607178, "learning_rate": 9.317010309278351e-06, "loss": 0.1139, "step": 11660 }, { "epoch": 5.4686035613870665, "grad_norm": 2.399268627166748, "learning_rate": 9.316424554826618e-06, "loss": 0.1189, "step": 11670 }, { "epoch": 5.473289597000937, "grad_norm": 2.7165584564208984, "learning_rate": 9.315838800374883e-06, "loss": 0.1016, "step": 11680 }, { "epoch": 5.4779756326148075, "grad_norm": 2.1864237785339355, "learning_rate": 9.31525304592315e-06, "loss": 0.1164, "step": 11690 }, { "epoch": 5.482661668228679, "grad_norm": 1.5643881559371948, "learning_rate": 9.314667291471417e-06, "loss": 0.1069, "step": 11700 }, { "epoch": 5.487347703842549, "grad_norm": 2.7151424884796143, "learning_rate": 9.314081537019682e-06, "loss": 0.1268, "step": 11710 }, { "epoch": 5.49203373945642, "grad_norm": 2.5677719116210938, "learning_rate": 9.31349578256795e-06, "loss": 0.0967, "step": 11720 }, { "epoch": 5.49671977507029, "grad_norm": 1.8421558141708374, "learning_rate": 9.312910028116214e-06, "loss": 0.111, "step": 11730 }, { "epoch": 5.501405810684162, "grad_norm": 2.0751326084136963, "learning_rate": 9.31232427366448e-06, "loss": 0.12, "step": 11740 }, { "epoch": 5.506091846298032, "grad_norm": 2.1153831481933594, "learning_rate": 9.311738519212747e-06, "loss": 0.1015, "step": 11750 }, { "epoch": 5.506091846298032, "eval_loss": 0.041955165565013885, "eval_pearson_cosine": 0.777849777417785, "eval_pearson_dot": 0.6249138636860323, "eval_pearson_euclidean": 0.7435429429906506, "eval_pearson_manhattan": 0.7436891262385359, "eval_runtime": 40.2707, "eval_samples_per_second": 37.248, "eval_spearman_cosine": 0.7810709480369951, "eval_spearman_dot": 0.6369677452237958, "eval_spearman_euclidean": 0.7589293041705764, "eval_spearman_manhattan": 0.7592155214560544, "eval_steps_per_second": 37.248, "step": 11750 }, { "epoch": 5.510777881911903, "grad_norm": 2.5980823040008545, "learning_rate": 9.311152764761013e-06, "loss": 0.097, "step": 11760 }, { "epoch": 5.515463917525773, "grad_norm": 2.678083658218384, "learning_rate": 9.310567010309279e-06, "loss": 0.1082, "step": 11770 }, { "epoch": 5.520149953139644, "grad_norm": 2.221808910369873, "learning_rate": 9.309981255857546e-06, "loss": 0.0951, "step": 11780 }, { "epoch": 5.524835988753514, "grad_norm": 3.2802789211273193, "learning_rate": 9.30939550140581e-06, "loss": 0.1153, "step": 11790 }, { "epoch": 5.5295220243673855, "grad_norm": 2.6341254711151123, "learning_rate": 9.308809746954078e-06, "loss": 0.1173, "step": 11800 }, { "epoch": 5.534208059981256, "grad_norm": 1.4996163845062256, "learning_rate": 9.308223992502345e-06, "loss": 0.1066, "step": 11810 }, { "epoch": 5.5388940955951265, "grad_norm": 1.6984524726867676, "learning_rate": 9.30763823805061e-06, "loss": 0.0985, "step": 11820 }, { "epoch": 5.543580131208997, "grad_norm": 2.20283842086792, "learning_rate": 9.307052483598877e-06, "loss": 0.1011, "step": 11830 }, { "epoch": 5.548266166822868, "grad_norm": 1.3354145288467407, "learning_rate": 9.306466729147142e-06, "loss": 0.1067, "step": 11840 }, { "epoch": 5.552952202436739, "grad_norm": 2.6207938194274902, "learning_rate": 9.305880974695409e-06, "loss": 0.1159, "step": 11850 }, { "epoch": 5.557638238050609, "grad_norm": 2.9331352710723877, "learning_rate": 9.305295220243674e-06, "loss": 0.1152, "step": 11860 }, { "epoch": 5.56232427366448, "grad_norm": 1.8482978343963623, "learning_rate": 9.304709465791941e-06, "loss": 0.1121, "step": 11870 }, { "epoch": 5.56701030927835, "grad_norm": 1.7065162658691406, "learning_rate": 9.304123711340208e-06, "loss": 0.1256, "step": 11880 }, { "epoch": 5.571696344892221, "grad_norm": 2.6578869819641113, "learning_rate": 9.303537956888473e-06, "loss": 0.1033, "step": 11890 }, { "epoch": 5.576382380506092, "grad_norm": 2.356271266937256, "learning_rate": 9.302952202436738e-06, "loss": 0.1106, "step": 11900 }, { "epoch": 5.581068416119963, "grad_norm": 2.4331321716308594, "learning_rate": 9.302366447985005e-06, "loss": 0.1342, "step": 11910 }, { "epoch": 5.585754451733833, "grad_norm": 2.5583012104034424, "learning_rate": 9.30178069353327e-06, "loss": 0.1232, "step": 11920 }, { "epoch": 5.590440487347704, "grad_norm": 1.2320704460144043, "learning_rate": 9.301194939081537e-06, "loss": 0.1235, "step": 11930 }, { "epoch": 5.595126522961575, "grad_norm": 2.749943256378174, "learning_rate": 9.300609184629804e-06, "loss": 0.1204, "step": 11940 }, { "epoch": 5.5998125585754455, "grad_norm": 1.935255527496338, "learning_rate": 9.30002343017807e-06, "loss": 0.1194, "step": 11950 }, { "epoch": 5.604498594189316, "grad_norm": 2.05389666557312, "learning_rate": 9.299437675726337e-06, "loss": 0.1185, "step": 11960 }, { "epoch": 5.609184629803186, "grad_norm": 2.0948450565338135, "learning_rate": 9.298851921274602e-06, "loss": 0.117, "step": 11970 }, { "epoch": 5.613870665417057, "grad_norm": 2.4468414783477783, "learning_rate": 9.298266166822869e-06, "loss": 0.1042, "step": 11980 }, { "epoch": 5.618556701030927, "grad_norm": 1.7686811685562134, "learning_rate": 9.297680412371136e-06, "loss": 0.1323, "step": 11990 }, { "epoch": 5.623242736644799, "grad_norm": 2.70004940032959, "learning_rate": 9.2970946579194e-06, "loss": 0.1153, "step": 12000 }, { "epoch": 5.623242736644799, "eval_loss": 0.044810693711042404, "eval_pearson_cosine": 0.7729592983209912, "eval_pearson_dot": 0.6141014999130547, "eval_pearson_euclidean": 0.7453445610743417, "eval_pearson_manhattan": 0.7450714346494465, "eval_runtime": 39.9226, "eval_samples_per_second": 37.573, "eval_spearman_cosine": 0.7783555866745624, "eval_spearman_dot": 0.6213713696364336, "eval_spearman_euclidean": 0.7596029469985899, "eval_spearman_manhattan": 0.7598359774134882, "eval_steps_per_second": 37.573, "step": 12000 }, { "epoch": 5.627928772258669, "grad_norm": 2.1159090995788574, "learning_rate": 9.296508903467668e-06, "loss": 0.1142, "step": 12010 }, { "epoch": 5.63261480787254, "grad_norm": 2.249617576599121, "learning_rate": 9.295923149015933e-06, "loss": 0.1091, "step": 12020 }, { "epoch": 5.63730084348641, "grad_norm": 2.0257644653320312, "learning_rate": 9.295337394564198e-06, "loss": 0.1094, "step": 12030 }, { "epoch": 5.641986879100282, "grad_norm": 3.4566030502319336, "learning_rate": 9.294751640112467e-06, "loss": 0.1203, "step": 12040 }, { "epoch": 5.646672914714152, "grad_norm": 3.4752063751220703, "learning_rate": 9.294165885660732e-06, "loss": 0.1359, "step": 12050 }, { "epoch": 5.651358950328023, "grad_norm": 2.0857534408569336, "learning_rate": 9.293580131208997e-06, "loss": 0.0959, "step": 12060 }, { "epoch": 5.656044985941893, "grad_norm": 1.1136995553970337, "learning_rate": 9.292994376757264e-06, "loss": 0.0922, "step": 12070 }, { "epoch": 5.660731021555764, "grad_norm": 1.7703429460525513, "learning_rate": 9.29240862230553e-06, "loss": 0.1314, "step": 12080 }, { "epoch": 5.665417057169634, "grad_norm": 2.678006172180176, "learning_rate": 9.291822867853796e-06, "loss": 0.1124, "step": 12090 }, { "epoch": 5.670103092783505, "grad_norm": 2.3180134296417236, "learning_rate": 9.291237113402063e-06, "loss": 0.0916, "step": 12100 }, { "epoch": 5.674789128397376, "grad_norm": 1.4481223821640015, "learning_rate": 9.290651358950328e-06, "loss": 0.0934, "step": 12110 }, { "epoch": 5.679475164011246, "grad_norm": 1.6825222969055176, "learning_rate": 9.290065604498595e-06, "loss": 0.112, "step": 12120 }, { "epoch": 5.684161199625117, "grad_norm": 1.8293483257293701, "learning_rate": 9.28947985004686e-06, "loss": 0.1006, "step": 12130 }, { "epoch": 5.688847235238988, "grad_norm": 1.5621511936187744, "learning_rate": 9.288894095595127e-06, "loss": 0.1069, "step": 12140 }, { "epoch": 5.693533270852859, "grad_norm": 1.9712047576904297, "learning_rate": 9.288308341143394e-06, "loss": 0.1121, "step": 12150 }, { "epoch": 5.698219306466729, "grad_norm": 1.5489860773086548, "learning_rate": 9.28772258669166e-06, "loss": 0.1152, "step": 12160 }, { "epoch": 5.7029053420806, "grad_norm": 1.5992718935012817, "learning_rate": 9.287136832239927e-06, "loss": 0.1081, "step": 12170 }, { "epoch": 5.70759137769447, "grad_norm": 2.584080219268799, "learning_rate": 9.286551077788192e-06, "loss": 0.115, "step": 12180 }, { "epoch": 5.712277413308341, "grad_norm": 1.9940451383590698, "learning_rate": 9.285965323336457e-06, "loss": 0.1334, "step": 12190 }, { "epoch": 5.716963448922212, "grad_norm": 2.244067668914795, "learning_rate": 9.285379568884726e-06, "loss": 0.1126, "step": 12200 }, { "epoch": 5.721649484536083, "grad_norm": 2.828308343887329, "learning_rate": 9.28479381443299e-06, "loss": 0.0978, "step": 12210 }, { "epoch": 5.726335520149953, "grad_norm": 2.3048787117004395, "learning_rate": 9.284208059981256e-06, "loss": 0.1285, "step": 12220 }, { "epoch": 5.7310215557638235, "grad_norm": 1.9416192770004272, "learning_rate": 9.283622305529523e-06, "loss": 0.114, "step": 12230 }, { "epoch": 5.735707591377695, "grad_norm": 2.0904664993286133, "learning_rate": 9.283036551077788e-06, "loss": 0.1135, "step": 12240 }, { "epoch": 5.740393626991565, "grad_norm": 2.0567378997802734, "learning_rate": 9.282450796626055e-06, "loss": 0.1269, "step": 12250 }, { "epoch": 5.740393626991565, "eval_loss": 0.042026255279779434, "eval_pearson_cosine": 0.7802074426247394, "eval_pearson_dot": 0.621680331450122, "eval_pearson_euclidean": 0.7417166161845756, "eval_pearson_manhattan": 0.7412630516460794, "eval_runtime": 40.3256, "eval_samples_per_second": 37.197, "eval_spearman_cosine": 0.7839546315832364, "eval_spearman_dot": 0.6311338337036988, "eval_spearman_euclidean": 0.7564314536390471, "eval_spearman_manhattan": 0.7562308413966785, "eval_steps_per_second": 37.197, "step": 12250 }, { "epoch": 5.745079662605436, "grad_norm": 1.8017923831939697, "learning_rate": 9.281865042174322e-06, "loss": 0.116, "step": 12260 }, { "epoch": 5.749765698219306, "grad_norm": 2.184885025024414, "learning_rate": 9.281279287722587e-06, "loss": 0.1141, "step": 12270 }, { "epoch": 5.754451733833177, "grad_norm": 2.258493423461914, "learning_rate": 9.280693533270854e-06, "loss": 0.1179, "step": 12280 }, { "epoch": 5.759137769447047, "grad_norm": 3.2758543491363525, "learning_rate": 9.28010777881912e-06, "loss": 0.1354, "step": 12290 }, { "epoch": 5.763823805060919, "grad_norm": 2.4894609451293945, "learning_rate": 9.279522024367386e-06, "loss": 0.1088, "step": 12300 }, { "epoch": 5.768509840674789, "grad_norm": 1.9505615234375, "learning_rate": 9.278936269915653e-06, "loss": 0.1104, "step": 12310 }, { "epoch": 5.77319587628866, "grad_norm": 2.9411964416503906, "learning_rate": 9.278350515463918e-06, "loss": 0.1333, "step": 12320 }, { "epoch": 5.77788191190253, "grad_norm": 2.877175807952881, "learning_rate": 9.277764761012185e-06, "loss": 0.1038, "step": 12330 }, { "epoch": 5.782567947516402, "grad_norm": 2.866086006164551, "learning_rate": 9.27717900656045e-06, "loss": 0.1119, "step": 12340 }, { "epoch": 5.787253983130272, "grad_norm": 2.0350656509399414, "learning_rate": 9.276593252108716e-06, "loss": 0.1218, "step": 12350 }, { "epoch": 5.7919400187441425, "grad_norm": 1.9179691076278687, "learning_rate": 9.276007497656983e-06, "loss": 0.117, "step": 12360 }, { "epoch": 5.796626054358013, "grad_norm": 1.894805669784546, "learning_rate": 9.27542174320525e-06, "loss": 0.1148, "step": 12370 }, { "epoch": 5.8013120899718835, "grad_norm": 1.7460695505142212, "learning_rate": 9.274835988753515e-06, "loss": 0.1347, "step": 12380 }, { "epoch": 5.805998125585754, "grad_norm": 2.7748680114746094, "learning_rate": 9.274250234301782e-06, "loss": 0.1077, "step": 12390 }, { "epoch": 5.810684161199625, "grad_norm": 2.6616406440734863, "learning_rate": 9.273664479850047e-06, "loss": 0.111, "step": 12400 }, { "epoch": 5.815370196813496, "grad_norm": 2.389298439025879, "learning_rate": 9.273078725398314e-06, "loss": 0.1061, "step": 12410 }, { "epoch": 5.820056232427366, "grad_norm": 1.6245344877243042, "learning_rate": 9.272492970946579e-06, "loss": 0.1196, "step": 12420 }, { "epoch": 5.824742268041237, "grad_norm": 2.8195879459381104, "learning_rate": 9.271907216494846e-06, "loss": 0.1265, "step": 12430 }, { "epoch": 5.829428303655108, "grad_norm": 2.538292169570923, "learning_rate": 9.271321462043113e-06, "loss": 0.1038, "step": 12440 }, { "epoch": 5.834114339268979, "grad_norm": 1.4378900527954102, "learning_rate": 9.270735707591378e-06, "loss": 0.1097, "step": 12450 }, { "epoch": 5.838800374882849, "grad_norm": 2.120596170425415, "learning_rate": 9.270149953139645e-06, "loss": 0.1054, "step": 12460 }, { "epoch": 5.84348641049672, "grad_norm": 1.7521088123321533, "learning_rate": 9.26956419868791e-06, "loss": 0.0985, "step": 12470 }, { "epoch": 5.84817244611059, "grad_norm": 2.082510471343994, "learning_rate": 9.268978444236177e-06, "loss": 0.1142, "step": 12480 }, { "epoch": 5.852858481724461, "grad_norm": 2.3451695442199707, "learning_rate": 9.268392689784444e-06, "loss": 0.135, "step": 12490 }, { "epoch": 5.857544517338332, "grad_norm": 1.9797242879867554, "learning_rate": 9.26780693533271e-06, "loss": 0.0888, "step": 12500 }, { "epoch": 5.857544517338332, "eval_loss": 0.04142308607697487, "eval_pearson_cosine": 0.7805016780478695, "eval_pearson_dot": 0.6245128907955291, "eval_pearson_euclidean": 0.7411648320805888, "eval_pearson_manhattan": 0.7407809523735267, "eval_runtime": 39.8943, "eval_samples_per_second": 37.599, "eval_spearman_cosine": 0.7841450480888137, "eval_spearman_dot": 0.636499292941551, "eval_spearman_euclidean": 0.7567573577855005, "eval_spearman_manhattan": 0.7567068203829979, "eval_steps_per_second": 37.599, "step": 12500 }, { "epoch": 5.8622305529522025, "grad_norm": 2.519564628601074, "learning_rate": 9.267221180880975e-06, "loss": 0.1118, "step": 12510 }, { "epoch": 5.866916588566073, "grad_norm": 2.348604679107666, "learning_rate": 9.266635426429241e-06, "loss": 0.1165, "step": 12520 }, { "epoch": 5.8716026241799435, "grad_norm": 1.9285309314727783, "learning_rate": 9.266049671977507e-06, "loss": 0.1168, "step": 12530 }, { "epoch": 5.876288659793815, "grad_norm": 2.3968348503112793, "learning_rate": 9.265463917525774e-06, "loss": 0.1226, "step": 12540 }, { "epoch": 5.880974695407685, "grad_norm": 1.3296688795089722, "learning_rate": 9.26487816307404e-06, "loss": 0.0979, "step": 12550 }, { "epoch": 5.885660731021556, "grad_norm": 2.3655405044555664, "learning_rate": 9.264292408622306e-06, "loss": 0.1163, "step": 12560 }, { "epoch": 5.890346766635426, "grad_norm": 1.9741175174713135, "learning_rate": 9.263706654170573e-06, "loss": 0.1193, "step": 12570 }, { "epoch": 5.895032802249297, "grad_norm": 2.2787790298461914, "learning_rate": 9.263120899718838e-06, "loss": 0.1053, "step": 12580 }, { "epoch": 5.899718837863167, "grad_norm": 2.3028697967529297, "learning_rate": 9.262535145267105e-06, "loss": 0.105, "step": 12590 }, { "epoch": 5.904404873477039, "grad_norm": 2.420567274093628, "learning_rate": 9.261949390815372e-06, "loss": 0.1153, "step": 12600 }, { "epoch": 5.909090909090909, "grad_norm": 1.8667070865631104, "learning_rate": 9.261363636363637e-06, "loss": 0.1206, "step": 12610 }, { "epoch": 5.91377694470478, "grad_norm": 2.433323621749878, "learning_rate": 9.260777881911904e-06, "loss": 0.1107, "step": 12620 }, { "epoch": 5.91846298031865, "grad_norm": 1.6899259090423584, "learning_rate": 9.260192127460169e-06, "loss": 0.1006, "step": 12630 }, { "epoch": 5.9231490159325215, "grad_norm": 3.0744214057922363, "learning_rate": 9.259606373008434e-06, "loss": 0.1165, "step": 12640 }, { "epoch": 5.927835051546392, "grad_norm": 1.6527074575424194, "learning_rate": 9.259020618556703e-06, "loss": 0.1134, "step": 12650 }, { "epoch": 5.9325210871602625, "grad_norm": 2.3836679458618164, "learning_rate": 9.258434864104968e-06, "loss": 0.1195, "step": 12660 }, { "epoch": 5.937207122774133, "grad_norm": 1.6903315782546997, "learning_rate": 9.257849109653233e-06, "loss": 0.125, "step": 12670 }, { "epoch": 5.9418931583880035, "grad_norm": 2.0928590297698975, "learning_rate": 9.2572633552015e-06, "loss": 0.114, "step": 12680 }, { "epoch": 5.946579194001874, "grad_norm": 1.6326929330825806, "learning_rate": 9.256677600749765e-06, "loss": 0.1056, "step": 12690 }, { "epoch": 5.951265229615745, "grad_norm": 2.0911965370178223, "learning_rate": 9.256091846298032e-06, "loss": 0.128, "step": 12700 }, { "epoch": 5.955951265229616, "grad_norm": 1.6815580129623413, "learning_rate": 9.2555060918463e-06, "loss": 0.1211, "step": 12710 }, { "epoch": 5.960637300843486, "grad_norm": 2.4735517501831055, "learning_rate": 9.254920337394565e-06, "loss": 0.1246, "step": 12720 }, { "epoch": 5.965323336457357, "grad_norm": 1.822643756866455, "learning_rate": 9.254334582942831e-06, "loss": 0.1119, "step": 12730 }, { "epoch": 5.970009372071228, "grad_norm": 2.694791793823242, "learning_rate": 9.253748828491097e-06, "loss": 0.1186, "step": 12740 }, { "epoch": 5.974695407685099, "grad_norm": 1.8677020072937012, "learning_rate": 9.253163074039364e-06, "loss": 0.1202, "step": 12750 }, { "epoch": 5.974695407685099, "eval_loss": 0.04308323189616203, "eval_pearson_cosine": 0.7792983938024989, "eval_pearson_dot": 0.6261386080869897, "eval_pearson_euclidean": 0.7413977396293134, "eval_pearson_manhattan": 0.7411537960595762, "eval_runtime": 41.5128, "eval_samples_per_second": 36.133, "eval_spearman_cosine": 0.7834591025676726, "eval_spearman_dot": 0.6404906337885011, "eval_spearman_euclidean": 0.7574889490533175, "eval_spearman_manhattan": 0.7571743616408941, "eval_steps_per_second": 36.133, "step": 12750 }, { "epoch": 5.979381443298969, "grad_norm": 2.534433126449585, "learning_rate": 9.25257731958763e-06, "loss": 0.1275, "step": 12760 }, { "epoch": 5.98406747891284, "grad_norm": 1.7585105895996094, "learning_rate": 9.251991565135896e-06, "loss": 0.1129, "step": 12770 }, { "epoch": 5.98875351452671, "grad_norm": 2.6499111652374268, "learning_rate": 9.251405810684163e-06, "loss": 0.1117, "step": 12780 }, { "epoch": 5.993439550140581, "grad_norm": 2.0610055923461914, "learning_rate": 9.250820056232428e-06, "loss": 0.1137, "step": 12790 }, { "epoch": 5.998125585754452, "grad_norm": 2.293468952178955, "learning_rate": 9.250234301780693e-06, "loss": 0.1178, "step": 12800 }, { "epoch": 6.0028116213683225, "grad_norm": 1.97608482837677, "learning_rate": 9.249648547328962e-06, "loss": 0.1105, "step": 12810 }, { "epoch": 6.007497656982193, "grad_norm": 1.9157034158706665, "learning_rate": 9.249062792877227e-06, "loss": 0.0914, "step": 12820 }, { "epoch": 6.0121836925960634, "grad_norm": 1.4950352907180786, "learning_rate": 9.248477038425492e-06, "loss": 0.0983, "step": 12830 }, { "epoch": 6.016869728209935, "grad_norm": 1.4796631336212158, "learning_rate": 9.247891283973759e-06, "loss": 0.0799, "step": 12840 }, { "epoch": 6.021555763823805, "grad_norm": 1.68351149559021, "learning_rate": 9.247305529522024e-06, "loss": 0.079, "step": 12850 }, { "epoch": 6.026241799437676, "grad_norm": 2.24094295501709, "learning_rate": 9.246719775070291e-06, "loss": 0.0908, "step": 12860 }, { "epoch": 6.030927835051546, "grad_norm": 2.414583683013916, "learning_rate": 9.246134020618558e-06, "loss": 0.0908, "step": 12870 }, { "epoch": 6.035613870665417, "grad_norm": 2.87400221824646, "learning_rate": 9.245548266166823e-06, "loss": 0.085, "step": 12880 }, { "epoch": 6.040299906279288, "grad_norm": 1.8591458797454834, "learning_rate": 9.24496251171509e-06, "loss": 0.0825, "step": 12890 }, { "epoch": 6.044985941893159, "grad_norm": 2.2384636402130127, "learning_rate": 9.244376757263355e-06, "loss": 0.0826, "step": 12900 }, { "epoch": 6.049671977507029, "grad_norm": 1.670571208000183, "learning_rate": 9.243791002811622e-06, "loss": 0.0746, "step": 12910 }, { "epoch": 6.0543580131209, "grad_norm": 1.607620358467102, "learning_rate": 9.243205248359888e-06, "loss": 0.106, "step": 12920 }, { "epoch": 6.05904404873477, "grad_norm": 1.543734073638916, "learning_rate": 9.242619493908155e-06, "loss": 0.0788, "step": 12930 }, { "epoch": 6.0637300843486415, "grad_norm": 2.0840065479278564, "learning_rate": 9.242033739456421e-06, "loss": 0.1013, "step": 12940 }, { "epoch": 6.068416119962512, "grad_norm": 1.8061577081680298, "learning_rate": 9.241447985004687e-06, "loss": 0.0895, "step": 12950 }, { "epoch": 6.073102155576382, "grad_norm": 1.341036081314087, "learning_rate": 9.240862230552952e-06, "loss": 0.0714, "step": 12960 }, { "epoch": 6.077788191190253, "grad_norm": 2.1150712966918945, "learning_rate": 9.240276476101219e-06, "loss": 0.0899, "step": 12970 }, { "epoch": 6.082474226804123, "grad_norm": 2.214730739593506, "learning_rate": 9.239690721649486e-06, "loss": 0.0758, "step": 12980 }, { "epoch": 6.087160262417995, "grad_norm": 1.489686369895935, "learning_rate": 9.239104967197751e-06, "loss": 0.0784, "step": 12990 }, { "epoch": 6.091846298031865, "grad_norm": 1.2778211832046509, "learning_rate": 9.238519212746018e-06, "loss": 0.0941, "step": 13000 }, { "epoch": 6.091846298031865, "eval_loss": 0.0399174839258194, "eval_pearson_cosine": 0.7838266464106027, "eval_pearson_dot": 0.6493223534201924, "eval_pearson_euclidean": 0.739064666910151, "eval_pearson_manhattan": 0.7387769365054666, "eval_runtime": 40.0598, "eval_samples_per_second": 37.444, "eval_spearman_cosine": 0.7872885894711749, "eval_spearman_dot": 0.6641643317048077, "eval_spearman_euclidean": 0.7529671041992676, "eval_spearman_manhattan": 0.752705655614685, "eval_steps_per_second": 37.444, "step": 13000 }, { "epoch": 6.096532333645736, "grad_norm": 2.591386556625366, "learning_rate": 9.237933458294283e-06, "loss": 0.074, "step": 13010 }, { "epoch": 6.101218369259606, "grad_norm": 3.0938313007354736, "learning_rate": 9.23734770384255e-06, "loss": 0.0889, "step": 13020 }, { "epoch": 6.105904404873477, "grad_norm": 1.073983907699585, "learning_rate": 9.236761949390815e-06, "loss": 0.0702, "step": 13030 }, { "epoch": 6.110590440487348, "grad_norm": 1.7386090755462646, "learning_rate": 9.236176194939082e-06, "loss": 0.0965, "step": 13040 }, { "epoch": 6.115276476101219, "grad_norm": 2.1852550506591797, "learning_rate": 9.235590440487349e-06, "loss": 0.0818, "step": 13050 }, { "epoch": 6.119962511715089, "grad_norm": 2.9571776390075684, "learning_rate": 9.235004686035614e-06, "loss": 0.0909, "step": 13060 }, { "epoch": 6.12464854732896, "grad_norm": 1.568499207496643, "learning_rate": 9.234418931583881e-06, "loss": 0.0935, "step": 13070 }, { "epoch": 6.12933458294283, "grad_norm": 2.0286030769348145, "learning_rate": 9.233833177132146e-06, "loss": 0.0923, "step": 13080 }, { "epoch": 6.134020618556701, "grad_norm": 1.3567001819610596, "learning_rate": 9.233247422680413e-06, "loss": 0.0874, "step": 13090 }, { "epoch": 6.138706654170572, "grad_norm": 3.1520678997039795, "learning_rate": 9.23266166822868e-06, "loss": 0.0907, "step": 13100 }, { "epoch": 6.143392689784442, "grad_norm": 1.935091495513916, "learning_rate": 9.232075913776945e-06, "loss": 0.0927, "step": 13110 }, { "epoch": 6.148078725398313, "grad_norm": 2.9123198986053467, "learning_rate": 9.23149015932521e-06, "loss": 0.0986, "step": 13120 }, { "epoch": 6.152764761012183, "grad_norm": 2.592006206512451, "learning_rate": 9.230904404873478e-06, "loss": 0.0864, "step": 13130 }, { "epoch": 6.157450796626055, "grad_norm": 2.0214340686798096, "learning_rate": 9.230318650421743e-06, "loss": 0.1066, "step": 13140 }, { "epoch": 6.162136832239925, "grad_norm": 1.743891716003418, "learning_rate": 9.22973289597001e-06, "loss": 0.0871, "step": 13150 }, { "epoch": 6.166822867853796, "grad_norm": 2.368562698364258, "learning_rate": 9.229147141518277e-06, "loss": 0.0754, "step": 13160 }, { "epoch": 6.171508903467666, "grad_norm": 2.369436264038086, "learning_rate": 9.228561387066542e-06, "loss": 0.0788, "step": 13170 }, { "epoch": 6.176194939081537, "grad_norm": 1.3312957286834717, "learning_rate": 9.227975632614809e-06, "loss": 0.0816, "step": 13180 }, { "epoch": 6.180880974695408, "grad_norm": 1.15755295753479, "learning_rate": 9.227389878163074e-06, "loss": 0.0867, "step": 13190 }, { "epoch": 6.185567010309279, "grad_norm": 1.4255220890045166, "learning_rate": 9.226804123711341e-06, "loss": 0.085, "step": 13200 }, { "epoch": 6.190253045923149, "grad_norm": 2.264315605163574, "learning_rate": 9.226218369259608e-06, "loss": 0.0939, "step": 13210 }, { "epoch": 6.1949390815370196, "grad_norm": 1.314502239227295, "learning_rate": 9.225632614807873e-06, "loss": 0.0847, "step": 13220 }, { "epoch": 6.19962511715089, "grad_norm": 1.463553786277771, "learning_rate": 9.22504686035614e-06, "loss": 0.0733, "step": 13230 }, { "epoch": 6.204311152764761, "grad_norm": 2.397528648376465, "learning_rate": 9.224461105904405e-06, "loss": 0.1004, "step": 13240 }, { "epoch": 6.208997188378632, "grad_norm": 1.8068519830703735, "learning_rate": 9.223875351452672e-06, "loss": 0.081, "step": 13250 }, { "epoch": 6.208997188378632, "eval_loss": 0.04053681343793869, "eval_pearson_cosine": 0.7813660928551514, "eval_pearson_dot": 0.6355779688477607, "eval_pearson_euclidean": 0.7355234843709724, "eval_pearson_manhattan": 0.7352503996205417, "eval_runtime": 39.4829, "eval_samples_per_second": 37.991, "eval_spearman_cosine": 0.7853597304476542, "eval_spearman_dot": 0.6478331015151749, "eval_spearman_euclidean": 0.7514050247961205, "eval_spearman_manhattan": 0.751296576320389, "eval_steps_per_second": 37.991, "step": 13250 }, { "epoch": 6.213683223992502, "grad_norm": 2.0780019760131836, "learning_rate": 9.223289597000939e-06, "loss": 0.1108, "step": 13260 }, { "epoch": 6.218369259606373, "grad_norm": 1.3198286294937134, "learning_rate": 9.222703842549204e-06, "loss": 0.0809, "step": 13270 }, { "epoch": 6.223055295220243, "grad_norm": 1.4620928764343262, "learning_rate": 9.22211808809747e-06, "loss": 0.0803, "step": 13280 }, { "epoch": 6.227741330834115, "grad_norm": 2.1493215560913086, "learning_rate": 9.221532333645736e-06, "loss": 0.0925, "step": 13290 }, { "epoch": 6.232427366447985, "grad_norm": 2.0909435749053955, "learning_rate": 9.220946579194002e-06, "loss": 0.1007, "step": 13300 }, { "epoch": 6.237113402061856, "grad_norm": 1.8717360496520996, "learning_rate": 9.220360824742269e-06, "loss": 0.0907, "step": 13310 }, { "epoch": 6.241799437675726, "grad_norm": 2.817506790161133, "learning_rate": 9.219775070290535e-06, "loss": 0.1027, "step": 13320 }, { "epoch": 6.246485473289597, "grad_norm": 2.344900369644165, "learning_rate": 9.2191893158388e-06, "loss": 0.0993, "step": 13330 }, { "epoch": 6.251171508903468, "grad_norm": 1.5306053161621094, "learning_rate": 9.218603561387068e-06, "loss": 0.0717, "step": 13340 }, { "epoch": 6.2558575445173386, "grad_norm": 1.2907652854919434, "learning_rate": 9.218017806935333e-06, "loss": 0.0813, "step": 13350 }, { "epoch": 6.260543580131209, "grad_norm": 2.5026895999908447, "learning_rate": 9.2174320524836e-06, "loss": 0.107, "step": 13360 }, { "epoch": 6.2652296157450795, "grad_norm": 2.076305389404297, "learning_rate": 9.216846298031867e-06, "loss": 0.0725, "step": 13370 }, { "epoch": 6.26991565135895, "grad_norm": 2.985992908477783, "learning_rate": 9.216260543580132e-06, "loss": 0.1041, "step": 13380 }, { "epoch": 6.274601686972821, "grad_norm": 1.430763602256775, "learning_rate": 9.215674789128399e-06, "loss": 0.0941, "step": 13390 }, { "epoch": 6.279287722586692, "grad_norm": 2.2910046577453613, "learning_rate": 9.215089034676664e-06, "loss": 0.07, "step": 13400 }, { "epoch": 6.283973758200562, "grad_norm": 2.093707799911499, "learning_rate": 9.214503280224931e-06, "loss": 0.085, "step": 13410 }, { "epoch": 6.288659793814433, "grad_norm": 1.2367407083511353, "learning_rate": 9.213917525773196e-06, "loss": 0.0895, "step": 13420 }, { "epoch": 6.293345829428303, "grad_norm": 1.8991947174072266, "learning_rate": 9.213331771321463e-06, "loss": 0.0992, "step": 13430 }, { "epoch": 6.298031865042175, "grad_norm": 2.4072980880737305, "learning_rate": 9.212746016869728e-06, "loss": 0.1033, "step": 13440 }, { "epoch": 6.302717900656045, "grad_norm": 1.8667056560516357, "learning_rate": 9.212160262417995e-06, "loss": 0.0825, "step": 13450 }, { "epoch": 6.307403936269916, "grad_norm": 2.0823960304260254, "learning_rate": 9.21157450796626e-06, "loss": 0.095, "step": 13460 }, { "epoch": 6.312089971883786, "grad_norm": 1.7069936990737915, "learning_rate": 9.210988753514527e-06, "loss": 0.088, "step": 13470 }, { "epoch": 6.316776007497657, "grad_norm": 2.103296995162964, "learning_rate": 9.210402999062794e-06, "loss": 0.0867, "step": 13480 }, { "epoch": 6.321462043111528, "grad_norm": 1.0462993383407593, "learning_rate": 9.20981724461106e-06, "loss": 0.0813, "step": 13490 }, { "epoch": 6.3261480787253985, "grad_norm": 1.5300264358520508, "learning_rate": 9.209231490159326e-06, "loss": 0.0807, "step": 13500 }, { "epoch": 6.3261480787253985, "eval_loss": 0.040135517716407776, "eval_pearson_cosine": 0.7837547061964756, "eval_pearson_dot": 0.6449662712980881, "eval_pearson_euclidean": 0.7344489051322731, "eval_pearson_manhattan": 0.733928021220585, "eval_runtime": 40.9066, "eval_samples_per_second": 36.669, "eval_spearman_cosine": 0.787895557874529, "eval_spearman_dot": 0.6615144668773622, "eval_spearman_euclidean": 0.7513080603143243, "eval_spearman_manhattan": 0.7509655476151297, "eval_steps_per_second": 36.669, "step": 13500 }, { "epoch": 6.330834114339269, "grad_norm": 2.420490264892578, "learning_rate": 9.208645735707592e-06, "loss": 0.086, "step": 13510 }, { "epoch": 6.3355201499531395, "grad_norm": 2.568124771118164, "learning_rate": 9.208059981255859e-06, "loss": 0.0972, "step": 13520 }, { "epoch": 6.34020618556701, "grad_norm": 2.0971317291259766, "learning_rate": 9.207474226804124e-06, "loss": 0.0851, "step": 13530 }, { "epoch": 6.344892221180881, "grad_norm": 2.3092575073242188, "learning_rate": 9.20688847235239e-06, "loss": 0.1103, "step": 13540 }, { "epoch": 6.349578256794752, "grad_norm": 1.0552003383636475, "learning_rate": 9.206302717900658e-06, "loss": 0.0927, "step": 13550 }, { "epoch": 6.354264292408622, "grad_norm": 2.5293657779693604, "learning_rate": 9.205716963448923e-06, "loss": 0.0948, "step": 13560 }, { "epoch": 6.358950328022493, "grad_norm": 2.3516149520874023, "learning_rate": 9.20513120899719e-06, "loss": 0.0997, "step": 13570 }, { "epoch": 6.363636363636363, "grad_norm": 2.1005942821502686, "learning_rate": 9.204545454545455e-06, "loss": 0.0839, "step": 13580 }, { "epoch": 6.368322399250235, "grad_norm": 1.8200384378433228, "learning_rate": 9.203959700093722e-06, "loss": 0.0876, "step": 13590 }, { "epoch": 6.373008434864105, "grad_norm": 2.4939422607421875, "learning_rate": 9.203373945641987e-06, "loss": 0.0912, "step": 13600 }, { "epoch": 6.377694470477976, "grad_norm": 2.695819616317749, "learning_rate": 9.202788191190254e-06, "loss": 0.0797, "step": 13610 }, { "epoch": 6.382380506091846, "grad_norm": 2.2140467166900635, "learning_rate": 9.20220243673852e-06, "loss": 0.0908, "step": 13620 }, { "epoch": 6.387066541705717, "grad_norm": 1.4225996732711792, "learning_rate": 9.201616682286786e-06, "loss": 0.0888, "step": 13630 }, { "epoch": 6.391752577319588, "grad_norm": 1.8974356651306152, "learning_rate": 9.201030927835051e-06, "loss": 0.0721, "step": 13640 }, { "epoch": 6.3964386129334585, "grad_norm": 2.853360176086426, "learning_rate": 9.200445173383318e-06, "loss": 0.0998, "step": 13650 }, { "epoch": 6.401124648547329, "grad_norm": 1.7699189186096191, "learning_rate": 9.199859418931585e-06, "loss": 0.075, "step": 13660 }, { "epoch": 6.4058106841611995, "grad_norm": 1.9779934883117676, "learning_rate": 9.19927366447985e-06, "loss": 0.0854, "step": 13670 }, { "epoch": 6.41049671977507, "grad_norm": 1.0165759325027466, "learning_rate": 9.198687910028117e-06, "loss": 0.0694, "step": 13680 }, { "epoch": 6.415182755388941, "grad_norm": 2.2649970054626465, "learning_rate": 9.198102155576383e-06, "loss": 0.1138, "step": 13690 }, { "epoch": 6.419868791002812, "grad_norm": 1.683463454246521, "learning_rate": 9.19751640112465e-06, "loss": 0.0838, "step": 13700 }, { "epoch": 6.424554826616682, "grad_norm": 2.579838275909424, "learning_rate": 9.196930646672916e-06, "loss": 0.1073, "step": 13710 }, { "epoch": 6.429240862230553, "grad_norm": 2.041942834854126, "learning_rate": 9.196344892221182e-06, "loss": 0.0804, "step": 13720 }, { "epoch": 6.433926897844423, "grad_norm": 1.7387852668762207, "learning_rate": 9.195759137769449e-06, "loss": 0.0994, "step": 13730 }, { "epoch": 6.438612933458295, "grad_norm": 1.3456875085830688, "learning_rate": 9.195173383317714e-06, "loss": 0.0957, "step": 13740 }, { "epoch": 6.443298969072165, "grad_norm": 1.8419511318206787, "learning_rate": 9.194587628865979e-06, "loss": 0.0863, "step": 13750 }, { "epoch": 6.443298969072165, "eval_loss": 0.04049157723784447, "eval_pearson_cosine": 0.7813714988340337, "eval_pearson_dot": 0.6324385596537514, "eval_pearson_euclidean": 0.7407851614356957, "eval_pearson_manhattan": 0.7403546898217854, "eval_runtime": 39.3844, "eval_samples_per_second": 38.086, "eval_spearman_cosine": 0.7841122028697166, "eval_spearman_dot": 0.6478511175419069, "eval_spearman_euclidean": 0.7589082543335365, "eval_spearman_manhattan": 0.7586951153918645, "eval_steps_per_second": 38.086, "step": 13750 }, { "epoch": 6.447985004686036, "grad_norm": 2.0003013610839844, "learning_rate": 9.194001874414246e-06, "loss": 0.0752, "step": 13760 }, { "epoch": 6.452671040299906, "grad_norm": 1.2026764154434204, "learning_rate": 9.193416119962513e-06, "loss": 0.0879, "step": 13770 }, { "epoch": 6.457357075913777, "grad_norm": 1.3370740413665771, "learning_rate": 9.192830365510778e-06, "loss": 0.1001, "step": 13780 }, { "epoch": 6.462043111527648, "grad_norm": 2.915313482284546, "learning_rate": 9.192244611059045e-06, "loss": 0.0893, "step": 13790 }, { "epoch": 6.4667291471415185, "grad_norm": 2.472095012664795, "learning_rate": 9.19165885660731e-06, "loss": 0.0833, "step": 13800 }, { "epoch": 6.471415182755389, "grad_norm": 2.4989047050476074, "learning_rate": 9.191073102155577e-06, "loss": 0.0808, "step": 13810 }, { "epoch": 6.4761012183692594, "grad_norm": 1.9011194705963135, "learning_rate": 9.190487347703844e-06, "loss": 0.0845, "step": 13820 }, { "epoch": 6.48078725398313, "grad_norm": 1.4347310066223145, "learning_rate": 9.18990159325211e-06, "loss": 0.1029, "step": 13830 }, { "epoch": 6.485473289597001, "grad_norm": 2.54189395904541, "learning_rate": 9.189315838800376e-06, "loss": 0.0779, "step": 13840 }, { "epoch": 6.490159325210872, "grad_norm": 1.2057007551193237, "learning_rate": 9.188730084348641e-06, "loss": 0.0894, "step": 13850 }, { "epoch": 6.494845360824742, "grad_norm": 1.7278800010681152, "learning_rate": 9.188144329896908e-06, "loss": 0.0779, "step": 13860 }, { "epoch": 6.499531396438613, "grad_norm": 1.5743318796157837, "learning_rate": 9.187558575445175e-06, "loss": 0.0927, "step": 13870 }, { "epoch": 6.504217432052483, "grad_norm": 1.5857350826263428, "learning_rate": 9.18697282099344e-06, "loss": 0.0947, "step": 13880 }, { "epoch": 6.508903467666355, "grad_norm": 1.6598036289215088, "learning_rate": 9.186387066541707e-06, "loss": 0.0686, "step": 13890 }, { "epoch": 6.513589503280225, "grad_norm": 1.861210823059082, "learning_rate": 9.185801312089973e-06, "loss": 0.0785, "step": 13900 }, { "epoch": 6.518275538894096, "grad_norm": 2.236298084259033, "learning_rate": 9.185215557638238e-06, "loss": 0.0786, "step": 13910 }, { "epoch": 6.522961574507966, "grad_norm": 2.7919623851776123, "learning_rate": 9.184629803186505e-06, "loss": 0.091, "step": 13920 }, { "epoch": 6.527647610121837, "grad_norm": 2.071950674057007, "learning_rate": 9.184044048734772e-06, "loss": 0.0815, "step": 13930 }, { "epoch": 6.532333645735708, "grad_norm": 2.533116102218628, "learning_rate": 9.183458294283037e-06, "loss": 0.0787, "step": 13940 }, { "epoch": 6.5370196813495784, "grad_norm": 2.1527774333953857, "learning_rate": 9.182872539831304e-06, "loss": 0.0749, "step": 13950 }, { "epoch": 6.541705716963449, "grad_norm": 1.961341142654419, "learning_rate": 9.182286785379569e-06, "loss": 0.0953, "step": 13960 }, { "epoch": 6.546391752577319, "grad_norm": 2.28048038482666, "learning_rate": 9.181701030927836e-06, "loss": 0.0915, "step": 13970 }, { "epoch": 6.55107778819119, "grad_norm": 2.0521295070648193, "learning_rate": 9.181115276476103e-06, "loss": 0.0895, "step": 13980 }, { "epoch": 6.555763823805061, "grad_norm": 1.6671199798583984, "learning_rate": 9.180529522024368e-06, "loss": 0.0728, "step": 13990 }, { "epoch": 6.560449859418932, "grad_norm": 1.6179335117340088, "learning_rate": 9.179943767572635e-06, "loss": 0.0948, "step": 14000 }, { "epoch": 6.560449859418932, "eval_loss": 0.03971678018569946, "eval_pearson_cosine": 0.7830490563978572, "eval_pearson_dot": 0.6307782966974682, "eval_pearson_euclidean": 0.7414858554074186, "eval_pearson_manhattan": 0.7410214751653541, "eval_runtime": 40.1664, "eval_samples_per_second": 37.345, "eval_spearman_cosine": 0.7866220854382757, "eval_spearman_dot": 0.6460234579111318, "eval_spearman_euclidean": 0.7578877607249501, "eval_spearman_manhattan": 0.757795808702236, "eval_steps_per_second": 37.345, "step": 14000 }, { "epoch": 6.565135895032802, "grad_norm": 1.1771986484527588, "learning_rate": 9.1793580131209e-06, "loss": 0.0723, "step": 14010 }, { "epoch": 6.569821930646673, "grad_norm": 3.105875253677368, "learning_rate": 9.178772258669167e-06, "loss": 0.0831, "step": 14020 }, { "epoch": 6.574507966260543, "grad_norm": 2.368614673614502, "learning_rate": 9.178186504217432e-06, "loss": 0.0896, "step": 14030 }, { "epoch": 6.579194001874415, "grad_norm": 1.0225666761398315, "learning_rate": 9.1776007497657e-06, "loss": 0.0773, "step": 14040 }, { "epoch": 6.583880037488285, "grad_norm": 1.7535622119903564, "learning_rate": 9.177014995313966e-06, "loss": 0.0966, "step": 14050 }, { "epoch": 6.588566073102156, "grad_norm": 1.9074852466583252, "learning_rate": 9.176429240862231e-06, "loss": 0.099, "step": 14060 }, { "epoch": 6.593252108716026, "grad_norm": 1.9208943843841553, "learning_rate": 9.175843486410497e-06, "loss": 0.0829, "step": 14070 }, { "epoch": 6.597938144329897, "grad_norm": 1.6164686679840088, "learning_rate": 9.175257731958764e-06, "loss": 0.0791, "step": 14080 }, { "epoch": 6.602624179943768, "grad_norm": 2.0227229595184326, "learning_rate": 9.17467197750703e-06, "loss": 0.087, "step": 14090 }, { "epoch": 6.607310215557638, "grad_norm": 2.0741658210754395, "learning_rate": 9.174086223055296e-06, "loss": 0.0832, "step": 14100 }, { "epoch": 6.611996251171509, "grad_norm": 2.0126988887786865, "learning_rate": 9.173500468603563e-06, "loss": 0.0959, "step": 14110 }, { "epoch": 6.616682286785379, "grad_norm": 2.8039979934692383, "learning_rate": 9.172914714151828e-06, "loss": 0.0876, "step": 14120 }, { "epoch": 6.62136832239925, "grad_norm": 2.453516721725464, "learning_rate": 9.172328959700095e-06, "loss": 0.0946, "step": 14130 }, { "epoch": 6.626054358013121, "grad_norm": 1.891790509223938, "learning_rate": 9.17174320524836e-06, "loss": 0.0995, "step": 14140 }, { "epoch": 6.630740393626992, "grad_norm": 2.185473680496216, "learning_rate": 9.171157450796627e-06, "loss": 0.0766, "step": 14150 }, { "epoch": 6.635426429240862, "grad_norm": 1.8330421447753906, "learning_rate": 9.170571696344894e-06, "loss": 0.0944, "step": 14160 }, { "epoch": 6.640112464854733, "grad_norm": 1.9126405715942383, "learning_rate": 9.169985941893159e-06, "loss": 0.0903, "step": 14170 }, { "epoch": 6.644798500468603, "grad_norm": 2.1286425590515137, "learning_rate": 9.169400187441426e-06, "loss": 0.0909, "step": 14180 }, { "epoch": 6.649484536082475, "grad_norm": 2.539160966873169, "learning_rate": 9.168814432989691e-06, "loss": 0.0842, "step": 14190 }, { "epoch": 6.654170571696345, "grad_norm": 2.383507490158081, "learning_rate": 9.168228678537958e-06, "loss": 0.0884, "step": 14200 }, { "epoch": 6.658856607310216, "grad_norm": 1.336970567703247, "learning_rate": 9.167642924086225e-06, "loss": 0.0832, "step": 14210 }, { "epoch": 6.663542642924086, "grad_norm": 1.9437108039855957, "learning_rate": 9.16705716963449e-06, "loss": 0.0939, "step": 14220 }, { "epoch": 6.6682286785379565, "grad_norm": 1.757957100868225, "learning_rate": 9.166471415182755e-06, "loss": 0.0789, "step": 14230 }, { "epoch": 6.672914714151828, "grad_norm": 1.8997693061828613, "learning_rate": 9.165885660731022e-06, "loss": 0.0871, "step": 14240 }, { "epoch": 6.677600749765698, "grad_norm": 2.3691928386688232, "learning_rate": 9.165299906279288e-06, "loss": 0.0919, "step": 14250 }, { "epoch": 6.677600749765698, "eval_loss": 0.04086451604962349, "eval_pearson_cosine": 0.7820280058953486, "eval_pearson_dot": 0.6341032311060246, "eval_pearson_euclidean": 0.7402547311501273, "eval_pearson_manhattan": 0.7402377003069951, "eval_runtime": 39.165, "eval_samples_per_second": 38.3, "eval_spearman_cosine": 0.7858084087028067, "eval_spearman_dot": 0.6459085165502623, "eval_spearman_euclidean": 0.7544028045089441, "eval_spearman_manhattan": 0.7545435768510885, "eval_steps_per_second": 38.3, "step": 14250 }, { "epoch": 6.682286785379569, "grad_norm": 1.7762689590454102, "learning_rate": 9.164714151827554e-06, "loss": 0.0917, "step": 14260 }, { "epoch": 6.686972820993439, "grad_norm": 2.296797037124634, "learning_rate": 9.164128397375821e-06, "loss": 0.0843, "step": 14270 }, { "epoch": 6.69165885660731, "grad_norm": 2.3844399452209473, "learning_rate": 9.163542642924087e-06, "loss": 0.0987, "step": 14280 }, { "epoch": 6.696344892221181, "grad_norm": 1.65080988407135, "learning_rate": 9.162956888472354e-06, "loss": 0.0644, "step": 14290 }, { "epoch": 6.701030927835052, "grad_norm": 2.429184913635254, "learning_rate": 9.162371134020619e-06, "loss": 0.0913, "step": 14300 }, { "epoch": 6.705716963448922, "grad_norm": 2.7205028533935547, "learning_rate": 9.161785379568886e-06, "loss": 0.0955, "step": 14310 }, { "epoch": 6.710402999062793, "grad_norm": 1.9049710035324097, "learning_rate": 9.161199625117153e-06, "loss": 0.0943, "step": 14320 }, { "epoch": 6.715089034676663, "grad_norm": 1.4774081707000732, "learning_rate": 9.160613870665418e-06, "loss": 0.0909, "step": 14330 }, { "epoch": 6.719775070290535, "grad_norm": 2.102787971496582, "learning_rate": 9.160028116213685e-06, "loss": 0.0757, "step": 14340 }, { "epoch": 6.724461105904405, "grad_norm": 1.7760144472122192, "learning_rate": 9.15944236176195e-06, "loss": 0.0949, "step": 14350 }, { "epoch": 6.7291471415182755, "grad_norm": 2.6216742992401123, "learning_rate": 9.158856607310215e-06, "loss": 0.0997, "step": 14360 }, { "epoch": 6.733833177132146, "grad_norm": 1.8759592771530151, "learning_rate": 9.158270852858484e-06, "loss": 0.0898, "step": 14370 }, { "epoch": 6.7385192127460165, "grad_norm": 1.7543883323669434, "learning_rate": 9.157685098406749e-06, "loss": 0.0913, "step": 14380 }, { "epoch": 6.743205248359888, "grad_norm": 2.2106900215148926, "learning_rate": 9.157099343955014e-06, "loss": 0.0996, "step": 14390 }, { "epoch": 6.747891283973758, "grad_norm": 2.408613443374634, "learning_rate": 9.156513589503281e-06, "loss": 0.0988, "step": 14400 }, { "epoch": 6.752577319587629, "grad_norm": 3.430854082107544, "learning_rate": 9.155927835051546e-06, "loss": 0.0998, "step": 14410 }, { "epoch": 6.757263355201499, "grad_norm": 2.024101495742798, "learning_rate": 9.155342080599813e-06, "loss": 0.0784, "step": 14420 }, { "epoch": 6.76194939081537, "grad_norm": 1.9962611198425293, "learning_rate": 9.15475632614808e-06, "loss": 0.0778, "step": 14430 }, { "epoch": 6.766635426429241, "grad_norm": 2.7750437259674072, "learning_rate": 9.154170571696345e-06, "loss": 0.0787, "step": 14440 }, { "epoch": 6.771321462043112, "grad_norm": 2.2293503284454346, "learning_rate": 9.153584817244612e-06, "loss": 0.0866, "step": 14450 }, { "epoch": 6.776007497656982, "grad_norm": 2.104199171066284, "learning_rate": 9.152999062792878e-06, "loss": 0.0886, "step": 14460 }, { "epoch": 6.780693533270853, "grad_norm": 2.1825919151306152, "learning_rate": 9.152413308341144e-06, "loss": 0.0881, "step": 14470 }, { "epoch": 6.785379568884723, "grad_norm": 2.0743556022644043, "learning_rate": 9.151827553889411e-06, "loss": 0.0816, "step": 14480 }, { "epoch": 6.7900656044985945, "grad_norm": 1.4492499828338623, "learning_rate": 9.151241799437677e-06, "loss": 0.0798, "step": 14490 }, { "epoch": 6.794751640112465, "grad_norm": 2.3385043144226074, "learning_rate": 9.150656044985944e-06, "loss": 0.0784, "step": 14500 }, { "epoch": 6.794751640112465, "eval_loss": 0.040780164301395416, "eval_pearson_cosine": 0.7793833022968641, "eval_pearson_dot": 0.6305632848645555, "eval_pearson_euclidean": 0.7311736352551961, "eval_pearson_manhattan": 0.7308022032341484, "eval_runtime": 40.6522, "eval_samples_per_second": 36.898, "eval_spearman_cosine": 0.7838836973690695, "eval_spearman_dot": 0.6427126264330253, "eval_spearman_euclidean": 0.7493899504087712, "eval_spearman_manhattan": 0.7495131408878735, "eval_steps_per_second": 36.898, "step": 14500 }, { "epoch": 6.7994376757263355, "grad_norm": 2.441380023956299, "learning_rate": 9.150070290534209e-06, "loss": 0.0866, "step": 14510 }, { "epoch": 6.804123711340206, "grad_norm": 2.3444080352783203, "learning_rate": 9.149484536082474e-06, "loss": 0.1104, "step": 14520 }, { "epoch": 6.8088097469540765, "grad_norm": 2.7428460121154785, "learning_rate": 9.148898781630741e-06, "loss": 0.1116, "step": 14530 }, { "epoch": 6.813495782567948, "grad_norm": 2.756432294845581, "learning_rate": 9.148313027179008e-06, "loss": 0.0942, "step": 14540 }, { "epoch": 6.818181818181818, "grad_norm": 2.891023635864258, "learning_rate": 9.147727272727273e-06, "loss": 0.1101, "step": 14550 }, { "epoch": 6.822867853795689, "grad_norm": 2.115098476409912, "learning_rate": 9.14714151827554e-06, "loss": 0.0896, "step": 14560 }, { "epoch": 6.827553889409559, "grad_norm": 1.161385178565979, "learning_rate": 9.146555763823805e-06, "loss": 0.0881, "step": 14570 }, { "epoch": 6.83223992502343, "grad_norm": 2.266988754272461, "learning_rate": 9.145970009372072e-06, "loss": 0.0904, "step": 14580 }, { "epoch": 6.836925960637301, "grad_norm": 2.111978769302368, "learning_rate": 9.145384254920339e-06, "loss": 0.0864, "step": 14590 }, { "epoch": 6.841611996251172, "grad_norm": 2.025771141052246, "learning_rate": 9.144798500468604e-06, "loss": 0.0947, "step": 14600 }, { "epoch": 6.846298031865042, "grad_norm": 1.6989368200302124, "learning_rate": 9.144212746016871e-06, "loss": 0.0906, "step": 14610 }, { "epoch": 6.850984067478913, "grad_norm": 2.4871666431427, "learning_rate": 9.143626991565136e-06, "loss": 0.0969, "step": 14620 }, { "epoch": 6.855670103092783, "grad_norm": 2.155759572982788, "learning_rate": 9.143041237113403e-06, "loss": 0.0978, "step": 14630 }, { "epoch": 6.8603561387066545, "grad_norm": 3.0879483222961426, "learning_rate": 9.142455482661668e-06, "loss": 0.0928, "step": 14640 }, { "epoch": 6.865042174320525, "grad_norm": 1.6940726041793823, "learning_rate": 9.141869728209935e-06, "loss": 0.0879, "step": 14650 }, { "epoch": 6.8697282099343955, "grad_norm": 2.233914613723755, "learning_rate": 9.141283973758202e-06, "loss": 0.0894, "step": 14660 }, { "epoch": 6.874414245548266, "grad_norm": 2.0306718349456787, "learning_rate": 9.140698219306468e-06, "loss": 0.0844, "step": 14670 }, { "epoch": 6.8791002811621365, "grad_norm": 1.8942639827728271, "learning_rate": 9.140112464854733e-06, "loss": 0.0817, "step": 14680 }, { "epoch": 6.883786316776008, "grad_norm": 2.03265643119812, "learning_rate": 9.139526710403e-06, "loss": 0.0852, "step": 14690 }, { "epoch": 6.888472352389878, "grad_norm": 1.7808202505111694, "learning_rate": 9.138940955951267e-06, "loss": 0.0896, "step": 14700 }, { "epoch": 6.893158388003749, "grad_norm": 2.668078660964966, "learning_rate": 9.138355201499532e-06, "loss": 0.0973, "step": 14710 }, { "epoch": 6.897844423617619, "grad_norm": 1.402289867401123, "learning_rate": 9.137769447047799e-06, "loss": 0.1026, "step": 14720 }, { "epoch": 6.90253045923149, "grad_norm": 2.591413974761963, "learning_rate": 9.137183692596064e-06, "loss": 0.1042, "step": 14730 }, { "epoch": 6.907216494845361, "grad_norm": 2.172842264175415, "learning_rate": 9.136597938144331e-06, "loss": 0.0896, "step": 14740 }, { "epoch": 6.911902530459232, "grad_norm": 2.8410751819610596, "learning_rate": 9.136012183692596e-06, "loss": 0.0821, "step": 14750 }, { "epoch": 6.911902530459232, "eval_loss": 0.04055028408765793, "eval_pearson_cosine": 0.7788665525758081, "eval_pearson_dot": 0.6376645263249117, "eval_pearson_euclidean": 0.727043862062235, "eval_pearson_manhattan": 0.7265471618369332, "eval_runtime": 40.6024, "eval_samples_per_second": 36.944, "eval_spearman_cosine": 0.7822270840908377, "eval_spearman_dot": 0.6567383479059669, "eval_spearman_euclidean": 0.744619349149987, "eval_spearman_manhattan": 0.7446135946974944, "eval_steps_per_second": 36.944, "step": 14750 }, { "epoch": 6.916588566073102, "grad_norm": 2.586047649383545, "learning_rate": 9.135426429240863e-06, "loss": 0.0965, "step": 14760 }, { "epoch": 6.921274601686973, "grad_norm": 1.9667673110961914, "learning_rate": 9.13484067478913e-06, "loss": 0.0913, "step": 14770 }, { "epoch": 6.925960637300843, "grad_norm": 2.6598875522613525, "learning_rate": 9.134254920337395e-06, "loss": 0.0863, "step": 14780 }, { "epoch": 6.9306466729147145, "grad_norm": 1.5291812419891357, "learning_rate": 9.133669165885662e-06, "loss": 0.0699, "step": 14790 }, { "epoch": 6.935332708528585, "grad_norm": 1.8771485090255737, "learning_rate": 9.133083411433927e-06, "loss": 0.0836, "step": 14800 }, { "epoch": 6.9400187441424555, "grad_norm": 1.7967191934585571, "learning_rate": 9.132497656982192e-06, "loss": 0.0833, "step": 14810 }, { "epoch": 6.944704779756326, "grad_norm": 1.3904474973678589, "learning_rate": 9.131911902530461e-06, "loss": 0.1147, "step": 14820 }, { "epoch": 6.949390815370196, "grad_norm": 2.315178871154785, "learning_rate": 9.131326148078726e-06, "loss": 0.1054, "step": 14830 }, { "epoch": 6.954076850984068, "grad_norm": 2.0457489490509033, "learning_rate": 9.130740393626992e-06, "loss": 0.0833, "step": 14840 }, { "epoch": 6.958762886597938, "grad_norm": 1.5070949792861938, "learning_rate": 9.130154639175258e-06, "loss": 0.0737, "step": 14850 }, { "epoch": 6.963448922211809, "grad_norm": 2.301478862762451, "learning_rate": 9.129568884723524e-06, "loss": 0.0779, "step": 14860 }, { "epoch": 6.968134957825679, "grad_norm": 1.9525973796844482, "learning_rate": 9.12898313027179e-06, "loss": 0.0829, "step": 14870 }, { "epoch": 6.97282099343955, "grad_norm": 2.3320276737213135, "learning_rate": 9.128397375820058e-06, "loss": 0.09, "step": 14880 }, { "epoch": 6.977507029053421, "grad_norm": 2.358041286468506, "learning_rate": 9.127811621368323e-06, "loss": 0.0848, "step": 14890 }, { "epoch": 6.982193064667292, "grad_norm": 2.0248255729675293, "learning_rate": 9.12722586691659e-06, "loss": 0.1023, "step": 14900 }, { "epoch": 6.986879100281162, "grad_norm": 2.4078421592712402, "learning_rate": 9.126640112464855e-06, "loss": 0.0886, "step": 14910 }, { "epoch": 6.991565135895033, "grad_norm": 2.0203652381896973, "learning_rate": 9.126054358013122e-06, "loss": 0.0989, "step": 14920 }, { "epoch": 6.996251171508904, "grad_norm": 2.0305638313293457, "learning_rate": 9.125468603561389e-06, "loss": 0.0854, "step": 14930 }, { "epoch": 7.0009372071227745, "grad_norm": 2.3170547485351562, "learning_rate": 9.124882849109654e-06, "loss": 0.0888, "step": 14940 }, { "epoch": 7.005623242736645, "grad_norm": 1.3576775789260864, "learning_rate": 9.124297094657921e-06, "loss": 0.0756, "step": 14950 }, { "epoch": 7.010309278350515, "grad_norm": 2.177962303161621, "learning_rate": 9.123711340206186e-06, "loss": 0.0679, "step": 14960 }, { "epoch": 7.014995313964386, "grad_norm": 1.6852316856384277, "learning_rate": 9.123125585754451e-06, "loss": 0.0635, "step": 14970 }, { "epoch": 7.019681349578256, "grad_norm": 2.245973825454712, "learning_rate": 9.12253983130272e-06, "loss": 0.081, "step": 14980 }, { "epoch": 7.024367385192128, "grad_norm": 1.2212837934494019, "learning_rate": 9.121954076850985e-06, "loss": 0.0684, "step": 14990 }, { "epoch": 7.029053420805998, "grad_norm": 2.061438798904419, "learning_rate": 9.12136832239925e-06, "loss": 0.0792, "step": 15000 }, { "epoch": 7.029053420805998, "eval_loss": 0.04008892923593521, "eval_pearson_cosine": 0.7799962553837254, "eval_pearson_dot": 0.6338093099514381, "eval_pearson_euclidean": 0.740542330523235, "eval_pearson_manhattan": 0.7397823209884535, "eval_runtime": 39.3867, "eval_samples_per_second": 38.084, "eval_spearman_cosine": 0.7833374621050089, "eval_spearman_dot": 0.646744349870265, "eval_spearman_euclidean": 0.7572153670081455, "eval_spearman_manhattan": 0.7568637419859118, "eval_steps_per_second": 38.084, "step": 15000 }, { "epoch": 7.033739456419869, "grad_norm": 1.905097246170044, "learning_rate": 9.120782567947517e-06, "loss": 0.058, "step": 15010 }, { "epoch": 7.038425492033739, "grad_norm": 1.7693982124328613, "learning_rate": 9.120196813495782e-06, "loss": 0.0661, "step": 15020 }, { "epoch": 7.04311152764761, "grad_norm": 2.130645990371704, "learning_rate": 9.11961105904405e-06, "loss": 0.0749, "step": 15030 }, { "epoch": 7.047797563261481, "grad_norm": 1.778387427330017, "learning_rate": 9.119025304592316e-06, "loss": 0.063, "step": 15040 }, { "epoch": 7.052483598875352, "grad_norm": 1.038841724395752, "learning_rate": 9.118439550140582e-06, "loss": 0.0717, "step": 15050 }, { "epoch": 7.057169634489222, "grad_norm": 1.8356302976608276, "learning_rate": 9.117853795688848e-06, "loss": 0.0607, "step": 15060 }, { "epoch": 7.061855670103093, "grad_norm": 1.5863852500915527, "learning_rate": 9.117268041237114e-06, "loss": 0.0609, "step": 15070 }, { "epoch": 7.066541705716963, "grad_norm": 1.3317904472351074, "learning_rate": 9.11668228678538e-06, "loss": 0.0576, "step": 15080 }, { "epoch": 7.071227741330834, "grad_norm": 2.148087978363037, "learning_rate": 9.116096532333648e-06, "loss": 0.0694, "step": 15090 }, { "epoch": 7.075913776944705, "grad_norm": 1.34135901927948, "learning_rate": 9.115510777881913e-06, "loss": 0.063, "step": 15100 }, { "epoch": 7.080599812558575, "grad_norm": 1.9813562631607056, "learning_rate": 9.11492502343018e-06, "loss": 0.0843, "step": 15110 }, { "epoch": 7.085285848172446, "grad_norm": 1.3236151933670044, "learning_rate": 9.114339268978445e-06, "loss": 0.0682, "step": 15120 }, { "epoch": 7.089971883786316, "grad_norm": 1.483312726020813, "learning_rate": 9.11375351452671e-06, "loss": 0.0742, "step": 15130 }, { "epoch": 7.094657919400188, "grad_norm": 1.7315001487731934, "learning_rate": 9.113167760074977e-06, "loss": 0.0544, "step": 15140 }, { "epoch": 7.099343955014058, "grad_norm": 2.530773162841797, "learning_rate": 9.112582005623244e-06, "loss": 0.0768, "step": 15150 }, { "epoch": 7.104029990627929, "grad_norm": 2.089907169342041, "learning_rate": 9.11199625117151e-06, "loss": 0.0617, "step": 15160 }, { "epoch": 7.108716026241799, "grad_norm": 1.7428967952728271, "learning_rate": 9.111410496719776e-06, "loss": 0.0763, "step": 15170 }, { "epoch": 7.11340206185567, "grad_norm": 1.5844217538833618, "learning_rate": 9.110824742268041e-06, "loss": 0.0554, "step": 15180 }, { "epoch": 7.118088097469541, "grad_norm": 1.881791353225708, "learning_rate": 9.110238987816308e-06, "loss": 0.0662, "step": 15190 }, { "epoch": 7.122774133083412, "grad_norm": 1.2586263418197632, "learning_rate": 9.109653233364575e-06, "loss": 0.0648, "step": 15200 }, { "epoch": 7.127460168697282, "grad_norm": 1.9210679531097412, "learning_rate": 9.10906747891284e-06, "loss": 0.0647, "step": 15210 }, { "epoch": 7.1321462043111525, "grad_norm": 2.3099005222320557, "learning_rate": 9.108481724461107e-06, "loss": 0.0632, "step": 15220 }, { "epoch": 7.136832239925023, "grad_norm": 2.189209222793579, "learning_rate": 9.107895970009372e-06, "loss": 0.0779, "step": 15230 }, { "epoch": 7.141518275538894, "grad_norm": 1.2473788261413574, "learning_rate": 9.10731021555764e-06, "loss": 0.0684, "step": 15240 }, { "epoch": 7.146204311152765, "grad_norm": 1.381177306175232, "learning_rate": 9.106724461105905e-06, "loss": 0.0698, "step": 15250 }, { "epoch": 7.146204311152765, "eval_loss": 0.039560701698064804, "eval_pearson_cosine": 0.7821626687438226, "eval_pearson_dot": 0.6380954414398445, "eval_pearson_euclidean": 0.7346215848669857, "eval_pearson_manhattan": 0.7341262164749853, "eval_runtime": 40.552, "eval_samples_per_second": 36.99, "eval_spearman_cosine": 0.7855493497996395, "eval_spearman_dot": 0.6551626564215037, "eval_spearman_euclidean": 0.7509105689283286, "eval_spearman_manhattan": 0.7507208072274875, "eval_steps_per_second": 36.99, "step": 15250 }, { "epoch": 7.150890346766635, "grad_norm": 1.887905478477478, "learning_rate": 9.106138706654172e-06, "loss": 0.0744, "step": 15260 }, { "epoch": 7.155576382380506, "grad_norm": 1.1103498935699463, "learning_rate": 9.105552952202438e-06, "loss": 0.0614, "step": 15270 }, { "epoch": 7.160262417994376, "grad_norm": 1.2665252685546875, "learning_rate": 9.104967197750704e-06, "loss": 0.0668, "step": 15280 }, { "epoch": 7.164948453608248, "grad_norm": 1.8980008363723755, "learning_rate": 9.104381443298969e-06, "loss": 0.0615, "step": 15290 }, { "epoch": 7.169634489222118, "grad_norm": 1.789542317390442, "learning_rate": 9.103795688847236e-06, "loss": 0.0857, "step": 15300 }, { "epoch": 7.174320524835989, "grad_norm": 1.84969162940979, "learning_rate": 9.103209934395501e-06, "loss": 0.069, "step": 15310 }, { "epoch": 7.179006560449859, "grad_norm": 1.0945720672607422, "learning_rate": 9.102624179943768e-06, "loss": 0.0665, "step": 15320 }, { "epoch": 7.18369259606373, "grad_norm": 1.3088226318359375, "learning_rate": 9.102038425492035e-06, "loss": 0.0747, "step": 15330 }, { "epoch": 7.188378631677601, "grad_norm": 1.0556889772415161, "learning_rate": 9.1014526710403e-06, "loss": 0.0642, "step": 15340 }, { "epoch": 7.1930646672914715, "grad_norm": 1.7667440176010132, "learning_rate": 9.100866916588567e-06, "loss": 0.0644, "step": 15350 }, { "epoch": 7.197750702905342, "grad_norm": 1.3899027109146118, "learning_rate": 9.100281162136832e-06, "loss": 0.0712, "step": 15360 }, { "epoch": 7.2024367385192125, "grad_norm": 1.9200291633605957, "learning_rate": 9.099695407685099e-06, "loss": 0.08, "step": 15370 }, { "epoch": 7.207122774133083, "grad_norm": 2.3893768787384033, "learning_rate": 9.099109653233366e-06, "loss": 0.0741, "step": 15380 }, { "epoch": 7.211808809746954, "grad_norm": 1.2158704996109009, "learning_rate": 9.098523898781631e-06, "loss": 0.0866, "step": 15390 }, { "epoch": 7.216494845360825, "grad_norm": 2.252181053161621, "learning_rate": 9.097938144329898e-06, "loss": 0.076, "step": 15400 }, { "epoch": 7.221180880974695, "grad_norm": 1.2606340646743774, "learning_rate": 9.097352389878163e-06, "loss": 0.0661, "step": 15410 }, { "epoch": 7.225866916588566, "grad_norm": 1.1483300924301147, "learning_rate": 9.096766635426429e-06, "loss": 0.0776, "step": 15420 }, { "epoch": 7.230552952202436, "grad_norm": 1.4554270505905151, "learning_rate": 9.096180880974697e-06, "loss": 0.0713, "step": 15430 }, { "epoch": 7.235238987816308, "grad_norm": 1.8985337018966675, "learning_rate": 9.095595126522962e-06, "loss": 0.0787, "step": 15440 }, { "epoch": 7.239925023430178, "grad_norm": 2.394465208053589, "learning_rate": 9.095009372071228e-06, "loss": 0.0767, "step": 15450 }, { "epoch": 7.244611059044049, "grad_norm": 1.2314172983169556, "learning_rate": 9.094423617619495e-06, "loss": 0.0647, "step": 15460 }, { "epoch": 7.249297094657919, "grad_norm": 2.137882947921753, "learning_rate": 9.09383786316776e-06, "loss": 0.0656, "step": 15470 }, { "epoch": 7.25398313027179, "grad_norm": 1.7702836990356445, "learning_rate": 9.093252108716027e-06, "loss": 0.0576, "step": 15480 }, { "epoch": 7.258669165885661, "grad_norm": 2.0788486003875732, "learning_rate": 9.092666354264294e-06, "loss": 0.0582, "step": 15490 }, { "epoch": 7.2633552014995315, "grad_norm": 1.0218828916549683, "learning_rate": 9.092080599812559e-06, "loss": 0.0699, "step": 15500 }, { "epoch": 7.2633552014995315, "eval_loss": 0.03923952579498291, "eval_pearson_cosine": 0.7819686811712643, "eval_pearson_dot": 0.6466329055139823, "eval_pearson_euclidean": 0.732531031261928, "eval_pearson_manhattan": 0.732247137892952, "eval_runtime": 39.7194, "eval_samples_per_second": 37.765, "eval_spearman_cosine": 0.7850868672642034, "eval_spearman_dot": 0.6628931528870909, "eval_spearman_euclidean": 0.7501575772894145, "eval_spearman_manhattan": 0.7501561306691681, "eval_steps_per_second": 37.765, "step": 15500 }, { "epoch": 7.268041237113402, "grad_norm": 0.7509507536888123, "learning_rate": 9.091494845360826e-06, "loss": 0.0823, "step": 15510 }, { "epoch": 7.2727272727272725, "grad_norm": 2.109041690826416, "learning_rate": 9.090909090909091e-06, "loss": 0.0701, "step": 15520 }, { "epoch": 7.277413308341144, "grad_norm": 1.6811095476150513, "learning_rate": 9.090323336457358e-06, "loss": 0.0726, "step": 15530 }, { "epoch": 7.282099343955014, "grad_norm": 1.9557669162750244, "learning_rate": 9.089737582005625e-06, "loss": 0.0796, "step": 15540 }, { "epoch": 7.286785379568885, "grad_norm": 1.6342480182647705, "learning_rate": 9.08915182755389e-06, "loss": 0.0618, "step": 15550 }, { "epoch": 7.291471415182755, "grad_norm": 1.3443505764007568, "learning_rate": 9.088566073102157e-06, "loss": 0.0767, "step": 15560 }, { "epoch": 7.296157450796626, "grad_norm": 2.0768396854400635, "learning_rate": 9.087980318650422e-06, "loss": 0.0699, "step": 15570 }, { "epoch": 7.300843486410496, "grad_norm": 1.989401936531067, "learning_rate": 9.087394564198687e-06, "loss": 0.0706, "step": 15580 }, { "epoch": 7.305529522024368, "grad_norm": 1.7831469774246216, "learning_rate": 9.086808809746956e-06, "loss": 0.0701, "step": 15590 }, { "epoch": 7.310215557638238, "grad_norm": 2.3312692642211914, "learning_rate": 9.086223055295221e-06, "loss": 0.0703, "step": 15600 }, { "epoch": 7.314901593252109, "grad_norm": 1.7669209241867065, "learning_rate": 9.085637300843487e-06, "loss": 0.0646, "step": 15610 }, { "epoch": 7.319587628865979, "grad_norm": 1.880066156387329, "learning_rate": 9.085051546391753e-06, "loss": 0.0798, "step": 15620 }, { "epoch": 7.3242736644798505, "grad_norm": 1.3240752220153809, "learning_rate": 9.084465791940019e-06, "loss": 0.0749, "step": 15630 }, { "epoch": 7.328959700093721, "grad_norm": 1.0103267431259155, "learning_rate": 9.083880037488286e-06, "loss": 0.0635, "step": 15640 }, { "epoch": 7.3336457357075915, "grad_norm": 1.4677484035491943, "learning_rate": 9.083294283036552e-06, "loss": 0.0705, "step": 15650 }, { "epoch": 7.338331771321462, "grad_norm": 2.090219736099243, "learning_rate": 9.082708528584818e-06, "loss": 0.0693, "step": 15660 }, { "epoch": 7.3430178069353325, "grad_norm": 2.349215030670166, "learning_rate": 9.082122774133085e-06, "loss": 0.0714, "step": 15670 }, { "epoch": 7.347703842549203, "grad_norm": 0.8705586791038513, "learning_rate": 9.08153701968135e-06, "loss": 0.0761, "step": 15680 }, { "epoch": 7.352389878163074, "grad_norm": 1.986405372619629, "learning_rate": 9.080951265229617e-06, "loss": 0.0579, "step": 15690 }, { "epoch": 7.357075913776945, "grad_norm": 2.267803430557251, "learning_rate": 9.080365510777884e-06, "loss": 0.0925, "step": 15700 }, { "epoch": 7.361761949390815, "grad_norm": 1.7816276550292969, "learning_rate": 9.079779756326149e-06, "loss": 0.0713, "step": 15710 }, { "epoch": 7.366447985004686, "grad_norm": 3.1647868156433105, "learning_rate": 9.079194001874416e-06, "loss": 0.089, "step": 15720 }, { "epoch": 7.371134020618557, "grad_norm": 2.082855463027954, "learning_rate": 9.078608247422681e-06, "loss": 0.0695, "step": 15730 }, { "epoch": 7.375820056232428, "grad_norm": 1.4253464937210083, "learning_rate": 9.078022492970946e-06, "loss": 0.0681, "step": 15740 }, { "epoch": 7.380506091846298, "grad_norm": 1.7833616733551025, "learning_rate": 9.077436738519213e-06, "loss": 0.0739, "step": 15750 }, { "epoch": 7.380506091846298, "eval_loss": 0.03890243172645569, "eval_pearson_cosine": 0.7865226942731169, "eval_pearson_dot": 0.6411769886141485, "eval_pearson_euclidean": 0.7328063007950192, "eval_pearson_manhattan": 0.7322937842561661, "eval_runtime": 39.8796, "eval_samples_per_second": 37.613, "eval_spearman_cosine": 0.7886056526857715, "eval_spearman_dot": 0.6589435896491915, "eval_spearman_euclidean": 0.7495362761356495, "eval_spearman_manhattan": 0.7491310374131812, "eval_steps_per_second": 37.613, "step": 15750 }, { "epoch": 7.385192127460169, "grad_norm": 1.368802785873413, "learning_rate": 9.07685098406748e-06, "loss": 0.0634, "step": 15760 }, { "epoch": 7.389878163074039, "grad_norm": 2.0611209869384766, "learning_rate": 9.076265229615745e-06, "loss": 0.0732, "step": 15770 }, { "epoch": 7.39456419868791, "grad_norm": 1.3949185609817505, "learning_rate": 9.075679475164012e-06, "loss": 0.0643, "step": 15780 }, { "epoch": 7.399250234301781, "grad_norm": 2.267596960067749, "learning_rate": 9.075093720712277e-06, "loss": 0.0721, "step": 15790 }, { "epoch": 7.4039362699156515, "grad_norm": 1.2794581651687622, "learning_rate": 9.074507966260544e-06, "loss": 0.0718, "step": 15800 }, { "epoch": 7.408622305529522, "grad_norm": 1.8668746948242188, "learning_rate": 9.07392221180881e-06, "loss": 0.0734, "step": 15810 }, { "epoch": 7.413308341143392, "grad_norm": 2.141602039337158, "learning_rate": 9.073336457357077e-06, "loss": 0.0637, "step": 15820 }, { "epoch": 7.417994376757264, "grad_norm": 1.9020168781280518, "learning_rate": 9.072750702905343e-06, "loss": 0.0727, "step": 15830 }, { "epoch": 7.422680412371134, "grad_norm": 1.7302427291870117, "learning_rate": 9.072164948453609e-06, "loss": 0.0742, "step": 15840 }, { "epoch": 7.427366447985005, "grad_norm": 1.6152589321136475, "learning_rate": 9.071579194001876e-06, "loss": 0.0764, "step": 15850 }, { "epoch": 7.432052483598875, "grad_norm": 2.491912364959717, "learning_rate": 9.07099343955014e-06, "loss": 0.0746, "step": 15860 }, { "epoch": 7.436738519212746, "grad_norm": 1.8737932443618774, "learning_rate": 9.070407685098408e-06, "loss": 0.0665, "step": 15870 }, { "epoch": 7.441424554826616, "grad_norm": 2.3536882400512695, "learning_rate": 9.069821930646675e-06, "loss": 0.0702, "step": 15880 }, { "epoch": 7.446110590440488, "grad_norm": 1.0954251289367676, "learning_rate": 9.06923617619494e-06, "loss": 0.0768, "step": 15890 }, { "epoch": 7.450796626054358, "grad_norm": 1.973325490951538, "learning_rate": 9.068650421743205e-06, "loss": 0.0666, "step": 15900 }, { "epoch": 7.455482661668229, "grad_norm": 0.6486696600914001, "learning_rate": 9.068064667291472e-06, "loss": 0.0813, "step": 15910 }, { "epoch": 7.460168697282099, "grad_norm": 2.4715213775634766, "learning_rate": 9.067478912839737e-06, "loss": 0.0698, "step": 15920 }, { "epoch": 7.4648547328959705, "grad_norm": 1.3833788633346558, "learning_rate": 9.066893158388004e-06, "loss": 0.0783, "step": 15930 }, { "epoch": 7.469540768509841, "grad_norm": 1.5679866075515747, "learning_rate": 9.066307403936271e-06, "loss": 0.0675, "step": 15940 }, { "epoch": 7.474226804123711, "grad_norm": 1.173086166381836, "learning_rate": 9.065721649484536e-06, "loss": 0.0705, "step": 15950 }, { "epoch": 7.478912839737582, "grad_norm": 2.0635769367218018, "learning_rate": 9.065135895032803e-06, "loss": 0.0717, "step": 15960 }, { "epoch": 7.483598875351452, "grad_norm": 2.0800647735595703, "learning_rate": 9.064550140581068e-06, "loss": 0.0712, "step": 15970 }, { "epoch": 7.488284910965323, "grad_norm": 1.0860838890075684, "learning_rate": 9.063964386129335e-06, "loss": 0.0637, "step": 15980 }, { "epoch": 7.492970946579194, "grad_norm": 2.794854164123535, "learning_rate": 9.063378631677602e-06, "loss": 0.0799, "step": 15990 }, { "epoch": 7.497656982193065, "grad_norm": 0.8473652005195618, "learning_rate": 9.062792877225867e-06, "loss": 0.0745, "step": 16000 }, { "epoch": 7.497656982193065, "eval_loss": 0.039655983448028564, "eval_pearson_cosine": 0.779382095257283, "eval_pearson_dot": 0.6379781011095105, "eval_pearson_euclidean": 0.7372894551077778, "eval_pearson_manhattan": 0.736644904985166, "eval_runtime": 40.2485, "eval_samples_per_second": 37.268, "eval_spearman_cosine": 0.7827440097255054, "eval_spearman_dot": 0.6504351353485877, "eval_spearman_euclidean": 0.752448571801891, "eval_spearman_manhattan": 0.7524283280152466, "eval_steps_per_second": 37.268, "step": 16000 }, { "epoch": 7.502343017806935, "grad_norm": 2.1431262493133545, "learning_rate": 9.062207122774134e-06, "loss": 0.0768, "step": 16010 }, { "epoch": 7.507029053420806, "grad_norm": 1.5847636461257935, "learning_rate": 9.0616213683224e-06, "loss": 0.0772, "step": 16020 }, { "epoch": 7.511715089034677, "grad_norm": 1.5291898250579834, "learning_rate": 9.061035613870666e-06, "loss": 0.0642, "step": 16030 }, { "epoch": 7.516401124648548, "grad_norm": 1.497979998588562, "learning_rate": 9.060449859418933e-06, "loss": 0.0846, "step": 16040 }, { "epoch": 7.521087160262418, "grad_norm": 2.9073336124420166, "learning_rate": 9.059864104967199e-06, "loss": 0.0735, "step": 16050 }, { "epoch": 7.525773195876289, "grad_norm": 2.264319896697998, "learning_rate": 9.059278350515464e-06, "loss": 0.0752, "step": 16060 }, { "epoch": 7.530459231490159, "grad_norm": 1.6372432708740234, "learning_rate": 9.05869259606373e-06, "loss": 0.0842, "step": 16070 }, { "epoch": 7.5351452671040295, "grad_norm": 1.1619336605072021, "learning_rate": 9.058106841611996e-06, "loss": 0.0687, "step": 16080 }, { "epoch": 7.539831302717901, "grad_norm": 0.9320247173309326, "learning_rate": 9.057521087160263e-06, "loss": 0.0709, "step": 16090 }, { "epoch": 7.544517338331771, "grad_norm": 1.3003836870193481, "learning_rate": 9.05693533270853e-06, "loss": 0.062, "step": 16100 }, { "epoch": 7.549203373945642, "grad_norm": 1.8614999055862427, "learning_rate": 9.056349578256795e-06, "loss": 0.0713, "step": 16110 }, { "epoch": 7.553889409559512, "grad_norm": 2.049309492111206, "learning_rate": 9.055763823805062e-06, "loss": 0.0763, "step": 16120 }, { "epoch": 7.558575445173384, "grad_norm": 1.3978779315948486, "learning_rate": 9.055178069353327e-06, "loss": 0.0778, "step": 16130 }, { "epoch": 7.563261480787254, "grad_norm": 1.9440947771072388, "learning_rate": 9.054592314901594e-06, "loss": 0.0637, "step": 16140 }, { "epoch": 7.567947516401125, "grad_norm": 1.5653728246688843, "learning_rate": 9.054006560449861e-06, "loss": 0.0791, "step": 16150 }, { "epoch": 7.572633552014995, "grad_norm": 1.3674747943878174, "learning_rate": 9.053420805998126e-06, "loss": 0.0662, "step": 16160 }, { "epoch": 7.577319587628866, "grad_norm": 1.2067365646362305, "learning_rate": 9.052835051546393e-06, "loss": 0.0677, "step": 16170 }, { "epoch": 7.582005623242736, "grad_norm": 1.9453731775283813, "learning_rate": 9.052249297094658e-06, "loss": 0.0776, "step": 16180 }, { "epoch": 7.586691658856608, "grad_norm": 1.6629338264465332, "learning_rate": 9.051663542642925e-06, "loss": 0.0615, "step": 16190 }, { "epoch": 7.591377694470478, "grad_norm": 2.192781925201416, "learning_rate": 9.051077788191192e-06, "loss": 0.0762, "step": 16200 }, { "epoch": 7.5960637300843485, "grad_norm": 1.7695443630218506, "learning_rate": 9.050492033739457e-06, "loss": 0.0798, "step": 16210 }, { "epoch": 7.600749765698219, "grad_norm": 2.5343542098999023, "learning_rate": 9.049906279287723e-06, "loss": 0.0642, "step": 16220 }, { "epoch": 7.60543580131209, "grad_norm": 2.3569960594177246, "learning_rate": 9.04932052483599e-06, "loss": 0.0791, "step": 16230 }, { "epoch": 7.610121836925961, "grad_norm": 1.6627905368804932, "learning_rate": 9.048734770384255e-06, "loss": 0.0751, "step": 16240 }, { "epoch": 7.614807872539831, "grad_norm": 2.732750177383423, "learning_rate": 9.048149015932522e-06, "loss": 0.0779, "step": 16250 }, { "epoch": 7.614807872539831, "eval_loss": 0.03914293646812439, "eval_pearson_cosine": 0.7825741148617453, "eval_pearson_dot": 0.6371994676784709, "eval_pearson_euclidean": 0.7333148039136894, "eval_pearson_manhattan": 0.7325802747620713, "eval_runtime": 39.7805, "eval_samples_per_second": 37.707, "eval_spearman_cosine": 0.7845511113526636, "eval_spearman_dot": 0.6532379504600656, "eval_spearman_euclidean": 0.746661992951968, "eval_spearman_manhattan": 0.7461572363651041, "eval_steps_per_second": 37.707, "step": 16250 }, { "epoch": 7.619493908153702, "grad_norm": 1.1624847650527954, "learning_rate": 9.047563261480789e-06, "loss": 0.0677, "step": 16260 }, { "epoch": 7.624179943767572, "grad_norm": 2.173877477645874, "learning_rate": 9.046977507029054e-06, "loss": 0.0697, "step": 16270 }, { "epoch": 7.628865979381443, "grad_norm": 2.1271920204162598, "learning_rate": 9.04639175257732e-06, "loss": 0.0736, "step": 16280 }, { "epoch": 7.633552014995314, "grad_norm": 1.3459683656692505, "learning_rate": 9.045805998125586e-06, "loss": 0.082, "step": 16290 }, { "epoch": 7.638238050609185, "grad_norm": 2.0023584365844727, "learning_rate": 9.045220243673853e-06, "loss": 0.0662, "step": 16300 }, { "epoch": 7.642924086223055, "grad_norm": 1.8559486865997314, "learning_rate": 9.044634489222118e-06, "loss": 0.0685, "step": 16310 }, { "epoch": 7.647610121836926, "grad_norm": 2.1703007221221924, "learning_rate": 9.044048734770385e-06, "loss": 0.0659, "step": 16320 }, { "epoch": 7.652296157450797, "grad_norm": 2.0675439834594727, "learning_rate": 9.043462980318652e-06, "loss": 0.0708, "step": 16330 }, { "epoch": 7.6569821930646675, "grad_norm": 1.600040316581726, "learning_rate": 9.042877225866917e-06, "loss": 0.0682, "step": 16340 }, { "epoch": 7.661668228678538, "grad_norm": 2.5737037658691406, "learning_rate": 9.042291471415184e-06, "loss": 0.077, "step": 16350 }, { "epoch": 7.6663542642924085, "grad_norm": 1.909056544303894, "learning_rate": 9.04170571696345e-06, "loss": 0.076, "step": 16360 }, { "epoch": 7.671040299906279, "grad_norm": 2.5772509574890137, "learning_rate": 9.041119962511716e-06, "loss": 0.0743, "step": 16370 }, { "epoch": 7.6757263355201495, "grad_norm": 1.6890363693237305, "learning_rate": 9.040534208059981e-06, "loss": 0.0696, "step": 16380 }, { "epoch": 7.680412371134021, "grad_norm": 1.6115903854370117, "learning_rate": 9.039948453608248e-06, "loss": 0.0849, "step": 16390 }, { "epoch": 7.685098406747891, "grad_norm": 2.3779239654541016, "learning_rate": 9.039362699156514e-06, "loss": 0.0819, "step": 16400 }, { "epoch": 7.689784442361762, "grad_norm": 1.5444949865341187, "learning_rate": 9.03877694470478e-06, "loss": 0.0718, "step": 16410 }, { "epoch": 7.694470477975632, "grad_norm": 2.221595525741577, "learning_rate": 9.038191190253046e-06, "loss": 0.084, "step": 16420 }, { "epoch": 7.699156513589504, "grad_norm": 2.68977427482605, "learning_rate": 9.037605435801313e-06, "loss": 0.0828, "step": 16430 }, { "epoch": 7.703842549203374, "grad_norm": 1.9625297784805298, "learning_rate": 9.03701968134958e-06, "loss": 0.0645, "step": 16440 }, { "epoch": 7.708528584817245, "grad_norm": 1.4848051071166992, "learning_rate": 9.036433926897845e-06, "loss": 0.0775, "step": 16450 }, { "epoch": 7.713214620431115, "grad_norm": 1.2312238216400146, "learning_rate": 9.035848172446112e-06, "loss": 0.0654, "step": 16460 }, { "epoch": 7.717900656044986, "grad_norm": 2.353621006011963, "learning_rate": 9.035262417994377e-06, "loss": 0.0798, "step": 16470 }, { "epoch": 7.722586691658856, "grad_norm": 2.2561025619506836, "learning_rate": 9.034676663542644e-06, "loss": 0.0679, "step": 16480 }, { "epoch": 7.7272727272727275, "grad_norm": 1.7776751518249512, "learning_rate": 9.03409090909091e-06, "loss": 0.0619, "step": 16490 }, { "epoch": 7.731958762886598, "grad_norm": 1.9327503442764282, "learning_rate": 9.033505154639176e-06, "loss": 0.078, "step": 16500 }, { "epoch": 7.731958762886598, "eval_loss": 0.039704494178295135, "eval_pearson_cosine": 0.7809507850262349, "eval_pearson_dot": 0.636416760886064, "eval_pearson_euclidean": 0.7299738247708838, "eval_pearson_manhattan": 0.7298765220753651, "eval_runtime": 40.0275, "eval_samples_per_second": 37.474, "eval_spearman_cosine": 0.782649086170428, "eval_spearman_dot": 0.6554585356303039, "eval_spearman_euclidean": 0.7456788267720733, "eval_spearman_manhattan": 0.7461425779916862, "eval_steps_per_second": 37.474, "step": 16500 }, { "epoch": 7.7366447985004685, "grad_norm": 1.9791489839553833, "learning_rate": 9.032919400187443e-06, "loss": 0.08, "step": 16510 }, { "epoch": 7.741330834114339, "grad_norm": 2.5181267261505127, "learning_rate": 9.032333645735708e-06, "loss": 0.0822, "step": 16520 }, { "epoch": 7.74601686972821, "grad_norm": 1.2553796768188477, "learning_rate": 9.031747891283973e-06, "loss": 0.0713, "step": 16530 }, { "epoch": 7.750702905342081, "grad_norm": 2.394421100616455, "learning_rate": 9.03116213683224e-06, "loss": 0.0852, "step": 16540 }, { "epoch": 7.755388940955951, "grad_norm": 2.388476848602295, "learning_rate": 9.030576382380507e-06, "loss": 0.0703, "step": 16550 }, { "epoch": 7.760074976569822, "grad_norm": 1.3286539316177368, "learning_rate": 9.029990627928772e-06, "loss": 0.0606, "step": 16560 }, { "epoch": 7.764761012183692, "grad_norm": 2.0466766357421875, "learning_rate": 9.02940487347704e-06, "loss": 0.0729, "step": 16570 }, { "epoch": 7.769447047797563, "grad_norm": 1.3759188652038574, "learning_rate": 9.028819119025305e-06, "loss": 0.0776, "step": 16580 }, { "epoch": 7.774133083411434, "grad_norm": 1.6511011123657227, "learning_rate": 9.028233364573571e-06, "loss": 0.0728, "step": 16590 }, { "epoch": 7.778819119025305, "grad_norm": 2.05136775970459, "learning_rate": 9.027647610121838e-06, "loss": 0.0719, "step": 16600 }, { "epoch": 7.783505154639175, "grad_norm": 2.3014705181121826, "learning_rate": 9.027061855670104e-06, "loss": 0.0682, "step": 16610 }, { "epoch": 7.788191190253046, "grad_norm": 2.6752190589904785, "learning_rate": 9.02647610121837e-06, "loss": 0.0856, "step": 16620 }, { "epoch": 7.792877225866917, "grad_norm": 1.7644881010055542, "learning_rate": 9.025890346766636e-06, "loss": 0.0705, "step": 16630 }, { "epoch": 7.7975632614807875, "grad_norm": 2.1563751697540283, "learning_rate": 9.025304592314903e-06, "loss": 0.0842, "step": 16640 }, { "epoch": 7.802249297094658, "grad_norm": 1.4930392503738403, "learning_rate": 9.02471883786317e-06, "loss": 0.0759, "step": 16650 }, { "epoch": 7.8069353327085285, "grad_norm": 2.3332340717315674, "learning_rate": 9.024133083411435e-06, "loss": 0.0668, "step": 16660 }, { "epoch": 7.811621368322399, "grad_norm": 2.424914836883545, "learning_rate": 9.023547328959702e-06, "loss": 0.0619, "step": 16670 }, { "epoch": 7.816307403936269, "grad_norm": 2.246410369873047, "learning_rate": 9.022961574507967e-06, "loss": 0.0772, "step": 16680 }, { "epoch": 7.820993439550141, "grad_norm": 1.8411740064620972, "learning_rate": 9.022375820056232e-06, "loss": 0.0685, "step": 16690 }, { "epoch": 7.825679475164011, "grad_norm": 1.6910183429718018, "learning_rate": 9.021790065604499e-06, "loss": 0.0666, "step": 16700 }, { "epoch": 7.830365510777882, "grad_norm": 1.7055261135101318, "learning_rate": 9.021204311152766e-06, "loss": 0.0555, "step": 16710 }, { "epoch": 7.835051546391752, "grad_norm": 1.0138518810272217, "learning_rate": 9.020618556701031e-06, "loss": 0.0709, "step": 16720 }, { "epoch": 7.839737582005624, "grad_norm": 1.5108051300048828, "learning_rate": 9.020032802249298e-06, "loss": 0.0563, "step": 16730 }, { "epoch": 7.844423617619494, "grad_norm": 1.4900165796279907, "learning_rate": 9.019447047797563e-06, "loss": 0.0725, "step": 16740 }, { "epoch": 7.849109653233365, "grad_norm": 2.11224627494812, "learning_rate": 9.01886129334583e-06, "loss": 0.0699, "step": 16750 }, { "epoch": 7.849109653233365, "eval_loss": 0.04045228287577629, "eval_pearson_cosine": 0.7810519865633125, "eval_pearson_dot": 0.6315366018290618, "eval_pearson_euclidean": 0.7311610922445455, "eval_pearson_manhattan": 0.7308498943499657, "eval_runtime": 40.5903, "eval_samples_per_second": 36.955, "eval_spearman_cosine": 0.7836878662688926, "eval_spearman_dot": 0.642582986344888, "eval_spearman_euclidean": 0.7470082334118219, "eval_spearman_manhattan": 0.7467779627853639, "eval_steps_per_second": 36.955, "step": 16750 }, { "epoch": 7.853795688847235, "grad_norm": 1.3252798318862915, "learning_rate": 9.018275538894097e-06, "loss": 0.0691, "step": 16760 }, { "epoch": 7.858481724461106, "grad_norm": 0.9918208122253418, "learning_rate": 9.017689784442362e-06, "loss": 0.0723, "step": 16770 }, { "epoch": 7.863167760074976, "grad_norm": 2.2344889640808105, "learning_rate": 9.01710402999063e-06, "loss": 0.0739, "step": 16780 }, { "epoch": 7.8678537956888475, "grad_norm": 1.8005706071853638, "learning_rate": 9.016518275538895e-06, "loss": 0.0831, "step": 16790 }, { "epoch": 7.872539831302718, "grad_norm": 1.365945816040039, "learning_rate": 9.015932521087161e-06, "loss": 0.0759, "step": 16800 }, { "epoch": 7.877225866916588, "grad_norm": 1.3977360725402832, "learning_rate": 9.015346766635427e-06, "loss": 0.0806, "step": 16810 }, { "epoch": 7.881911902530459, "grad_norm": 1.3826375007629395, "learning_rate": 9.014761012183694e-06, "loss": 0.0744, "step": 16820 }, { "epoch": 7.88659793814433, "grad_norm": 2.0823261737823486, "learning_rate": 9.01417525773196e-06, "loss": 0.0731, "step": 16830 }, { "epoch": 7.891283973758201, "grad_norm": 1.4947584867477417, "learning_rate": 9.013589503280226e-06, "loss": 0.0788, "step": 16840 }, { "epoch": 7.895970009372071, "grad_norm": 1.659224033355713, "learning_rate": 9.013003748828491e-06, "loss": 0.0733, "step": 16850 }, { "epoch": 7.900656044985942, "grad_norm": 1.4698199033737183, "learning_rate": 9.012417994376758e-06, "loss": 0.0789, "step": 16860 }, { "epoch": 7.905342080599812, "grad_norm": 1.6106451749801636, "learning_rate": 9.011832239925025e-06, "loss": 0.0656, "step": 16870 }, { "epoch": 7.910028116213683, "grad_norm": 1.2820615768432617, "learning_rate": 9.01124648547329e-06, "loss": 0.0648, "step": 16880 }, { "epoch": 7.914714151827554, "grad_norm": 2.3736705780029297, "learning_rate": 9.010660731021557e-06, "loss": 0.0884, "step": 16890 }, { "epoch": 7.919400187441425, "grad_norm": 1.1591442823410034, "learning_rate": 9.010074976569822e-06, "loss": 0.0657, "step": 16900 }, { "epoch": 7.924086223055295, "grad_norm": 1.9707759618759155, "learning_rate": 9.009489222118089e-06, "loss": 0.0833, "step": 16910 }, { "epoch": 7.928772258669166, "grad_norm": 2.5806972980499268, "learning_rate": 9.008903467666354e-06, "loss": 0.073, "step": 16920 }, { "epoch": 7.933458294283037, "grad_norm": 0.8301031589508057, "learning_rate": 9.008317713214621e-06, "loss": 0.0694, "step": 16930 }, { "epoch": 7.938144329896907, "grad_norm": 2.491325855255127, "learning_rate": 9.007731958762888e-06, "loss": 0.07, "step": 16940 }, { "epoch": 7.942830365510778, "grad_norm": 1.3585147857666016, "learning_rate": 9.007146204311153e-06, "loss": 0.0844, "step": 16950 }, { "epoch": 7.947516401124648, "grad_norm": 0.8648898601531982, "learning_rate": 9.00656044985942e-06, "loss": 0.0706, "step": 16960 }, { "epoch": 7.952202436738519, "grad_norm": 1.6157063245773315, "learning_rate": 9.005974695407685e-06, "loss": 0.0849, "step": 16970 }, { "epoch": 7.956888472352389, "grad_norm": 2.6578354835510254, "learning_rate": 9.005388940955952e-06, "loss": 0.0785, "step": 16980 }, { "epoch": 7.961574507966261, "grad_norm": 2.8184850215911865, "learning_rate": 9.00480318650422e-06, "loss": 0.0769, "step": 16990 }, { "epoch": 7.966260543580131, "grad_norm": 1.8346798419952393, "learning_rate": 9.004217432052485e-06, "loss": 0.0735, "step": 17000 }, { "epoch": 7.966260543580131, "eval_loss": 0.03939095139503479, "eval_pearson_cosine": 0.780422600052205, "eval_pearson_dot": 0.646788551622171, "eval_pearson_euclidean": 0.7325980054422985, "eval_pearson_manhattan": 0.731991687137608, "eval_runtime": 39.8363, "eval_samples_per_second": 37.654, "eval_spearman_cosine": 0.7823114033515521, "eval_spearman_dot": 0.6607344073150395, "eval_spearman_euclidean": 0.7461718651526544, "eval_spearman_manhattan": 0.745468210963869, "eval_steps_per_second": 37.654, "step": 17000 }, { "epoch": 7.970946579194002, "grad_norm": 1.712897539138794, "learning_rate": 9.00363167760075e-06, "loss": 0.0638, "step": 17010 }, { "epoch": 7.975632614807872, "grad_norm": 1.6661624908447266, "learning_rate": 9.003045923149017e-06, "loss": 0.0721, "step": 17020 }, { "epoch": 7.980318650421744, "grad_norm": 1.8900232315063477, "learning_rate": 9.002460168697282e-06, "loss": 0.0697, "step": 17030 }, { "epoch": 7.985004686035614, "grad_norm": 1.7580076456069946, "learning_rate": 9.001874414245549e-06, "loss": 0.0829, "step": 17040 }, { "epoch": 7.989690721649485, "grad_norm": 1.0621994733810425, "learning_rate": 9.001288659793816e-06, "loss": 0.0654, "step": 17050 }, { "epoch": 7.994376757263355, "grad_norm": 2.382904529571533, "learning_rate": 9.000702905342081e-06, "loss": 0.0832, "step": 17060 }, { "epoch": 7.9990627928772255, "grad_norm": 2.5280025005340576, "learning_rate": 9.000117150890348e-06, "loss": 0.0671, "step": 17070 }, { "epoch": 8.003748828491096, "grad_norm": 1.5230673551559448, "learning_rate": 8.999531396438613e-06, "loss": 0.0705, "step": 17080 }, { "epoch": 8.008434864104967, "grad_norm": 1.430708646774292, "learning_rate": 8.99894564198688e-06, "loss": 0.0469, "step": 17090 }, { "epoch": 8.013120899718837, "grad_norm": 1.6292754411697388, "learning_rate": 8.998359887535147e-06, "loss": 0.0511, "step": 17100 }, { "epoch": 8.01780693533271, "grad_norm": 1.5162855386734009, "learning_rate": 8.997774133083412e-06, "loss": 0.0562, "step": 17110 }, { "epoch": 8.02249297094658, "grad_norm": 1.8024640083312988, "learning_rate": 8.997188378631679e-06, "loss": 0.0591, "step": 17120 }, { "epoch": 8.02717900656045, "grad_norm": 1.7311487197875977, "learning_rate": 8.996602624179944e-06, "loss": 0.0588, "step": 17130 }, { "epoch": 8.03186504217432, "grad_norm": 0.8079742193222046, "learning_rate": 8.99601686972821e-06, "loss": 0.0575, "step": 17140 }, { "epoch": 8.036551077788191, "grad_norm": 1.5149396657943726, "learning_rate": 8.995431115276478e-06, "loss": 0.0631, "step": 17150 }, { "epoch": 8.041237113402062, "grad_norm": 1.4858596324920654, "learning_rate": 8.994845360824743e-06, "loss": 0.0531, "step": 17160 }, { "epoch": 8.045923149015932, "grad_norm": 0.9805922508239746, "learning_rate": 8.994259606373009e-06, "loss": 0.0507, "step": 17170 }, { "epoch": 8.050609184629803, "grad_norm": 1.2604528665542603, "learning_rate": 8.993673851921275e-06, "loss": 0.0552, "step": 17180 }, { "epoch": 8.055295220243673, "grad_norm": 1.1252182722091675, "learning_rate": 8.99308809746954e-06, "loss": 0.05, "step": 17190 }, { "epoch": 8.059981255857544, "grad_norm": 2.151175022125244, "learning_rate": 8.992502343017808e-06, "loss": 0.0678, "step": 17200 }, { "epoch": 8.064667291471416, "grad_norm": 1.468262791633606, "learning_rate": 8.991916588566075e-06, "loss": 0.0542, "step": 17210 }, { "epoch": 8.069353327085286, "grad_norm": 1.679754376411438, "learning_rate": 8.99133083411434e-06, "loss": 0.0574, "step": 17220 }, { "epoch": 8.074039362699157, "grad_norm": 2.2520573139190674, "learning_rate": 8.990745079662607e-06, "loss": 0.0517, "step": 17230 }, { "epoch": 8.078725398313027, "grad_norm": 1.5211695432662964, "learning_rate": 8.990159325210872e-06, "loss": 0.0527, "step": 17240 }, { "epoch": 8.083411433926898, "grad_norm": 1.7770270109176636, "learning_rate": 8.989573570759139e-06, "loss": 0.0682, "step": 17250 }, { "epoch": 8.083411433926898, "eval_loss": 0.03860222175717354, "eval_pearson_cosine": 0.7845454144482034, "eval_pearson_dot": 0.6431296048602846, "eval_pearson_euclidean": 0.7311376660170836, "eval_pearson_manhattan": 0.7305724358867849, "eval_runtime": 40.1479, "eval_samples_per_second": 37.362, "eval_spearman_cosine": 0.7869140607349678, "eval_spearman_dot": 0.6613187727914379, "eval_spearman_euclidean": 0.744930207684551, "eval_spearman_manhattan": 0.7446744595094797, "eval_steps_per_second": 37.362, "step": 17250 }, { "epoch": 8.088097469540768, "grad_norm": 1.6006652116775513, "learning_rate": 8.988987816307406e-06, "loss": 0.0604, "step": 17260 }, { "epoch": 8.092783505154639, "grad_norm": 1.7531373500823975, "learning_rate": 8.988402061855671e-06, "loss": 0.063, "step": 17270 }, { "epoch": 8.09746954076851, "grad_norm": 2.294930934906006, "learning_rate": 8.987816307403938e-06, "loss": 0.0568, "step": 17280 }, { "epoch": 8.10215557638238, "grad_norm": 1.9267457723617554, "learning_rate": 8.987230552952203e-06, "loss": 0.0565, "step": 17290 }, { "epoch": 8.10684161199625, "grad_norm": 2.1076624393463135, "learning_rate": 8.986644798500468e-06, "loss": 0.05, "step": 17300 }, { "epoch": 8.111527647610123, "grad_norm": 2.3128514289855957, "learning_rate": 8.986059044048735e-06, "loss": 0.0656, "step": 17310 }, { "epoch": 8.116213683223993, "grad_norm": 1.6104718446731567, "learning_rate": 8.985473289597002e-06, "loss": 0.0554, "step": 17320 }, { "epoch": 8.120899718837864, "grad_norm": 1.1439037322998047, "learning_rate": 8.984887535145267e-06, "loss": 0.0588, "step": 17330 }, { "epoch": 8.125585754451734, "grad_norm": 1.342757225036621, "learning_rate": 8.984301780693534e-06, "loss": 0.0539, "step": 17340 }, { "epoch": 8.130271790065605, "grad_norm": 0.8630651235580444, "learning_rate": 8.9837160262418e-06, "loss": 0.0602, "step": 17350 }, { "epoch": 8.134957825679475, "grad_norm": 2.1189727783203125, "learning_rate": 8.983130271790066e-06, "loss": 0.0676, "step": 17360 }, { "epoch": 8.139643861293345, "grad_norm": 1.943943977355957, "learning_rate": 8.982544517338333e-06, "loss": 0.0564, "step": 17370 }, { "epoch": 8.144329896907216, "grad_norm": 2.4925365447998047, "learning_rate": 8.981958762886599e-06, "loss": 0.058, "step": 17380 }, { "epoch": 8.149015932521086, "grad_norm": 0.8549938797950745, "learning_rate": 8.981373008434865e-06, "loss": 0.0583, "step": 17390 }, { "epoch": 8.153701968134957, "grad_norm": 2.259129762649536, "learning_rate": 8.98078725398313e-06, "loss": 0.064, "step": 17400 }, { "epoch": 8.15838800374883, "grad_norm": 1.5482234954833984, "learning_rate": 8.980201499531398e-06, "loss": 0.0608, "step": 17410 }, { "epoch": 8.1630740393627, "grad_norm": 1.5130146741867065, "learning_rate": 8.979615745079663e-06, "loss": 0.0487, "step": 17420 }, { "epoch": 8.16776007497657, "grad_norm": 2.1378371715545654, "learning_rate": 8.97902999062793e-06, "loss": 0.0687, "step": 17430 }, { "epoch": 8.17244611059044, "grad_norm": 1.4148082733154297, "learning_rate": 8.978444236176197e-06, "loss": 0.0527, "step": 17440 }, { "epoch": 8.177132146204311, "grad_norm": 1.621864676475525, "learning_rate": 8.977858481724462e-06, "loss": 0.0541, "step": 17450 }, { "epoch": 8.181818181818182, "grad_norm": 1.5040533542633057, "learning_rate": 8.977272727272727e-06, "loss": 0.0505, "step": 17460 }, { "epoch": 8.186504217432052, "grad_norm": 1.7481443881988525, "learning_rate": 8.976686972820994e-06, "loss": 0.0658, "step": 17470 }, { "epoch": 8.191190253045923, "grad_norm": 1.6171940565109253, "learning_rate": 8.976101218369261e-06, "loss": 0.0545, "step": 17480 }, { "epoch": 8.195876288659793, "grad_norm": 3.027470350265503, "learning_rate": 8.975515463917526e-06, "loss": 0.0497, "step": 17490 }, { "epoch": 8.200562324273664, "grad_norm": 1.5770045518875122, "learning_rate": 8.974929709465793e-06, "loss": 0.0526, "step": 17500 }, { "epoch": 8.200562324273664, "eval_loss": 0.03888610377907753, "eval_pearson_cosine": 0.7824405710209184, "eval_pearson_dot": 0.6370045075889941, "eval_pearson_euclidean": 0.7275292814047258, "eval_pearson_manhattan": 0.7271617935348544, "eval_runtime": 40.0736, "eval_samples_per_second": 37.431, "eval_spearman_cosine": 0.7832356097193793, "eval_spearman_dot": 0.6538613957323862, "eval_spearman_euclidean": 0.7430558622725291, "eval_spearman_manhattan": 0.7430619174369794, "eval_steps_per_second": 37.431, "step": 17500 }, { "epoch": 8.205248359887536, "grad_norm": 2.2228381633758545, "learning_rate": 8.974343955014058e-06, "loss": 0.0457, "step": 17510 }, { "epoch": 8.209934395501406, "grad_norm": 2.4519641399383545, "learning_rate": 8.973758200562325e-06, "loss": 0.051, "step": 17520 }, { "epoch": 8.214620431115277, "grad_norm": 1.8084455728530884, "learning_rate": 8.97317244611059e-06, "loss": 0.0575, "step": 17530 }, { "epoch": 8.219306466729147, "grad_norm": 1.3803386688232422, "learning_rate": 8.972586691658857e-06, "loss": 0.053, "step": 17540 }, { "epoch": 8.223992502343018, "grad_norm": 1.3450793027877808, "learning_rate": 8.972000937207124e-06, "loss": 0.0556, "step": 17550 }, { "epoch": 8.228678537956888, "grad_norm": 2.0758721828460693, "learning_rate": 8.97141518275539e-06, "loss": 0.0521, "step": 17560 }, { "epoch": 8.233364573570759, "grad_norm": 1.4197956323623657, "learning_rate": 8.970829428303656e-06, "loss": 0.0631, "step": 17570 }, { "epoch": 8.23805060918463, "grad_norm": 1.497050166130066, "learning_rate": 8.970243673851922e-06, "loss": 0.0614, "step": 17580 }, { "epoch": 8.2427366447985, "grad_norm": 1.0769314765930176, "learning_rate": 8.969657919400189e-06, "loss": 0.0587, "step": 17590 }, { "epoch": 8.24742268041237, "grad_norm": 1.9401723146438599, "learning_rate": 8.969072164948455e-06, "loss": 0.0584, "step": 17600 }, { "epoch": 8.252108716026243, "grad_norm": 0.6708168387413025, "learning_rate": 8.96848641049672e-06, "loss": 0.058, "step": 17610 }, { "epoch": 8.256794751640113, "grad_norm": 1.555535912513733, "learning_rate": 8.967900656044986e-06, "loss": 0.0623, "step": 17620 }, { "epoch": 8.261480787253983, "grad_norm": 1.182997703552246, "learning_rate": 8.967314901593253e-06, "loss": 0.0521, "step": 17630 }, { "epoch": 8.266166822867854, "grad_norm": 1.7748857736587524, "learning_rate": 8.966729147141518e-06, "loss": 0.0573, "step": 17640 }, { "epoch": 8.270852858481724, "grad_norm": 1.558457851409912, "learning_rate": 8.966143392689785e-06, "loss": 0.0586, "step": 17650 }, { "epoch": 8.275538894095595, "grad_norm": 2.463069438934326, "learning_rate": 8.965557638238052e-06, "loss": 0.0581, "step": 17660 }, { "epoch": 8.280224929709465, "grad_norm": 1.325049877166748, "learning_rate": 8.964971883786317e-06, "loss": 0.0526, "step": 17670 }, { "epoch": 8.284910965323336, "grad_norm": 1.9136682748794556, "learning_rate": 8.964386129334584e-06, "loss": 0.0717, "step": 17680 }, { "epoch": 8.289597000937206, "grad_norm": 0.9149712920188904, "learning_rate": 8.96380037488285e-06, "loss": 0.0551, "step": 17690 }, { "epoch": 8.294283036551079, "grad_norm": 1.0004934072494507, "learning_rate": 8.963214620431116e-06, "loss": 0.0552, "step": 17700 }, { "epoch": 8.29896907216495, "grad_norm": 2.1920504570007324, "learning_rate": 8.962628865979383e-06, "loss": 0.0631, "step": 17710 }, { "epoch": 8.30365510777882, "grad_norm": 1.7555533647537231, "learning_rate": 8.962043111527648e-06, "loss": 0.0643, "step": 17720 }, { "epoch": 8.30834114339269, "grad_norm": 1.980637550354004, "learning_rate": 8.961457357075915e-06, "loss": 0.0594, "step": 17730 }, { "epoch": 8.31302717900656, "grad_norm": 1.4178955554962158, "learning_rate": 8.96087160262418e-06, "loss": 0.0584, "step": 17740 }, { "epoch": 8.317713214620431, "grad_norm": 1.375645399093628, "learning_rate": 8.960285848172446e-06, "loss": 0.0558, "step": 17750 }, { "epoch": 8.317713214620431, "eval_loss": 0.03849739581346512, "eval_pearson_cosine": 0.7855877317949194, "eval_pearson_dot": 0.651727283647233, "eval_pearson_euclidean": 0.7376296235813697, "eval_pearson_manhattan": 0.7370097948427539, "eval_runtime": 40.7984, "eval_samples_per_second": 36.766, "eval_spearman_cosine": 0.7865254359033228, "eval_spearman_dot": 0.6678553912046729, "eval_spearman_euclidean": 0.7518223898617357, "eval_spearman_manhattan": 0.7512717468993468, "eval_steps_per_second": 36.766, "step": 17750 }, { "epoch": 8.322399250234302, "grad_norm": 1.6528228521347046, "learning_rate": 8.959700093720714e-06, "loss": 0.0671, "step": 17760 }, { "epoch": 8.327085285848172, "grad_norm": 1.526089072227478, "learning_rate": 8.95911433926898e-06, "loss": 0.0661, "step": 17770 }, { "epoch": 8.331771321462043, "grad_norm": 1.9455267190933228, "learning_rate": 8.958528584817245e-06, "loss": 0.059, "step": 17780 }, { "epoch": 8.336457357075913, "grad_norm": 2.1176974773406982, "learning_rate": 8.957942830365512e-06, "loss": 0.0628, "step": 17790 }, { "epoch": 8.341143392689784, "grad_norm": 1.9059792757034302, "learning_rate": 8.957357075913777e-06, "loss": 0.0547, "step": 17800 }, { "epoch": 8.345829428303656, "grad_norm": 1.9086081981658936, "learning_rate": 8.956771321462044e-06, "loss": 0.0598, "step": 17810 }, { "epoch": 8.350515463917526, "grad_norm": 1.835897445678711, "learning_rate": 8.95618556701031e-06, "loss": 0.0528, "step": 17820 }, { "epoch": 8.355201499531397, "grad_norm": 1.4925363063812256, "learning_rate": 8.955599812558576e-06, "loss": 0.054, "step": 17830 }, { "epoch": 8.359887535145267, "grad_norm": 1.8737494945526123, "learning_rate": 8.955014058106843e-06, "loss": 0.0592, "step": 17840 }, { "epoch": 8.364573570759138, "grad_norm": 2.0734856128692627, "learning_rate": 8.954428303655108e-06, "loss": 0.0577, "step": 17850 }, { "epoch": 8.369259606373008, "grad_norm": 1.1876471042633057, "learning_rate": 8.953842549203375e-06, "loss": 0.0529, "step": 17860 }, { "epoch": 8.373945641986879, "grad_norm": 0.8391751646995544, "learning_rate": 8.953256794751642e-06, "loss": 0.0513, "step": 17870 }, { "epoch": 8.37863167760075, "grad_norm": 2.0527615547180176, "learning_rate": 8.952671040299907e-06, "loss": 0.0802, "step": 17880 }, { "epoch": 8.38331771321462, "grad_norm": 1.1670820713043213, "learning_rate": 8.952085285848174e-06, "loss": 0.0567, "step": 17890 }, { "epoch": 8.388003748828492, "grad_norm": 1.0440400838851929, "learning_rate": 8.95149953139644e-06, "loss": 0.0589, "step": 17900 }, { "epoch": 8.392689784442362, "grad_norm": 1.3903789520263672, "learning_rate": 8.950913776944704e-06, "loss": 0.0555, "step": 17910 }, { "epoch": 8.397375820056233, "grad_norm": 2.042224407196045, "learning_rate": 8.950328022492971e-06, "loss": 0.0705, "step": 17920 }, { "epoch": 8.402061855670103, "grad_norm": 1.8270450830459595, "learning_rate": 8.949742268041238e-06, "loss": 0.0568, "step": 17930 }, { "epoch": 8.406747891283974, "grad_norm": 1.7498126029968262, "learning_rate": 8.949156513589504e-06, "loss": 0.0584, "step": 17940 }, { "epoch": 8.411433926897844, "grad_norm": 1.2420893907546997, "learning_rate": 8.94857075913777e-06, "loss": 0.06, "step": 17950 }, { "epoch": 8.416119962511715, "grad_norm": 1.9896409511566162, "learning_rate": 8.947985004686036e-06, "loss": 0.0505, "step": 17960 }, { "epoch": 8.420805998125585, "grad_norm": 1.1669880151748657, "learning_rate": 8.947399250234303e-06, "loss": 0.0595, "step": 17970 }, { "epoch": 8.425492033739456, "grad_norm": 1.2261865139007568, "learning_rate": 8.94681349578257e-06, "loss": 0.0604, "step": 17980 }, { "epoch": 8.430178069353326, "grad_norm": 1.5421935319900513, "learning_rate": 8.946227741330835e-06, "loss": 0.0621, "step": 17990 }, { "epoch": 8.434864104967197, "grad_norm": 1.9026983976364136, "learning_rate": 8.945641986879102e-06, "loss": 0.0633, "step": 18000 }, { "epoch": 8.434864104967197, "eval_loss": 0.039177875965833664, "eval_pearson_cosine": 0.7822495113035757, "eval_pearson_dot": 0.6511666258149553, "eval_pearson_euclidean": 0.7395462188066446, "eval_pearson_manhattan": 0.7387984914454222, "eval_runtime": 42.2768, "eval_samples_per_second": 35.48, "eval_spearman_cosine": 0.7845228935533591, "eval_spearman_dot": 0.6664111108433938, "eval_spearman_euclidean": 0.7541690232038317, "eval_spearman_manhattan": 0.7537307168421792, "eval_steps_per_second": 35.48, "step": 18000 }, { "epoch": 8.43955014058107, "grad_norm": 0.9349134564399719, "learning_rate": 8.945056232427367e-06, "loss": 0.0479, "step": 18010 }, { "epoch": 8.44423617619494, "grad_norm": 0.9666185975074768, "learning_rate": 8.944470477975634e-06, "loss": 0.0595, "step": 18020 }, { "epoch": 8.44892221180881, "grad_norm": 2.2687034606933594, "learning_rate": 8.943884723523899e-06, "loss": 0.055, "step": 18030 }, { "epoch": 8.45360824742268, "grad_norm": 1.9229964017868042, "learning_rate": 8.943298969072166e-06, "loss": 0.0602, "step": 18040 }, { "epoch": 8.458294283036551, "grad_norm": 2.1603922843933105, "learning_rate": 8.942713214620433e-06, "loss": 0.0736, "step": 18050 }, { "epoch": 8.462980318650422, "grad_norm": 2.0796990394592285, "learning_rate": 8.942127460168698e-06, "loss": 0.0569, "step": 18060 }, { "epoch": 8.467666354264292, "grad_norm": 1.9972143173217773, "learning_rate": 8.941541705716963e-06, "loss": 0.0678, "step": 18070 }, { "epoch": 8.472352389878163, "grad_norm": 0.865214467048645, "learning_rate": 8.94095595126523e-06, "loss": 0.0627, "step": 18080 }, { "epoch": 8.477038425492033, "grad_norm": 2.142777442932129, "learning_rate": 8.940370196813497e-06, "loss": 0.0674, "step": 18090 }, { "epoch": 8.481724461105905, "grad_norm": 1.4264150857925415, "learning_rate": 8.939784442361762e-06, "loss": 0.0671, "step": 18100 }, { "epoch": 8.486410496719776, "grad_norm": 0.7674472332000732, "learning_rate": 8.93919868791003e-06, "loss": 0.0559, "step": 18110 }, { "epoch": 8.491096532333646, "grad_norm": 0.81045001745224, "learning_rate": 8.938612933458294e-06, "loss": 0.0587, "step": 18120 }, { "epoch": 8.495782567947517, "grad_norm": 1.4473369121551514, "learning_rate": 8.938027179006561e-06, "loss": 0.0613, "step": 18130 }, { "epoch": 8.500468603561387, "grad_norm": 1.741360068321228, "learning_rate": 8.937441424554827e-06, "loss": 0.0638, "step": 18140 }, { "epoch": 8.505154639175258, "grad_norm": 2.0288760662078857, "learning_rate": 8.936855670103094e-06, "loss": 0.0586, "step": 18150 }, { "epoch": 8.509840674789128, "grad_norm": 2.380078077316284, "learning_rate": 8.93626991565136e-06, "loss": 0.0612, "step": 18160 }, { "epoch": 8.514526710402999, "grad_norm": 1.9383304119110107, "learning_rate": 8.935684161199626e-06, "loss": 0.0601, "step": 18170 }, { "epoch": 8.51921274601687, "grad_norm": 1.2531105279922485, "learning_rate": 8.935098406747893e-06, "loss": 0.0719, "step": 18180 }, { "epoch": 8.52389878163074, "grad_norm": 0.9482662081718445, "learning_rate": 8.934512652296158e-06, "loss": 0.0546, "step": 18190 }, { "epoch": 8.52858481724461, "grad_norm": 1.4870429039001465, "learning_rate": 8.933926897844423e-06, "loss": 0.0568, "step": 18200 }, { "epoch": 8.533270852858482, "grad_norm": 1.6849005222320557, "learning_rate": 8.933341143392692e-06, "loss": 0.0555, "step": 18210 }, { "epoch": 8.537956888472353, "grad_norm": 1.955870270729065, "learning_rate": 8.932755388940957e-06, "loss": 0.0689, "step": 18220 }, { "epoch": 8.542642924086223, "grad_norm": 1.697548747062683, "learning_rate": 8.932169634489222e-06, "loss": 0.056, "step": 18230 }, { "epoch": 8.547328959700094, "grad_norm": 1.673592209815979, "learning_rate": 8.931583880037489e-06, "loss": 0.0705, "step": 18240 }, { "epoch": 8.552014995313964, "grad_norm": 1.5452814102172852, "learning_rate": 8.930998125585754e-06, "loss": 0.0568, "step": 18250 }, { "epoch": 8.552014995313964, "eval_loss": 0.038943566381931305, "eval_pearson_cosine": 0.7825822620756648, "eval_pearson_dot": 0.6378466620068579, "eval_pearson_euclidean": 0.736198412680281, "eval_pearson_manhattan": 0.7358221479501772, "eval_runtime": 39.5923, "eval_samples_per_second": 37.886, "eval_spearman_cosine": 0.7830920224286129, "eval_spearman_dot": 0.6535736820096772, "eval_spearman_euclidean": 0.7509264123559705, "eval_spearman_manhattan": 0.7510068056516, "eval_steps_per_second": 37.886, "step": 18250 }, { "epoch": 8.556701030927835, "grad_norm": 2.1532504558563232, "learning_rate": 8.930412371134021e-06, "loss": 0.0737, "step": 18260 }, { "epoch": 8.561387066541705, "grad_norm": 1.189831256866455, "learning_rate": 8.929826616682288e-06, "loss": 0.0641, "step": 18270 }, { "epoch": 8.566073102155576, "grad_norm": 1.0703136920928955, "learning_rate": 8.929240862230553e-06, "loss": 0.0597, "step": 18280 }, { "epoch": 8.570759137769446, "grad_norm": 1.7828891277313232, "learning_rate": 8.92865510777882e-06, "loss": 0.056, "step": 18290 }, { "epoch": 8.575445173383319, "grad_norm": 1.6652967929840088, "learning_rate": 8.928069353327085e-06, "loss": 0.0587, "step": 18300 }, { "epoch": 8.580131208997189, "grad_norm": 1.5879887342453003, "learning_rate": 8.927483598875352e-06, "loss": 0.0652, "step": 18310 }, { "epoch": 8.58481724461106, "grad_norm": 1.400453805923462, "learning_rate": 8.92689784442362e-06, "loss": 0.0589, "step": 18320 }, { "epoch": 8.58950328022493, "grad_norm": 1.2532896995544434, "learning_rate": 8.926312089971884e-06, "loss": 0.0538, "step": 18330 }, { "epoch": 8.5941893158388, "grad_norm": 1.3725074529647827, "learning_rate": 8.925726335520151e-06, "loss": 0.0458, "step": 18340 }, { "epoch": 8.598875351452671, "grad_norm": 0.8545303344726562, "learning_rate": 8.925140581068417e-06, "loss": 0.054, "step": 18350 }, { "epoch": 8.603561387066541, "grad_norm": 2.2644894123077393, "learning_rate": 8.924554826616682e-06, "loss": 0.0727, "step": 18360 }, { "epoch": 8.608247422680412, "grad_norm": 2.0160939693450928, "learning_rate": 8.92396907216495e-06, "loss": 0.0716, "step": 18370 }, { "epoch": 8.612933458294282, "grad_norm": 1.2805579900741577, "learning_rate": 8.923383317713216e-06, "loss": 0.06, "step": 18380 }, { "epoch": 8.617619493908153, "grad_norm": 2.359361410140991, "learning_rate": 8.922797563261481e-06, "loss": 0.0568, "step": 18390 }, { "epoch": 8.622305529522023, "grad_norm": 2.423886775970459, "learning_rate": 8.922211808809748e-06, "loss": 0.0732, "step": 18400 }, { "epoch": 8.626991565135896, "grad_norm": 1.2898362874984741, "learning_rate": 8.921626054358013e-06, "loss": 0.0566, "step": 18410 }, { "epoch": 8.631677600749766, "grad_norm": 0.6553903818130493, "learning_rate": 8.92104029990628e-06, "loss": 0.0507, "step": 18420 }, { "epoch": 8.636363636363637, "grad_norm": 1.3605937957763672, "learning_rate": 8.920454545454547e-06, "loss": 0.0581, "step": 18430 }, { "epoch": 8.641049671977507, "grad_norm": 1.9910422563552856, "learning_rate": 8.919868791002812e-06, "loss": 0.0566, "step": 18440 }, { "epoch": 8.645735707591378, "grad_norm": 2.0107765197753906, "learning_rate": 8.919283036551079e-06, "loss": 0.068, "step": 18450 }, { "epoch": 8.650421743205248, "grad_norm": 1.168728232383728, "learning_rate": 8.918697282099344e-06, "loss": 0.0589, "step": 18460 }, { "epoch": 8.655107778819119, "grad_norm": 2.3766093254089355, "learning_rate": 8.918111527647611e-06, "loss": 0.061, "step": 18470 }, { "epoch": 8.65979381443299, "grad_norm": 1.6704158782958984, "learning_rate": 8.917525773195878e-06, "loss": 0.0629, "step": 18480 }, { "epoch": 8.66447985004686, "grad_norm": 1.9102870225906372, "learning_rate": 8.916940018744143e-06, "loss": 0.0464, "step": 18490 }, { "epoch": 8.669165885660732, "grad_norm": 2.742626428604126, "learning_rate": 8.91635426429241e-06, "loss": 0.0645, "step": 18500 }, { "epoch": 8.669165885660732, "eval_loss": 0.03774439916014671, "eval_pearson_cosine": 0.7887750445614863, "eval_pearson_dot": 0.6513653629224123, "eval_pearson_euclidean": 0.7319388075486906, "eval_pearson_manhattan": 0.7314905753471947, "eval_runtime": 40.5433, "eval_samples_per_second": 36.998, "eval_spearman_cosine": 0.7892064111202951, "eval_spearman_dot": 0.6704252435211006, "eval_spearman_euclidean": 0.7498699934549212, "eval_spearman_manhattan": 0.7495320910792913, "eval_steps_per_second": 36.998, "step": 18500 }, { "epoch": 8.673851921274602, "grad_norm": 1.4276272058486938, "learning_rate": 8.915768509840675e-06, "loss": 0.0466, "step": 18510 }, { "epoch": 8.678537956888473, "grad_norm": 1.780705451965332, "learning_rate": 8.91518275538894e-06, "loss": 0.073, "step": 18520 }, { "epoch": 8.683223992502343, "grad_norm": 1.422787070274353, "learning_rate": 8.914597000937208e-06, "loss": 0.0662, "step": 18530 }, { "epoch": 8.687910028116214, "grad_norm": 1.8989777565002441, "learning_rate": 8.914011246485474e-06, "loss": 0.053, "step": 18540 }, { "epoch": 8.692596063730084, "grad_norm": 1.208201289176941, "learning_rate": 8.91342549203374e-06, "loss": 0.0557, "step": 18550 }, { "epoch": 8.697282099343955, "grad_norm": 1.4029545783996582, "learning_rate": 8.912839737582007e-06, "loss": 0.0498, "step": 18560 }, { "epoch": 8.701968134957825, "grad_norm": 1.4905900955200195, "learning_rate": 8.912253983130272e-06, "loss": 0.0586, "step": 18570 }, { "epoch": 8.706654170571696, "grad_norm": 1.494296669960022, "learning_rate": 8.911668228678539e-06, "loss": 0.0597, "step": 18580 }, { "epoch": 8.711340206185566, "grad_norm": 1.8540481328964233, "learning_rate": 8.911082474226806e-06, "loss": 0.06, "step": 18590 }, { "epoch": 8.716026241799437, "grad_norm": 0.9429871439933777, "learning_rate": 8.910496719775071e-06, "loss": 0.0708, "step": 18600 }, { "epoch": 8.720712277413309, "grad_norm": 1.333791732788086, "learning_rate": 8.909910965323338e-06, "loss": 0.0583, "step": 18610 }, { "epoch": 8.72539831302718, "grad_norm": 1.1609207391738892, "learning_rate": 8.909325210871603e-06, "loss": 0.0499, "step": 18620 }, { "epoch": 8.73008434864105, "grad_norm": 1.9390841722488403, "learning_rate": 8.90873945641987e-06, "loss": 0.0751, "step": 18630 }, { "epoch": 8.73477038425492, "grad_norm": 1.693433165550232, "learning_rate": 8.908153701968135e-06, "loss": 0.0685, "step": 18640 }, { "epoch": 8.739456419868791, "grad_norm": 1.7784210443496704, "learning_rate": 8.907567947516402e-06, "loss": 0.0524, "step": 18650 }, { "epoch": 8.744142455482661, "grad_norm": 1.4945738315582275, "learning_rate": 8.906982193064669e-06, "loss": 0.064, "step": 18660 }, { "epoch": 8.748828491096532, "grad_norm": 1.7549676895141602, "learning_rate": 8.906396438612934e-06, "loss": 0.0634, "step": 18670 }, { "epoch": 8.753514526710402, "grad_norm": 1.1789377927780151, "learning_rate": 8.9058106841612e-06, "loss": 0.0597, "step": 18680 }, { "epoch": 8.758200562324273, "grad_norm": 1.983936071395874, "learning_rate": 8.905224929709466e-06, "loss": 0.0614, "step": 18690 }, { "epoch": 8.762886597938145, "grad_norm": 2.564476251602173, "learning_rate": 8.904639175257732e-06, "loss": 0.0711, "step": 18700 }, { "epoch": 8.767572633552016, "grad_norm": 0.5671543478965759, "learning_rate": 8.904053420805998e-06, "loss": 0.0586, "step": 18710 }, { "epoch": 8.772258669165886, "grad_norm": 1.1714857816696167, "learning_rate": 8.903467666354265e-06, "loss": 0.06, "step": 18720 }, { "epoch": 8.776944704779757, "grad_norm": 1.8699477910995483, "learning_rate": 8.90288191190253e-06, "loss": 0.052, "step": 18730 }, { "epoch": 8.781630740393627, "grad_norm": 1.0824236869812012, "learning_rate": 8.902296157450798e-06, "loss": 0.0638, "step": 18740 }, { "epoch": 8.786316776007498, "grad_norm": 1.3703303337097168, "learning_rate": 8.901710402999063e-06, "loss": 0.0563, "step": 18750 }, { "epoch": 8.786316776007498, "eval_loss": 0.037630029022693634, "eval_pearson_cosine": 0.7870129329535697, "eval_pearson_dot": 0.6393485188875303, "eval_pearson_euclidean": 0.7289305204204517, "eval_pearson_manhattan": 0.7285165698261729, "eval_runtime": 40.5046, "eval_samples_per_second": 37.033, "eval_spearman_cosine": 0.7878034848552876, "eval_spearman_dot": 0.6605642491363777, "eval_spearman_euclidean": 0.7454305721470555, "eval_spearman_manhattan": 0.745136975852769, "eval_steps_per_second": 37.033, "step": 18750 }, { "epoch": 8.791002811621368, "grad_norm": 1.745339035987854, "learning_rate": 8.90112464854733e-06, "loss": 0.0566, "step": 18760 }, { "epoch": 8.795688847235239, "grad_norm": 1.5828258991241455, "learning_rate": 8.900538894095597e-06, "loss": 0.0602, "step": 18770 }, { "epoch": 8.800374882849109, "grad_norm": 1.4292279481887817, "learning_rate": 8.899953139643862e-06, "loss": 0.0638, "step": 18780 }, { "epoch": 8.80506091846298, "grad_norm": 1.956358790397644, "learning_rate": 8.899367385192129e-06, "loss": 0.0667, "step": 18790 }, { "epoch": 8.80974695407685, "grad_norm": 0.9023747444152832, "learning_rate": 8.898781630740394e-06, "loss": 0.0662, "step": 18800 }, { "epoch": 8.814432989690722, "grad_norm": 2.1007392406463623, "learning_rate": 8.89819587628866e-06, "loss": 0.0561, "step": 18810 }, { "epoch": 8.819119025304593, "grad_norm": 2.0597100257873535, "learning_rate": 8.897610121836928e-06, "loss": 0.0666, "step": 18820 }, { "epoch": 8.823805060918463, "grad_norm": 1.1200934648513794, "learning_rate": 8.897024367385193e-06, "loss": 0.0538, "step": 18830 }, { "epoch": 8.828491096532334, "grad_norm": 2.032970428466797, "learning_rate": 8.896438612933458e-06, "loss": 0.0734, "step": 18840 }, { "epoch": 8.833177132146204, "grad_norm": 1.5491752624511719, "learning_rate": 8.895852858481725e-06, "loss": 0.0638, "step": 18850 }, { "epoch": 8.837863167760075, "grad_norm": 0.7450467348098755, "learning_rate": 8.89526710402999e-06, "loss": 0.0752, "step": 18860 }, { "epoch": 8.842549203373945, "grad_norm": 1.0671043395996094, "learning_rate": 8.894681349578257e-06, "loss": 0.0562, "step": 18870 }, { "epoch": 8.847235238987816, "grad_norm": 1.3302968740463257, "learning_rate": 8.894095595126524e-06, "loss": 0.0573, "step": 18880 }, { "epoch": 8.851921274601686, "grad_norm": 1.423279881477356, "learning_rate": 8.89350984067479e-06, "loss": 0.0645, "step": 18890 }, { "epoch": 8.856607310215558, "grad_norm": 1.1250574588775635, "learning_rate": 8.892924086223056e-06, "loss": 0.0616, "step": 18900 }, { "epoch": 8.861293345829429, "grad_norm": 1.3438372611999512, "learning_rate": 8.892338331771322e-06, "loss": 0.0525, "step": 18910 }, { "epoch": 8.8659793814433, "grad_norm": 1.5097957849502563, "learning_rate": 8.891752577319588e-06, "loss": 0.0593, "step": 18920 }, { "epoch": 8.87066541705717, "grad_norm": 1.9522205591201782, "learning_rate": 8.891166822867855e-06, "loss": 0.0674, "step": 18930 }, { "epoch": 8.87535145267104, "grad_norm": 1.1841950416564941, "learning_rate": 8.89058106841612e-06, "loss": 0.0641, "step": 18940 }, { "epoch": 8.880037488284911, "grad_norm": 1.658074140548706, "learning_rate": 8.889995313964388e-06, "loss": 0.0737, "step": 18950 }, { "epoch": 8.884723523898781, "grad_norm": 1.5924397706985474, "learning_rate": 8.889409559512653e-06, "loss": 0.07, "step": 18960 }, { "epoch": 8.889409559512652, "grad_norm": 1.379166603088379, "learning_rate": 8.888823805060918e-06, "loss": 0.0689, "step": 18970 }, { "epoch": 8.894095595126522, "grad_norm": 1.3292274475097656, "learning_rate": 8.888238050609187e-06, "loss": 0.0564, "step": 18980 }, { "epoch": 8.898781630740393, "grad_norm": 1.4383434057235718, "learning_rate": 8.887652296157452e-06, "loss": 0.0583, "step": 18990 }, { "epoch": 8.903467666354265, "grad_norm": 2.1288797855377197, "learning_rate": 8.887066541705717e-06, "loss": 0.0669, "step": 19000 }, { "epoch": 8.903467666354265, "eval_loss": 0.03827948495745659, "eval_pearson_cosine": 0.7850468616972819, "eval_pearson_dot": 0.6358914679070722, "eval_pearson_euclidean": 0.7244498308050709, "eval_pearson_manhattan": 0.7238488356503296, "eval_runtime": 40.8545, "eval_samples_per_second": 36.716, "eval_spearman_cosine": 0.7865593789879696, "eval_spearman_dot": 0.6571265794919958, "eval_spearman_euclidean": 0.7437161421017117, "eval_spearman_manhattan": 0.7432616809242956, "eval_steps_per_second": 36.716, "step": 19000 }, { "epoch": 8.908153701968136, "grad_norm": 1.1705414056777954, "learning_rate": 8.886480787253984e-06, "loss": 0.0549, "step": 19010 }, { "epoch": 8.912839737582006, "grad_norm": 2.2512776851654053, "learning_rate": 8.885895032802249e-06, "loss": 0.0715, "step": 19020 }, { "epoch": 8.917525773195877, "grad_norm": 1.7541801929473877, "learning_rate": 8.885309278350516e-06, "loss": 0.0657, "step": 19030 }, { "epoch": 8.922211808809747, "grad_norm": 1.3972922563552856, "learning_rate": 8.884723523898783e-06, "loss": 0.0516, "step": 19040 }, { "epoch": 8.926897844423618, "grad_norm": 0.9502004384994507, "learning_rate": 8.884137769447048e-06, "loss": 0.0664, "step": 19050 }, { "epoch": 8.931583880037488, "grad_norm": 2.1048943996429443, "learning_rate": 8.883552014995315e-06, "loss": 0.0668, "step": 19060 }, { "epoch": 8.936269915651359, "grad_norm": 2.441774368286133, "learning_rate": 8.88296626054358e-06, "loss": 0.0681, "step": 19070 }, { "epoch": 8.940955951265229, "grad_norm": 1.6815327405929565, "learning_rate": 8.882380506091847e-06, "loss": 0.0583, "step": 19080 }, { "epoch": 8.9456419868791, "grad_norm": 2.0613820552825928, "learning_rate": 8.881794751640114e-06, "loss": 0.0605, "step": 19090 }, { "epoch": 8.950328022492972, "grad_norm": 2.164487838745117, "learning_rate": 8.88120899718838e-06, "loss": 0.0749, "step": 19100 }, { "epoch": 8.955014058106842, "grad_norm": 1.838508129119873, "learning_rate": 8.880623242736646e-06, "loss": 0.0588, "step": 19110 }, { "epoch": 8.959700093720713, "grad_norm": 2.338103771209717, "learning_rate": 8.880037488284912e-06, "loss": 0.06, "step": 19120 }, { "epoch": 8.964386129334583, "grad_norm": 1.5945453643798828, "learning_rate": 8.879451733833177e-06, "loss": 0.062, "step": 19130 }, { "epoch": 8.969072164948454, "grad_norm": 1.4666954278945923, "learning_rate": 8.878865979381444e-06, "loss": 0.0593, "step": 19140 }, { "epoch": 8.973758200562324, "grad_norm": 2.2944576740264893, "learning_rate": 8.87828022492971e-06, "loss": 0.0621, "step": 19150 }, { "epoch": 8.978444236176195, "grad_norm": 1.8283967971801758, "learning_rate": 8.877694470477976e-06, "loss": 0.0646, "step": 19160 }, { "epoch": 8.983130271790065, "grad_norm": 1.3863099813461304, "learning_rate": 8.877108716026243e-06, "loss": 0.0524, "step": 19170 }, { "epoch": 8.987816307403936, "grad_norm": 2.2185399532318115, "learning_rate": 8.876522961574508e-06, "loss": 0.0741, "step": 19180 }, { "epoch": 8.992502343017806, "grad_norm": 1.0174760818481445, "learning_rate": 8.875937207122775e-06, "loss": 0.0613, "step": 19190 }, { "epoch": 8.997188378631678, "grad_norm": 2.3824729919433594, "learning_rate": 8.87535145267104e-06, "loss": 0.0588, "step": 19200 }, { "epoch": 9.001874414245549, "grad_norm": 0.5327388048171997, "learning_rate": 8.874765698219307e-06, "loss": 0.0531, "step": 19210 }, { "epoch": 9.00656044985942, "grad_norm": 1.4109519720077515, "learning_rate": 8.874179943767574e-06, "loss": 0.0435, "step": 19220 }, { "epoch": 9.01124648547329, "grad_norm": 1.2190899848937988, "learning_rate": 8.873594189315839e-06, "loss": 0.0499, "step": 19230 }, { "epoch": 9.01593252108716, "grad_norm": 1.7541508674621582, "learning_rate": 8.873008434864106e-06, "loss": 0.046, "step": 19240 }, { "epoch": 9.02061855670103, "grad_norm": 0.7862921953201294, "learning_rate": 8.872422680412371e-06, "loss": 0.0436, "step": 19250 }, { "epoch": 9.02061855670103, "eval_loss": 0.03772435337305069, "eval_pearson_cosine": 0.7855273741960076, "eval_pearson_dot": 0.6489285063428554, "eval_pearson_euclidean": 0.7292628651619779, "eval_pearson_manhattan": 0.728854161244513, "eval_runtime": 39.9971, "eval_samples_per_second": 37.503, "eval_spearman_cosine": 0.7855635384218336, "eval_spearman_dot": 0.6695729920402427, "eval_spearman_euclidean": 0.7464551903137364, "eval_spearman_manhattan": 0.7461578348935499, "eval_steps_per_second": 37.503, "step": 19250 }, { "epoch": 9.025304592314901, "grad_norm": 1.5160027742385864, "learning_rate": 8.871836925960638e-06, "loss": 0.0402, "step": 19260 }, { "epoch": 9.029990627928772, "grad_norm": 0.9218592047691345, "learning_rate": 8.871251171508905e-06, "loss": 0.0548, "step": 19270 }, { "epoch": 9.034676663542642, "grad_norm": 1.078574299812317, "learning_rate": 8.87066541705717e-06, "loss": 0.0501, "step": 19280 }, { "epoch": 9.039362699156513, "grad_norm": 1.2724040746688843, "learning_rate": 8.870079662605436e-06, "loss": 0.0455, "step": 19290 }, { "epoch": 9.044048734770385, "grad_norm": 1.7886642217636108, "learning_rate": 8.869493908153702e-06, "loss": 0.0483, "step": 19300 }, { "epoch": 9.048734770384256, "grad_norm": 2.028181552886963, "learning_rate": 8.868908153701968e-06, "loss": 0.0453, "step": 19310 }, { "epoch": 9.053420805998126, "grad_norm": 1.9034656286239624, "learning_rate": 8.868322399250235e-06, "loss": 0.049, "step": 19320 }, { "epoch": 9.058106841611997, "grad_norm": 1.34951651096344, "learning_rate": 8.867736644798502e-06, "loss": 0.0486, "step": 19330 }, { "epoch": 9.062792877225867, "grad_norm": 2.447568655014038, "learning_rate": 8.867150890346767e-06, "loss": 0.0464, "step": 19340 }, { "epoch": 9.067478912839738, "grad_norm": 0.8183003664016724, "learning_rate": 8.866565135895034e-06, "loss": 0.0469, "step": 19350 }, { "epoch": 9.072164948453608, "grad_norm": 1.3520687818527222, "learning_rate": 8.865979381443299e-06, "loss": 0.052, "step": 19360 }, { "epoch": 9.076850984067478, "grad_norm": 1.827717900276184, "learning_rate": 8.865393626991566e-06, "loss": 0.052, "step": 19370 }, { "epoch": 9.081537019681349, "grad_norm": 1.5364701747894287, "learning_rate": 8.864807872539833e-06, "loss": 0.0522, "step": 19380 }, { "epoch": 9.08622305529522, "grad_norm": 1.0967464447021484, "learning_rate": 8.864222118088098e-06, "loss": 0.0546, "step": 19390 }, { "epoch": 9.090909090909092, "grad_norm": 1.141569972038269, "learning_rate": 8.863636363636365e-06, "loss": 0.054, "step": 19400 }, { "epoch": 9.095595126522962, "grad_norm": 1.025898814201355, "learning_rate": 8.86305060918463e-06, "loss": 0.0549, "step": 19410 }, { "epoch": 9.100281162136833, "grad_norm": 0.9598554968833923, "learning_rate": 8.862464854732897e-06, "loss": 0.0562, "step": 19420 }, { "epoch": 9.104967197750703, "grad_norm": 1.6386889219284058, "learning_rate": 8.861879100281164e-06, "loss": 0.0447, "step": 19430 }, { "epoch": 9.109653233364574, "grad_norm": 1.3437844514846802, "learning_rate": 8.861293345829429e-06, "loss": 0.0545, "step": 19440 }, { "epoch": 9.114339268978444, "grad_norm": 0.9290686249732971, "learning_rate": 8.860707591377694e-06, "loss": 0.0472, "step": 19450 }, { "epoch": 9.119025304592315, "grad_norm": 1.5257052183151245, "learning_rate": 8.860121836925961e-06, "loss": 0.0499, "step": 19460 }, { "epoch": 9.123711340206185, "grad_norm": 1.8884594440460205, "learning_rate": 8.859536082474226e-06, "loss": 0.0607, "step": 19470 }, { "epoch": 9.128397375820056, "grad_norm": 1.6822651624679565, "learning_rate": 8.858950328022493e-06, "loss": 0.0504, "step": 19480 }, { "epoch": 9.133083411433926, "grad_norm": 1.626015067100525, "learning_rate": 8.85836457357076e-06, "loss": 0.0516, "step": 19490 }, { "epoch": 9.137769447047798, "grad_norm": 1.1759178638458252, "learning_rate": 8.857778819119026e-06, "loss": 0.047, "step": 19500 }, { "epoch": 9.137769447047798, "eval_loss": 0.03765318915247917, "eval_pearson_cosine": 0.7869684109175026, "eval_pearson_dot": 0.6458746869453549, "eval_pearson_euclidean": 0.7253508328002916, "eval_pearson_manhattan": 0.7249120216278655, "eval_runtime": 39.8108, "eval_samples_per_second": 37.678, "eval_spearman_cosine": 0.7881589626771033, "eval_spearman_dot": 0.6693627499015223, "eval_spearman_euclidean": 0.7413480639045013, "eval_spearman_manhattan": 0.7414303112939764, "eval_steps_per_second": 37.678, "step": 19500 }, { "epoch": 9.142455482661669, "grad_norm": 1.134598731994629, "learning_rate": 8.857193064667292e-06, "loss": 0.0531, "step": 19510 }, { "epoch": 9.14714151827554, "grad_norm": 1.447082757949829, "learning_rate": 8.856607310215558e-06, "loss": 0.0531, "step": 19520 }, { "epoch": 9.15182755388941, "grad_norm": 1.004354476928711, "learning_rate": 8.856021555763825e-06, "loss": 0.0511, "step": 19530 }, { "epoch": 9.15651358950328, "grad_norm": 1.6353479623794556, "learning_rate": 8.855435801312092e-06, "loss": 0.0467, "step": 19540 }, { "epoch": 9.16119962511715, "grad_norm": 1.8899836540222168, "learning_rate": 8.854850046860357e-06, "loss": 0.0582, "step": 19550 }, { "epoch": 9.165885660731021, "grad_norm": 1.306091070175171, "learning_rate": 8.854264292408624e-06, "loss": 0.0571, "step": 19560 }, { "epoch": 9.170571696344892, "grad_norm": 1.7783139944076538, "learning_rate": 8.853678537956889e-06, "loss": 0.0543, "step": 19570 }, { "epoch": 9.175257731958762, "grad_norm": 1.1551589965820312, "learning_rate": 8.853092783505156e-06, "loss": 0.0626, "step": 19580 }, { "epoch": 9.179943767572633, "grad_norm": 0.8448215126991272, "learning_rate": 8.852507029053423e-06, "loss": 0.0509, "step": 19590 }, { "epoch": 9.184629803186505, "grad_norm": 1.3088339567184448, "learning_rate": 8.851921274601688e-06, "loss": 0.0532, "step": 19600 }, { "epoch": 9.189315838800376, "grad_norm": 1.2790261507034302, "learning_rate": 8.851335520149953e-06, "loss": 0.0366, "step": 19610 }, { "epoch": 9.194001874414246, "grad_norm": 1.4637041091918945, "learning_rate": 8.85074976569822e-06, "loss": 0.0476, "step": 19620 }, { "epoch": 9.198687910028116, "grad_norm": 1.1702561378479004, "learning_rate": 8.850164011246485e-06, "loss": 0.0539, "step": 19630 }, { "epoch": 9.203373945641987, "grad_norm": 1.4241745471954346, "learning_rate": 8.849578256794752e-06, "loss": 0.0488, "step": 19640 }, { "epoch": 9.208059981255857, "grad_norm": 1.3767116069793701, "learning_rate": 8.848992502343019e-06, "loss": 0.0501, "step": 19650 }, { "epoch": 9.212746016869728, "grad_norm": 0.946832001209259, "learning_rate": 8.848406747891284e-06, "loss": 0.0509, "step": 19660 }, { "epoch": 9.217432052483598, "grad_norm": 2.132277011871338, "learning_rate": 8.847820993439551e-06, "loss": 0.0517, "step": 19670 }, { "epoch": 9.222118088097469, "grad_norm": 3.003037929534912, "learning_rate": 8.847235238987816e-06, "loss": 0.0601, "step": 19680 }, { "epoch": 9.22680412371134, "grad_norm": 0.8297474384307861, "learning_rate": 8.846649484536083e-06, "loss": 0.0381, "step": 19690 }, { "epoch": 9.231490159325212, "grad_norm": 0.8142613768577576, "learning_rate": 8.846063730084349e-06, "loss": 0.0528, "step": 19700 }, { "epoch": 9.236176194939082, "grad_norm": 1.9133763313293457, "learning_rate": 8.845477975632616e-06, "loss": 0.0536, "step": 19710 }, { "epoch": 9.240862230552953, "grad_norm": 1.1931358575820923, "learning_rate": 8.844892221180882e-06, "loss": 0.0558, "step": 19720 }, { "epoch": 9.245548266166823, "grad_norm": 2.3464787006378174, "learning_rate": 8.844306466729148e-06, "loss": 0.0505, "step": 19730 }, { "epoch": 9.250234301780694, "grad_norm": 1.3109287023544312, "learning_rate": 8.843720712277415e-06, "loss": 0.0582, "step": 19740 }, { "epoch": 9.254920337394564, "grad_norm": 1.866816520690918, "learning_rate": 8.84313495782568e-06, "loss": 0.0482, "step": 19750 }, { "epoch": 9.254920337394564, "eval_loss": 0.03766845539212227, "eval_pearson_cosine": 0.786280047827276, "eval_pearson_dot": 0.6498320134943469, "eval_pearson_euclidean": 0.7306029375409793, "eval_pearson_manhattan": 0.7296493603800656, "eval_runtime": 40.2507, "eval_samples_per_second": 37.266, "eval_spearman_cosine": 0.7871053277749581, "eval_spearman_dot": 0.6689992229589644, "eval_spearman_euclidean": 0.7449412319412662, "eval_spearman_manhattan": 0.7442196282250385, "eval_steps_per_second": 37.266, "step": 19750 }, { "epoch": 9.259606373008435, "grad_norm": 1.386021614074707, "learning_rate": 8.842549203373947e-06, "loss": 0.0512, "step": 19760 }, { "epoch": 9.264292408622305, "grad_norm": 1.7170544862747192, "learning_rate": 8.841963448922212e-06, "loss": 0.0567, "step": 19770 }, { "epoch": 9.268978444236176, "grad_norm": 2.6358816623687744, "learning_rate": 8.841377694470479e-06, "loss": 0.0611, "step": 19780 }, { "epoch": 9.273664479850046, "grad_norm": 1.0627405643463135, "learning_rate": 8.840791940018744e-06, "loss": 0.0516, "step": 19790 }, { "epoch": 9.278350515463918, "grad_norm": 2.116197109222412, "learning_rate": 8.840206185567011e-06, "loss": 0.0442, "step": 19800 }, { "epoch": 9.283036551077789, "grad_norm": 1.0570743083953857, "learning_rate": 8.839620431115276e-06, "loss": 0.0517, "step": 19810 }, { "epoch": 9.28772258669166, "grad_norm": 0.9444879293441772, "learning_rate": 8.839034676663543e-06, "loss": 0.0427, "step": 19820 }, { "epoch": 9.29240862230553, "grad_norm": 0.820633590221405, "learning_rate": 8.83844892221181e-06, "loss": 0.0505, "step": 19830 }, { "epoch": 9.2970946579194, "grad_norm": 0.9164274334907532, "learning_rate": 8.837863167760075e-06, "loss": 0.0539, "step": 19840 }, { "epoch": 9.30178069353327, "grad_norm": 1.6659798622131348, "learning_rate": 8.837277413308342e-06, "loss": 0.055, "step": 19850 }, { "epoch": 9.306466729147141, "grad_norm": 1.224489450454712, "learning_rate": 8.836691658856607e-06, "loss": 0.0487, "step": 19860 }, { "epoch": 9.311152764761012, "grad_norm": 1.6015446186065674, "learning_rate": 8.836105904404874e-06, "loss": 0.0622, "step": 19870 }, { "epoch": 9.315838800374882, "grad_norm": 2.066589593887329, "learning_rate": 8.835520149953141e-06, "loss": 0.0562, "step": 19880 }, { "epoch": 9.320524835988753, "grad_norm": 1.8341182470321655, "learning_rate": 8.834934395501406e-06, "loss": 0.0414, "step": 19890 }, { "epoch": 9.325210871602625, "grad_norm": 2.1060688495635986, "learning_rate": 8.834348641049673e-06, "loss": 0.0423, "step": 19900 }, { "epoch": 9.329896907216495, "grad_norm": 2.0976791381835938, "learning_rate": 8.833762886597939e-06, "loss": 0.0562, "step": 19910 }, { "epoch": 9.334582942830366, "grad_norm": 1.7656900882720947, "learning_rate": 8.833177132146204e-06, "loss": 0.0454, "step": 19920 }, { "epoch": 9.339268978444236, "grad_norm": 0.9391831755638123, "learning_rate": 8.832591377694472e-06, "loss": 0.0471, "step": 19930 }, { "epoch": 9.343955014058107, "grad_norm": 1.8361108303070068, "learning_rate": 8.832005623242738e-06, "loss": 0.0521, "step": 19940 }, { "epoch": 9.348641049671977, "grad_norm": 1.4012130498886108, "learning_rate": 8.831419868791003e-06, "loss": 0.0476, "step": 19950 }, { "epoch": 9.353327085285848, "grad_norm": 1.4812968969345093, "learning_rate": 8.83083411433927e-06, "loss": 0.0356, "step": 19960 }, { "epoch": 9.358013120899718, "grad_norm": 1.4447283744812012, "learning_rate": 8.830248359887535e-06, "loss": 0.046, "step": 19970 }, { "epoch": 9.362699156513589, "grad_norm": 1.9198623895645142, "learning_rate": 8.829662605435802e-06, "loss": 0.0546, "step": 19980 }, { "epoch": 9.36738519212746, "grad_norm": 0.8466697335243225, "learning_rate": 8.829076850984069e-06, "loss": 0.0481, "step": 19990 }, { "epoch": 9.372071227741332, "grad_norm": 1.5158565044403076, "learning_rate": 8.828491096532334e-06, "loss": 0.0529, "step": 20000 }, { "epoch": 9.372071227741332, "eval_loss": 0.0377335324883461, "eval_pearson_cosine": 0.7872657190030239, "eval_pearson_dot": 0.6489881022917316, "eval_pearson_euclidean": 0.7290286852364005, "eval_pearson_manhattan": 0.7285143498985862, "eval_runtime": 39.7128, "eval_samples_per_second": 37.771, "eval_spearman_cosine": 0.7888105939241997, "eval_spearman_dot": 0.6689738777456538, "eval_spearman_euclidean": 0.7426040363283044, "eval_spearman_manhattan": 0.742345267890976, "eval_steps_per_second": 37.771, "step": 20000 }, { "epoch": 9.376757263355202, "grad_norm": 1.831284999847412, "learning_rate": 8.827905342080601e-06, "loss": 0.0489, "step": 20010 }, { "epoch": 9.381443298969073, "grad_norm": 1.498917818069458, "learning_rate": 8.827319587628866e-06, "loss": 0.0497, "step": 20020 }, { "epoch": 9.386129334582943, "grad_norm": 1.7997996807098389, "learning_rate": 8.826733833177133e-06, "loss": 0.0543, "step": 20030 }, { "epoch": 9.390815370196814, "grad_norm": 1.4676984548568726, "learning_rate": 8.8261480787254e-06, "loss": 0.0402, "step": 20040 }, { "epoch": 9.395501405810684, "grad_norm": 1.4647475481033325, "learning_rate": 8.825562324273665e-06, "loss": 0.0483, "step": 20050 }, { "epoch": 9.400187441424555, "grad_norm": 1.9055359363555908, "learning_rate": 8.824976569821932e-06, "loss": 0.057, "step": 20060 }, { "epoch": 9.404873477038425, "grad_norm": 1.243730068206787, "learning_rate": 8.824390815370197e-06, "loss": 0.0521, "step": 20070 }, { "epoch": 9.409559512652296, "grad_norm": 2.290194272994995, "learning_rate": 8.823805060918463e-06, "loss": 0.0536, "step": 20080 }, { "epoch": 9.414245548266166, "grad_norm": 1.28463613986969, "learning_rate": 8.823219306466731e-06, "loss": 0.0627, "step": 20090 }, { "epoch": 9.418931583880038, "grad_norm": 1.6804534196853638, "learning_rate": 8.822633552014996e-06, "loss": 0.0512, "step": 20100 }, { "epoch": 9.423617619493909, "grad_norm": 0.8809636831283569, "learning_rate": 8.822047797563262e-06, "loss": 0.0429, "step": 20110 }, { "epoch": 9.42830365510778, "grad_norm": 1.8962526321411133, "learning_rate": 8.821462043111529e-06, "loss": 0.0531, "step": 20120 }, { "epoch": 9.43298969072165, "grad_norm": 1.0176962614059448, "learning_rate": 8.820876288659794e-06, "loss": 0.0467, "step": 20130 }, { "epoch": 9.43767572633552, "grad_norm": 1.49270761013031, "learning_rate": 8.82029053420806e-06, "loss": 0.0578, "step": 20140 }, { "epoch": 9.44236176194939, "grad_norm": 1.4182747602462769, "learning_rate": 8.819704779756328e-06, "loss": 0.051, "step": 20150 }, { "epoch": 9.447047797563261, "grad_norm": 1.2575933933258057, "learning_rate": 8.819119025304593e-06, "loss": 0.0503, "step": 20160 }, { "epoch": 9.451733833177132, "grad_norm": 1.8485591411590576, "learning_rate": 8.81853327085286e-06, "loss": 0.0578, "step": 20170 }, { "epoch": 9.456419868791002, "grad_norm": 1.7406198978424072, "learning_rate": 8.817947516401125e-06, "loss": 0.0532, "step": 20180 }, { "epoch": 9.461105904404873, "grad_norm": 1.138297438621521, "learning_rate": 8.817361761949392e-06, "loss": 0.0484, "step": 20190 }, { "epoch": 9.465791940018745, "grad_norm": 1.2107694149017334, "learning_rate": 8.816776007497657e-06, "loss": 0.045, "step": 20200 }, { "epoch": 9.470477975632615, "grad_norm": 1.5909892320632935, "learning_rate": 8.816190253045924e-06, "loss": 0.0463, "step": 20210 }, { "epoch": 9.475164011246486, "grad_norm": 1.1377689838409424, "learning_rate": 8.815604498594191e-06, "loss": 0.0688, "step": 20220 }, { "epoch": 9.479850046860356, "grad_norm": 2.0724937915802, "learning_rate": 8.815018744142456e-06, "loss": 0.0547, "step": 20230 }, { "epoch": 9.484536082474227, "grad_norm": 0.9459996819496155, "learning_rate": 8.814432989690721e-06, "loss": 0.0482, "step": 20240 }, { "epoch": 9.489222118088097, "grad_norm": 0.7871867418289185, "learning_rate": 8.813847235238988e-06, "loss": 0.0429, "step": 20250 }, { "epoch": 9.489222118088097, "eval_loss": 0.03782571852207184, "eval_pearson_cosine": 0.786819398080425, "eval_pearson_dot": 0.6502888686958528, "eval_pearson_euclidean": 0.7291925678539002, "eval_pearson_manhattan": 0.7285750403533555, "eval_runtime": 41.4193, "eval_samples_per_second": 36.215, "eval_spearman_cosine": 0.7882680919473954, "eval_spearman_dot": 0.6683716175414093, "eval_spearman_euclidean": 0.7431012804543077, "eval_spearman_manhattan": 0.7425560629845656, "eval_steps_per_second": 36.215, "step": 20250 }, { "epoch": 9.493908153701968, "grad_norm": 1.718775749206543, "learning_rate": 8.813261480787255e-06, "loss": 0.0528, "step": 20260 }, { "epoch": 9.498594189315838, "grad_norm": 1.860888957977295, "learning_rate": 8.81267572633552e-06, "loss": 0.051, "step": 20270 }, { "epoch": 9.503280224929709, "grad_norm": 1.33186674118042, "learning_rate": 8.812089971883787e-06, "loss": 0.0558, "step": 20280 }, { "epoch": 9.50796626054358, "grad_norm": 1.3585968017578125, "learning_rate": 8.811504217432053e-06, "loss": 0.0418, "step": 20290 }, { "epoch": 9.512652296157452, "grad_norm": 1.2041314840316772, "learning_rate": 8.81091846298032e-06, "loss": 0.0661, "step": 20300 }, { "epoch": 9.517338331771322, "grad_norm": 1.2717355489730835, "learning_rate": 8.810332708528585e-06, "loss": 0.0511, "step": 20310 }, { "epoch": 9.522024367385193, "grad_norm": 0.9652617573738098, "learning_rate": 8.809746954076852e-06, "loss": 0.0514, "step": 20320 }, { "epoch": 9.526710402999063, "grad_norm": 1.9312084913253784, "learning_rate": 8.809161199625119e-06, "loss": 0.0607, "step": 20330 }, { "epoch": 9.531396438612934, "grad_norm": 1.669273018836975, "learning_rate": 8.808575445173384e-06, "loss": 0.0468, "step": 20340 }, { "epoch": 9.536082474226804, "grad_norm": 1.204368233680725, "learning_rate": 8.80798969072165e-06, "loss": 0.0409, "step": 20350 }, { "epoch": 9.540768509840674, "grad_norm": 1.2132142782211304, "learning_rate": 8.807403936269916e-06, "loss": 0.0448, "step": 20360 }, { "epoch": 9.545454545454545, "grad_norm": 0.8759263157844543, "learning_rate": 8.806818181818183e-06, "loss": 0.0486, "step": 20370 }, { "epoch": 9.550140581068415, "grad_norm": 0.980694591999054, "learning_rate": 8.80623242736645e-06, "loss": 0.04, "step": 20380 }, { "epoch": 9.554826616682288, "grad_norm": 1.7257814407348633, "learning_rate": 8.805646672914715e-06, "loss": 0.0551, "step": 20390 }, { "epoch": 9.559512652296158, "grad_norm": 0.9855765700340271, "learning_rate": 8.80506091846298e-06, "loss": 0.0442, "step": 20400 }, { "epoch": 9.564198687910029, "grad_norm": 2.2688076496124268, "learning_rate": 8.804475164011247e-06, "loss": 0.0474, "step": 20410 }, { "epoch": 9.5688847235239, "grad_norm": 0.8345751762390137, "learning_rate": 8.803889409559512e-06, "loss": 0.0507, "step": 20420 }, { "epoch": 9.57357075913777, "grad_norm": 1.0074180364608765, "learning_rate": 8.80330365510778e-06, "loss": 0.0487, "step": 20430 }, { "epoch": 9.57825679475164, "grad_norm": 1.1515982151031494, "learning_rate": 8.802717900656046e-06, "loss": 0.0443, "step": 20440 }, { "epoch": 9.58294283036551, "grad_norm": 0.5248059630393982, "learning_rate": 8.802132146204311e-06, "loss": 0.0561, "step": 20450 }, { "epoch": 9.587628865979381, "grad_norm": 1.2470523118972778, "learning_rate": 8.801546391752578e-06, "loss": 0.0469, "step": 20460 }, { "epoch": 9.592314901593252, "grad_norm": 2.120579957962036, "learning_rate": 8.800960637300844e-06, "loss": 0.0513, "step": 20470 }, { "epoch": 9.597000937207122, "grad_norm": 2.442443609237671, "learning_rate": 8.80037488284911e-06, "loss": 0.0635, "step": 20480 }, { "epoch": 9.601686972820993, "grad_norm": 2.420138120651245, "learning_rate": 8.799789128397377e-06, "loss": 0.0626, "step": 20490 }, { "epoch": 9.606373008434865, "grad_norm": 2.3432815074920654, "learning_rate": 8.799203373945643e-06, "loss": 0.0534, "step": 20500 }, { "epoch": 9.606373008434865, "eval_loss": 0.037995509803295135, "eval_pearson_cosine": 0.786149907730362, "eval_pearson_dot": 0.6445644977545584, "eval_pearson_euclidean": 0.7304901967314237, "eval_pearson_manhattan": 0.7299852754916856, "eval_runtime": 40.4167, "eval_samples_per_second": 37.113, "eval_spearman_cosine": 0.788096924565833, "eval_spearman_dot": 0.6634744984860802, "eval_spearman_euclidean": 0.7450878530420201, "eval_spearman_manhattan": 0.7443460197740337, "eval_steps_per_second": 37.113, "step": 20500 }, { "epoch": 9.611059044048735, "grad_norm": 2.5431413650512695, "learning_rate": 8.79861761949391e-06, "loss": 0.0499, "step": 20510 }, { "epoch": 9.615745079662606, "grad_norm": 1.4701391458511353, "learning_rate": 8.798031865042175e-06, "loss": 0.0528, "step": 20520 }, { "epoch": 9.620431115276476, "grad_norm": 1.0605581998825073, "learning_rate": 8.79744611059044e-06, "loss": 0.0513, "step": 20530 }, { "epoch": 9.625117150890347, "grad_norm": 1.7231255769729614, "learning_rate": 8.796860356138709e-06, "loss": 0.0432, "step": 20540 }, { "epoch": 9.629803186504217, "grad_norm": 2.4519450664520264, "learning_rate": 8.796274601686974e-06, "loss": 0.0555, "step": 20550 }, { "epoch": 9.634489222118088, "grad_norm": 1.7406028509140015, "learning_rate": 8.795688847235239e-06, "loss": 0.0547, "step": 20560 }, { "epoch": 9.639175257731958, "grad_norm": 1.357200026512146, "learning_rate": 8.795103092783506e-06, "loss": 0.0548, "step": 20570 }, { "epoch": 9.643861293345829, "grad_norm": 1.7510253190994263, "learning_rate": 8.794517338331771e-06, "loss": 0.0541, "step": 20580 }, { "epoch": 9.648547328959701, "grad_norm": 2.1982178688049316, "learning_rate": 8.793931583880038e-06, "loss": 0.0572, "step": 20590 }, { "epoch": 9.653233364573572, "grad_norm": 1.6132203340530396, "learning_rate": 8.793345829428305e-06, "loss": 0.0467, "step": 20600 }, { "epoch": 9.657919400187442, "grad_norm": 1.165385127067566, "learning_rate": 8.79276007497657e-06, "loss": 0.0463, "step": 20610 }, { "epoch": 9.662605435801312, "grad_norm": 2.306887149810791, "learning_rate": 8.792174320524837e-06, "loss": 0.0529, "step": 20620 }, { "epoch": 9.667291471415183, "grad_norm": 1.740670084953308, "learning_rate": 8.791588566073102e-06, "loss": 0.0497, "step": 20630 }, { "epoch": 9.671977507029053, "grad_norm": 1.0078073740005493, "learning_rate": 8.79100281162137e-06, "loss": 0.0495, "step": 20640 }, { "epoch": 9.676663542642924, "grad_norm": 1.454647421836853, "learning_rate": 8.790417057169636e-06, "loss": 0.0477, "step": 20650 }, { "epoch": 9.681349578256794, "grad_norm": 1.6520277261734009, "learning_rate": 8.789831302717901e-06, "loss": 0.0499, "step": 20660 }, { "epoch": 9.686035613870665, "grad_norm": 2.0566940307617188, "learning_rate": 8.789245548266168e-06, "loss": 0.0504, "step": 20670 }, { "epoch": 9.690721649484535, "grad_norm": 1.7212245464324951, "learning_rate": 8.788659793814434e-06, "loss": 0.0558, "step": 20680 }, { "epoch": 9.695407685098406, "grad_norm": 0.9179878234863281, "learning_rate": 8.788074039362699e-06, "loss": 0.055, "step": 20690 }, { "epoch": 9.700093720712278, "grad_norm": 1.1311330795288086, "learning_rate": 8.787488284910966e-06, "loss": 0.0555, "step": 20700 }, { "epoch": 9.704779756326149, "grad_norm": 1.4247910976409912, "learning_rate": 8.786902530459233e-06, "loss": 0.0522, "step": 20710 }, { "epoch": 9.70946579194002, "grad_norm": 2.309624195098877, "learning_rate": 8.786316776007498e-06, "loss": 0.0492, "step": 20720 }, { "epoch": 9.71415182755389, "grad_norm": 0.9960254430770874, "learning_rate": 8.785731021555765e-06, "loss": 0.0461, "step": 20730 }, { "epoch": 9.71883786316776, "grad_norm": 0.9048061966896057, "learning_rate": 8.78514526710403e-06, "loss": 0.0497, "step": 20740 }, { "epoch": 9.72352389878163, "grad_norm": 1.7553735971450806, "learning_rate": 8.784559512652297e-06, "loss": 0.0531, "step": 20750 }, { "epoch": 9.72352389878163, "eval_loss": 0.0375310480594635, "eval_pearson_cosine": 0.7885717010435052, "eval_pearson_dot": 0.6441669695807519, "eval_pearson_euclidean": 0.7356023128188269, "eval_pearson_manhattan": 0.7349906496289833, "eval_runtime": 42.7003, "eval_samples_per_second": 35.129, "eval_spearman_cosine": 0.7894128881355192, "eval_spearman_dot": 0.6634003738795025, "eval_spearman_euclidean": 0.7498228606359407, "eval_spearman_manhattan": 0.7492125285743606, "eval_steps_per_second": 35.129, "step": 20750 }, { "epoch": 9.728209934395501, "grad_norm": 2.050300121307373, "learning_rate": 8.783973758200564e-06, "loss": 0.0559, "step": 20760 }, { "epoch": 9.732895970009372, "grad_norm": 1.7900549173355103, "learning_rate": 8.783388003748829e-06, "loss": 0.0468, "step": 20770 }, { "epoch": 9.737582005623242, "grad_norm": 2.7999625205993652, "learning_rate": 8.782802249297096e-06, "loss": 0.0567, "step": 20780 }, { "epoch": 9.742268041237114, "grad_norm": 1.746066927909851, "learning_rate": 8.782216494845361e-06, "loss": 0.0462, "step": 20790 }, { "epoch": 9.746954076850985, "grad_norm": 1.6031302213668823, "learning_rate": 8.781630740393628e-06, "loss": 0.0569, "step": 20800 }, { "epoch": 9.751640112464855, "grad_norm": 0.795835554599762, "learning_rate": 8.781044985941893e-06, "loss": 0.0471, "step": 20810 }, { "epoch": 9.756326148078726, "grad_norm": 1.4143311977386475, "learning_rate": 8.78045923149016e-06, "loss": 0.0495, "step": 20820 }, { "epoch": 9.761012183692596, "grad_norm": 1.2782717943191528, "learning_rate": 8.779873477038427e-06, "loss": 0.0594, "step": 20830 }, { "epoch": 9.765698219306467, "grad_norm": 0.9974650144577026, "learning_rate": 8.779287722586692e-06, "loss": 0.0485, "step": 20840 }, { "epoch": 9.770384254920337, "grad_norm": 1.5415414571762085, "learning_rate": 8.778701968134958e-06, "loss": 0.0476, "step": 20850 }, { "epoch": 9.775070290534208, "grad_norm": 0.8162183165550232, "learning_rate": 8.778116213683225e-06, "loss": 0.0529, "step": 20860 }, { "epoch": 9.779756326148078, "grad_norm": 1.532882571220398, "learning_rate": 8.777530459231491e-06, "loss": 0.0484, "step": 20870 }, { "epoch": 9.784442361761949, "grad_norm": 1.609947919845581, "learning_rate": 8.776944704779757e-06, "loss": 0.0575, "step": 20880 }, { "epoch": 9.78912839737582, "grad_norm": 1.7726844549179077, "learning_rate": 8.776358950328024e-06, "loss": 0.055, "step": 20890 }, { "epoch": 9.793814432989691, "grad_norm": 0.7471759915351868, "learning_rate": 8.775773195876289e-06, "loss": 0.0439, "step": 20900 }, { "epoch": 9.798500468603562, "grad_norm": 1.8393468856811523, "learning_rate": 8.775187441424556e-06, "loss": 0.0576, "step": 20910 }, { "epoch": 9.803186504217432, "grad_norm": 1.570793867111206, "learning_rate": 8.774601686972821e-06, "loss": 0.0518, "step": 20920 }, { "epoch": 9.807872539831303, "grad_norm": 2.121197462081909, "learning_rate": 8.774015932521088e-06, "loss": 0.0499, "step": 20930 }, { "epoch": 9.812558575445173, "grad_norm": 1.4100779294967651, "learning_rate": 8.773430178069355e-06, "loss": 0.0518, "step": 20940 }, { "epoch": 9.817244611059044, "grad_norm": 2.057370901107788, "learning_rate": 8.77284442361762e-06, "loss": 0.0445, "step": 20950 }, { "epoch": 9.821930646672914, "grad_norm": 1.0159096717834473, "learning_rate": 8.772258669165887e-06, "loss": 0.0488, "step": 20960 }, { "epoch": 9.826616682286785, "grad_norm": 2.1321892738342285, "learning_rate": 8.771672914714152e-06, "loss": 0.0435, "step": 20970 }, { "epoch": 9.831302717900655, "grad_norm": 1.727754831314087, "learning_rate": 8.771087160262419e-06, "loss": 0.0506, "step": 20980 }, { "epoch": 9.835988753514528, "grad_norm": 1.4544596672058105, "learning_rate": 8.770501405810686e-06, "loss": 0.0538, "step": 20990 }, { "epoch": 9.840674789128398, "grad_norm": 1.8320542573928833, "learning_rate": 8.769915651358951e-06, "loss": 0.0464, "step": 21000 }, { "epoch": 9.840674789128398, "eval_loss": 0.037997569888830185, "eval_pearson_cosine": 0.7860642455644182, "eval_pearson_dot": 0.6414601204917716, "eval_pearson_euclidean": 0.7319623204333681, "eval_pearson_manhattan": 0.731351329880491, "eval_runtime": 42.4574, "eval_samples_per_second": 35.33, "eval_spearman_cosine": 0.7870558046080526, "eval_spearman_dot": 0.6600440085619812, "eval_spearman_euclidean": 0.746835302683809, "eval_spearman_manhattan": 0.7463752489757238, "eval_steps_per_second": 35.33, "step": 21000 }, { "epoch": 9.845360824742269, "grad_norm": 1.0928393602371216, "learning_rate": 8.769329896907216e-06, "loss": 0.0467, "step": 21010 }, { "epoch": 9.850046860356139, "grad_norm": 1.111713171005249, "learning_rate": 8.768744142455483e-06, "loss": 0.0621, "step": 21020 }, { "epoch": 9.85473289597001, "grad_norm": 1.0577993392944336, "learning_rate": 8.768158388003749e-06, "loss": 0.0547, "step": 21030 }, { "epoch": 9.85941893158388, "grad_norm": 1.453430414199829, "learning_rate": 8.767572633552015e-06, "loss": 0.0476, "step": 21040 }, { "epoch": 9.86410496719775, "grad_norm": 1.527876377105713, "learning_rate": 8.766986879100282e-06, "loss": 0.0476, "step": 21050 }, { "epoch": 9.868791002811621, "grad_norm": 1.6117839813232422, "learning_rate": 8.766401124648548e-06, "loss": 0.06, "step": 21060 }, { "epoch": 9.873477038425492, "grad_norm": 1.4520413875579834, "learning_rate": 8.765815370196815e-06, "loss": 0.049, "step": 21070 }, { "epoch": 9.878163074039362, "grad_norm": 0.7348192930221558, "learning_rate": 8.76522961574508e-06, "loss": 0.0531, "step": 21080 }, { "epoch": 9.882849109653232, "grad_norm": 2.0447332859039307, "learning_rate": 8.764643861293347e-06, "loss": 0.0499, "step": 21090 }, { "epoch": 9.887535145267105, "grad_norm": 1.4770324230194092, "learning_rate": 8.764058106841614e-06, "loss": 0.0467, "step": 21100 }, { "epoch": 9.892221180880975, "grad_norm": 2.313798189163208, "learning_rate": 8.763472352389879e-06, "loss": 0.0553, "step": 21110 }, { "epoch": 9.896907216494846, "grad_norm": 1.1835334300994873, "learning_rate": 8.762886597938146e-06, "loss": 0.0554, "step": 21120 }, { "epoch": 9.901593252108716, "grad_norm": 1.580556035041809, "learning_rate": 8.762300843486411e-06, "loss": 0.0507, "step": 21130 }, { "epoch": 9.906279287722587, "grad_norm": 0.9451290965080261, "learning_rate": 8.761715089034676e-06, "loss": 0.0451, "step": 21140 }, { "epoch": 9.910965323336457, "grad_norm": 1.0342776775360107, "learning_rate": 8.761129334582945e-06, "loss": 0.0497, "step": 21150 }, { "epoch": 9.915651358950328, "grad_norm": 1.1484373807907104, "learning_rate": 8.76054358013121e-06, "loss": 0.0466, "step": 21160 }, { "epoch": 9.920337394564198, "grad_norm": 2.1361167430877686, "learning_rate": 8.759957825679475e-06, "loss": 0.0631, "step": 21170 }, { "epoch": 9.925023430178069, "grad_norm": 1.0633893013000488, "learning_rate": 8.759372071227742e-06, "loss": 0.0468, "step": 21180 }, { "epoch": 9.929709465791941, "grad_norm": 2.109058380126953, "learning_rate": 8.758786316776007e-06, "loss": 0.0572, "step": 21190 }, { "epoch": 9.934395501405811, "grad_norm": 1.6682454347610474, "learning_rate": 8.758200562324274e-06, "loss": 0.0542, "step": 21200 }, { "epoch": 9.939081537019682, "grad_norm": 1.6142311096191406, "learning_rate": 8.757614807872541e-06, "loss": 0.0508, "step": 21210 }, { "epoch": 9.943767572633552, "grad_norm": 1.564691424369812, "learning_rate": 8.757029053420806e-06, "loss": 0.0518, "step": 21220 }, { "epoch": 9.948453608247423, "grad_norm": 1.526147723197937, "learning_rate": 8.756443298969073e-06, "loss": 0.0541, "step": 21230 }, { "epoch": 9.953139643861293, "grad_norm": 1.049249291419983, "learning_rate": 8.755857544517339e-06, "loss": 0.0564, "step": 21240 }, { "epoch": 9.957825679475164, "grad_norm": 2.233095407485962, "learning_rate": 8.755271790065605e-06, "loss": 0.0406, "step": 21250 }, { "epoch": 9.957825679475164, "eval_loss": 0.03874640911817551, "eval_pearson_cosine": 0.7823759183217831, "eval_pearson_dot": 0.6362060503787177, "eval_pearson_euclidean": 0.7337201116603183, "eval_pearson_manhattan": 0.7335170834683709, "eval_runtime": 42.9733, "eval_samples_per_second": 34.905, "eval_spearman_cosine": 0.7845387476571652, "eval_spearman_dot": 0.6531911497158578, "eval_spearman_euclidean": 0.7463049410965222, "eval_spearman_manhattan": 0.7460162771900863, "eval_steps_per_second": 34.905, "step": 21250 }, { "epoch": 9.962511715089034, "grad_norm": 1.1485975980758667, "learning_rate": 8.754686035613872e-06, "loss": 0.0483, "step": 21260 }, { "epoch": 9.967197750702905, "grad_norm": 1.0608566999435425, "learning_rate": 8.754100281162138e-06, "loss": 0.0488, "step": 21270 }, { "epoch": 9.971883786316775, "grad_norm": 2.202392339706421, "learning_rate": 8.753514526710405e-06, "loss": 0.0559, "step": 21280 }, { "epoch": 9.976569821930646, "grad_norm": 1.6414839029312134, "learning_rate": 8.75292877225867e-06, "loss": 0.064, "step": 21290 }, { "epoch": 9.981255857544518, "grad_norm": 1.7389216423034668, "learning_rate": 8.752343017806935e-06, "loss": 0.0464, "step": 21300 }, { "epoch": 9.985941893158389, "grad_norm": 1.7043269872665405, "learning_rate": 8.751757263355202e-06, "loss": 0.0473, "step": 21310 }, { "epoch": 9.990627928772259, "grad_norm": 1.5085012912750244, "learning_rate": 8.751171508903469e-06, "loss": 0.0496, "step": 21320 }, { "epoch": 9.99531396438613, "grad_norm": 1.686661720275879, "learning_rate": 8.750585754451734e-06, "loss": 0.051, "step": 21330 }, { "epoch": 10.0, "grad_norm": 1.0168319940567017, "learning_rate": 8.750000000000001e-06, "loss": 0.0477, "step": 21340 } ], "logging_steps": 10, "max_steps": 21340, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }