| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 250, | |
| "global_step": 10670, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00937207122774133, | |
| "grad_norm": 0.573442816734314, | |
| "learning_rate": 9.999926781765732e-06, | |
| "loss": 1.312, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01874414245548266, | |
| "grad_norm": 1.0577057600021362, | |
| "learning_rate": 9.999853563531462e-06, | |
| "loss": 1.2611, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.028116213683223992, | |
| "grad_norm": 1.358649492263794, | |
| "learning_rate": 9.999780345297193e-06, | |
| "loss": 1.1822, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03748828491096532, | |
| "grad_norm": 1.7219270467758179, | |
| "learning_rate": 9.999707127062924e-06, | |
| "loss": 1.062, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.046860356138706656, | |
| "grad_norm": 1.7191277742385864, | |
| "learning_rate": 9.999633908828655e-06, | |
| "loss": 0.9325, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.056232427366447985, | |
| "grad_norm": 1.6047089099884033, | |
| "learning_rate": 9.999560690594387e-06, | |
| "loss": 0.7909, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06560449859418932, | |
| "grad_norm": 1.1597000360488892, | |
| "learning_rate": 9.999487472360118e-06, | |
| "loss": 0.6858, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07497656982193064, | |
| "grad_norm": 1.4232110977172852, | |
| "learning_rate": 9.999414254125849e-06, | |
| "loss": 0.6554, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08434864104967198, | |
| "grad_norm": 1.3652020692825317, | |
| "learning_rate": 9.99934103589158e-06, | |
| "loss": 0.5937, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09372071227741331, | |
| "grad_norm": 1.299221396446228, | |
| "learning_rate": 9.99926781765731e-06, | |
| "loss": 0.5778, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10309278350515463, | |
| "grad_norm": 1.367699146270752, | |
| "learning_rate": 9.99919459942304e-06, | |
| "loss": 0.5562, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11246485473289597, | |
| "grad_norm": 1.2190635204315186, | |
| "learning_rate": 9.999121381188772e-06, | |
| "loss": 0.5259, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1218369259606373, | |
| "grad_norm": 1.1808373928070068, | |
| "learning_rate": 9.999048162954504e-06, | |
| "loss": 0.5158, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13120899718837864, | |
| "grad_norm": 1.5956122875213623, | |
| "learning_rate": 9.998974944720235e-06, | |
| "loss": 0.4877, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14058106841611998, | |
| "grad_norm": 1.2425106763839722, | |
| "learning_rate": 9.998901726485964e-06, | |
| "loss": 0.4858, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.14995313964386128, | |
| "grad_norm": 1.284425139427185, | |
| "learning_rate": 9.998828508251696e-06, | |
| "loss": 0.4426, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.15932521087160262, | |
| "grad_norm": 1.4248498678207397, | |
| "learning_rate": 9.998755290017427e-06, | |
| "loss": 0.4644, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.16869728209934395, | |
| "grad_norm": 2.5712969303131104, | |
| "learning_rate": 9.998682071783158e-06, | |
| "loss": 0.4363, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1780693533270853, | |
| "grad_norm": 1.572169542312622, | |
| "learning_rate": 9.998608853548888e-06, | |
| "loss": 0.4206, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.18744142455482662, | |
| "grad_norm": 1.4508352279663086, | |
| "learning_rate": 9.998535635314621e-06, | |
| "loss": 0.4247, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.19681349578256796, | |
| "grad_norm": 1.2668938636779785, | |
| "learning_rate": 9.99846241708035e-06, | |
| "loss": 0.4302, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 1.0630348920822144, | |
| "learning_rate": 9.99838919884608e-06, | |
| "loss": 0.3987, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2155576382380506, | |
| "grad_norm": 1.1395602226257324, | |
| "learning_rate": 9.998315980611813e-06, | |
| "loss": 0.3746, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.22492970946579194, | |
| "grad_norm": 1.6570693254470825, | |
| "learning_rate": 9.998242762377544e-06, | |
| "loss": 0.3954, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.23430178069353327, | |
| "grad_norm": 1.2213038206100464, | |
| "learning_rate": 9.998169544143275e-06, | |
| "loss": 0.3877, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.23430178069353327, | |
| "eval_loss": 0.15415821969509125, | |
| "eval_pearson_cosine": 0.7471039295196533, | |
| "eval_pearson_dot": 0.6414342522621155, | |
| "eval_pearson_euclidean": 0.739482581615448, | |
| "eval_pearson_manhattan": 0.7393465042114258, | |
| "eval_runtime": 29.8457, | |
| "eval_samples_per_second": 50.258, | |
| "eval_spearman_cosine": 0.7499078042299374, | |
| "eval_spearman_dot": 0.6346699933138464, | |
| "eval_spearman_euclidean": 0.7397365400334271, | |
| "eval_spearman_manhattan": 0.7393369553461101, | |
| "eval_steps_per_second": 6.299, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2436738519212746, | |
| "grad_norm": 1.3511942625045776, | |
| "learning_rate": 9.998096325909005e-06, | |
| "loss": 0.3685, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2530459231490159, | |
| "grad_norm": 1.3458188772201538, | |
| "learning_rate": 9.998023107674736e-06, | |
| "loss": 0.367, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2624179943767573, | |
| "grad_norm": 1.424850344657898, | |
| "learning_rate": 9.997949889440467e-06, | |
| "loss": 0.3511, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2717900656044986, | |
| "grad_norm": 1.4595459699630737, | |
| "learning_rate": 9.997876671206198e-06, | |
| "loss": 0.3389, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.28116213683223995, | |
| "grad_norm": 1.167495608329773, | |
| "learning_rate": 9.997803452971928e-06, | |
| "loss": 0.3335, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.29053420805998126, | |
| "grad_norm": 1.1749252080917358, | |
| "learning_rate": 9.997730234737661e-06, | |
| "loss": 0.3339, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.29990627928772257, | |
| "grad_norm": 1.2500739097595215, | |
| "learning_rate": 9.99765701650339e-06, | |
| "loss": 0.3215, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.30927835051546393, | |
| "grad_norm": 1.332942247390747, | |
| "learning_rate": 9.99758379826912e-06, | |
| "loss": 0.3093, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.31865042174320524, | |
| "grad_norm": 1.173511266708374, | |
| "learning_rate": 9.997510580034853e-06, | |
| "loss": 0.3234, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3280224929709466, | |
| "grad_norm": 1.3587061166763306, | |
| "learning_rate": 9.997437361800584e-06, | |
| "loss": 0.3285, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3373945641986879, | |
| "grad_norm": 1.4196358919143677, | |
| "learning_rate": 9.997364143566315e-06, | |
| "loss": 0.3078, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3467666354264292, | |
| "grad_norm": 1.1899330615997314, | |
| "learning_rate": 9.997290925332045e-06, | |
| "loss": 0.2952, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3561387066541706, | |
| "grad_norm": 1.3728539943695068, | |
| "learning_rate": 9.997217707097776e-06, | |
| "loss": 0.2912, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3655107778819119, | |
| "grad_norm": 1.6375203132629395, | |
| "learning_rate": 9.997144488863507e-06, | |
| "loss": 0.3153, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.37488284910965325, | |
| "grad_norm": 1.3330031633377075, | |
| "learning_rate": 9.997071270629238e-06, | |
| "loss": 0.2858, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.38425492033739456, | |
| "grad_norm": 1.2047045230865479, | |
| "learning_rate": 9.99699805239497e-06, | |
| "loss": 0.3004, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3936269915651359, | |
| "grad_norm": 1.280134916305542, | |
| "learning_rate": 9.9969248341607e-06, | |
| "loss": 0.2819, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4029990627928772, | |
| "grad_norm": 1.2952693700790405, | |
| "learning_rate": 9.99685161592643e-06, | |
| "loss": 0.2772, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 1.1937365531921387, | |
| "learning_rate": 9.996778397692162e-06, | |
| "loss": 0.3024, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4217432052483599, | |
| "grad_norm": 1.226347804069519, | |
| "learning_rate": 9.996705179457893e-06, | |
| "loss": 0.2844, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4311152764761012, | |
| "grad_norm": 1.5503312349319458, | |
| "learning_rate": 9.996631961223624e-06, | |
| "loss": 0.2634, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.44048734770384257, | |
| "grad_norm": 1.4498707056045532, | |
| "learning_rate": 9.996558742989355e-06, | |
| "loss": 0.2697, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4498594189315839, | |
| "grad_norm": 1.2823820114135742, | |
| "learning_rate": 9.996485524755087e-06, | |
| "loss": 0.2927, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4592314901593252, | |
| "grad_norm": 1.1089231967926025, | |
| "learning_rate": 9.996412306520816e-06, | |
| "loss": 0.2669, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "grad_norm": 1.3862818479537964, | |
| "learning_rate": 9.996339088286547e-06, | |
| "loss": 0.2805, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "eval_loss": 0.11416644603013992, | |
| "eval_pearson_cosine": 0.7577512264251709, | |
| "eval_pearson_dot": 0.6366492509841919, | |
| "eval_pearson_euclidean": 0.7618618011474609, | |
| "eval_pearson_manhattan": 0.7619431614875793, | |
| "eval_runtime": 22.679, | |
| "eval_samples_per_second": 66.14, | |
| "eval_spearman_cosine": 0.7643092952449725, | |
| "eval_spearman_dot": 0.6341280960850315, | |
| "eval_spearman_euclidean": 0.7653570734883524, | |
| "eval_spearman_manhattan": 0.7652284643248553, | |
| "eval_steps_per_second": 8.29, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.47797563261480785, | |
| "grad_norm": 1.079265832901001, | |
| "learning_rate": 9.99626587005228e-06, | |
| "loss": 0.2649, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4873477038425492, | |
| "grad_norm": 1.3966060876846313, | |
| "learning_rate": 9.99619265181801e-06, | |
| "loss": 0.279, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4967197750702905, | |
| "grad_norm": 1.197001576423645, | |
| "learning_rate": 9.99611943358374e-06, | |
| "loss": 0.263, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5060918462980318, | |
| "grad_norm": 1.414509892463684, | |
| "learning_rate": 9.996046215349472e-06, | |
| "loss": 0.2816, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5154639175257731, | |
| "grad_norm": 1.4723501205444336, | |
| "learning_rate": 9.995972997115202e-06, | |
| "loss": 0.2696, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5248359887535146, | |
| "grad_norm": 1.1838375329971313, | |
| "learning_rate": 9.995899778880933e-06, | |
| "loss": 0.2686, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5342080599812559, | |
| "grad_norm": 1.2640224695205688, | |
| "learning_rate": 9.995826560646664e-06, | |
| "loss": 0.2842, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5435801312089972, | |
| "grad_norm": 1.2584717273712158, | |
| "learning_rate": 9.995753342412395e-06, | |
| "loss": 0.2505, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5529522024367385, | |
| "grad_norm": 1.3276816606521606, | |
| "learning_rate": 9.995680124178127e-06, | |
| "loss": 0.2764, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5623242736644799, | |
| "grad_norm": 1.5065838098526, | |
| "learning_rate": 9.995606905943858e-06, | |
| "loss": 0.2778, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5716963448922212, | |
| "grad_norm": 1.1485587358474731, | |
| "learning_rate": 9.995533687709588e-06, | |
| "loss": 0.2533, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5810684161199625, | |
| "grad_norm": 1.242677927017212, | |
| "learning_rate": 9.99546046947532e-06, | |
| "loss": 0.2549, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5904404873477038, | |
| "grad_norm": 1.4471759796142578, | |
| "learning_rate": 9.99538725124105e-06, | |
| "loss": 0.2734, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5998125585754451, | |
| "grad_norm": 1.3379895687103271, | |
| "learning_rate": 9.99531403300678e-06, | |
| "loss": 0.2551, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6091846298031866, | |
| "grad_norm": 1.2373607158660889, | |
| "learning_rate": 9.995240814772511e-06, | |
| "loss": 0.2358, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 1.2897976636886597, | |
| "learning_rate": 9.995167596538242e-06, | |
| "loss": 0.2572, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6279287722586692, | |
| "grad_norm": 1.3715548515319824, | |
| "learning_rate": 9.995094378303973e-06, | |
| "loss": 0.2554, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6373008434864105, | |
| "grad_norm": 1.3889539241790771, | |
| "learning_rate": 9.995021160069704e-06, | |
| "loss": 0.2502, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6466729147141518, | |
| "grad_norm": 1.3987656831741333, | |
| "learning_rate": 9.994947941835436e-06, | |
| "loss": 0.2449, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6560449859418932, | |
| "grad_norm": 1.4677623510360718, | |
| "learning_rate": 9.994874723601167e-06, | |
| "loss": 0.2438, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6654170571696345, | |
| "grad_norm": 1.238258719444275, | |
| "learning_rate": 9.994801505366898e-06, | |
| "loss": 0.2609, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6747891283973758, | |
| "grad_norm": 1.2697819471359253, | |
| "learning_rate": 9.994728287132628e-06, | |
| "loss": 0.2685, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6841611996251171, | |
| "grad_norm": 1.1607269048690796, | |
| "learning_rate": 9.99465506889836e-06, | |
| "loss": 0.2342, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6935332708528584, | |
| "grad_norm": 1.2666348218917847, | |
| "learning_rate": 9.99458185066409e-06, | |
| "loss": 0.2308, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7029053420805998, | |
| "grad_norm": 1.252940058708191, | |
| "learning_rate": 9.99450863242982e-06, | |
| "loss": 0.2331, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7029053420805998, | |
| "eval_loss": 0.09498214721679688, | |
| "eval_pearson_cosine": 0.7673527002334595, | |
| "eval_pearson_dot": 0.6584292054176331, | |
| "eval_pearson_euclidean": 0.7682392001152039, | |
| "eval_pearson_manhattan": 0.7685161232948303, | |
| "eval_runtime": 21.4883, | |
| "eval_samples_per_second": 69.805, | |
| "eval_spearman_cosine": 0.7771628917615258, | |
| "eval_spearman_dot": 0.6570265964452069, | |
| "eval_spearman_euclidean": 0.7740883932373563, | |
| "eval_spearman_manhattan": 0.7747253819422362, | |
| "eval_steps_per_second": 8.749, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7122774133083412, | |
| "grad_norm": 1.204959750175476, | |
| "learning_rate": 9.994435414195553e-06, | |
| "loss": 0.2514, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7216494845360825, | |
| "grad_norm": 2.5355069637298584, | |
| "learning_rate": 9.994362195961284e-06, | |
| "loss": 0.2473, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7310215557638238, | |
| "grad_norm": 1.2129027843475342, | |
| "learning_rate": 9.994288977727013e-06, | |
| "loss": 0.2302, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7403936269915652, | |
| "grad_norm": 1.109953761100769, | |
| "learning_rate": 9.994215759492745e-06, | |
| "loss": 0.2264, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7497656982193065, | |
| "grad_norm": 1.443888545036316, | |
| "learning_rate": 9.994142541258476e-06, | |
| "loss": 0.2372, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7591377694470478, | |
| "grad_norm": 1.3083347082138062, | |
| "learning_rate": 9.994069323024207e-06, | |
| "loss": 0.2417, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7685098406747891, | |
| "grad_norm": 1.0919073820114136, | |
| "learning_rate": 9.993996104789938e-06, | |
| "loss": 0.2331, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7778819119025304, | |
| "grad_norm": 1.3770041465759277, | |
| "learning_rate": 9.993922886555668e-06, | |
| "loss": 0.2692, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7872539831302718, | |
| "grad_norm": 1.2099621295928955, | |
| "learning_rate": 9.993849668321399e-06, | |
| "loss": 0.2279, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7966260543580131, | |
| "grad_norm": 1.1606112718582153, | |
| "learning_rate": 9.99377645008713e-06, | |
| "loss": 0.2474, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8059981255857545, | |
| "grad_norm": 1.472863793373108, | |
| "learning_rate": 9.993703231852862e-06, | |
| "loss": 0.2298, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8153701968134958, | |
| "grad_norm": 1.2455284595489502, | |
| "learning_rate": 9.993630013618593e-06, | |
| "loss": 0.2371, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 1.3777674436569214, | |
| "learning_rate": 9.993556795384324e-06, | |
| "loss": 0.2434, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8341143392689785, | |
| "grad_norm": 0.9551514983177185, | |
| "learning_rate": 9.993483577150055e-06, | |
| "loss": 0.2074, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8434864104967198, | |
| "grad_norm": 1.0588115453720093, | |
| "learning_rate": 9.993410358915785e-06, | |
| "loss": 0.2162, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8528584817244611, | |
| "grad_norm": 1.3450068235397339, | |
| "learning_rate": 9.993337140681516e-06, | |
| "loss": 0.2272, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8622305529522024, | |
| "grad_norm": 1.6997965574264526, | |
| "learning_rate": 9.993263922447247e-06, | |
| "loss": 0.2315, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8716026241799437, | |
| "grad_norm": 1.2186520099639893, | |
| "learning_rate": 9.993190704212978e-06, | |
| "loss": 0.2426, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8809746954076851, | |
| "grad_norm": 1.0515309572219849, | |
| "learning_rate": 9.99311748597871e-06, | |
| "loss": 0.2328, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8903467666354264, | |
| "grad_norm": 1.29239821434021, | |
| "learning_rate": 9.993044267744439e-06, | |
| "loss": 0.2263, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8997188378631678, | |
| "grad_norm": 1.7695139646530151, | |
| "learning_rate": 9.99297104951017e-06, | |
| "loss": 0.2466, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 1.359837293624878, | |
| "learning_rate": 9.992897831275902e-06, | |
| "loss": 0.2215, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9184629803186504, | |
| "grad_norm": 1.2525417804718018, | |
| "learning_rate": 9.992824613041633e-06, | |
| "loss": 0.2295, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9278350515463918, | |
| "grad_norm": 1.2337384223937988, | |
| "learning_rate": 9.992751394807364e-06, | |
| "loss": 0.2101, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "grad_norm": 1.1121580600738525, | |
| "learning_rate": 9.992678176573095e-06, | |
| "loss": 0.2455, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "eval_loss": 0.09235719591379166, | |
| "eval_pearson_cosine": 0.7676932215690613, | |
| "eval_pearson_dot": 0.6569437980651855, | |
| "eval_pearson_euclidean": 0.7712024450302124, | |
| "eval_pearson_manhattan": 0.7713895440101624, | |
| "eval_runtime": 21.9039, | |
| "eval_samples_per_second": 68.481, | |
| "eval_spearman_cosine": 0.7780572781571132, | |
| "eval_spearman_dot": 0.6557682135268442, | |
| "eval_spearman_euclidean": 0.7775782712174545, | |
| "eval_spearman_manhattan": 0.7778181970888292, | |
| "eval_steps_per_second": 8.583, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9465791940018744, | |
| "grad_norm": 1.1828556060791016, | |
| "learning_rate": 9.992604958338825e-06, | |
| "loss": 0.2168, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.9559512652296157, | |
| "grad_norm": 1.2189664840698242, | |
| "learning_rate": 9.992531740104556e-06, | |
| "loss": 0.2072, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9653233364573571, | |
| "grad_norm": 1.6102409362792969, | |
| "learning_rate": 9.992458521870287e-06, | |
| "loss": 0.2228, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.9746954076850984, | |
| "grad_norm": 1.6891916990280151, | |
| "learning_rate": 9.99238530363602e-06, | |
| "loss": 0.2404, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9840674789128397, | |
| "grad_norm": 1.2274008989334106, | |
| "learning_rate": 9.99231208540175e-06, | |
| "loss": 0.2225, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.993439550140581, | |
| "grad_norm": 1.2388169765472412, | |
| "learning_rate": 9.992238867167479e-06, | |
| "loss": 0.2215, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.0028116213683225, | |
| "grad_norm": 1.2347650527954102, | |
| "learning_rate": 9.992165648933211e-06, | |
| "loss": 0.2239, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.0121836925960637, | |
| "grad_norm": 1.1266793012619019, | |
| "learning_rate": 9.992092430698942e-06, | |
| "loss": 0.1932, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.021555763823805, | |
| "grad_norm": 1.5187146663665771, | |
| "learning_rate": 9.992019212464673e-06, | |
| "loss": 0.205, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.0309278350515463, | |
| "grad_norm": 1.4463717937469482, | |
| "learning_rate": 9.991945994230404e-06, | |
| "loss": 0.1818, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.0402999062792877, | |
| "grad_norm": 1.6186790466308594, | |
| "learning_rate": 9.991872775996136e-06, | |
| "loss": 0.2076, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.0496719775070291, | |
| "grad_norm": 1.3895883560180664, | |
| "learning_rate": 9.991799557761865e-06, | |
| "loss": 0.2096, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.0590440487347703, | |
| "grad_norm": 1.296912670135498, | |
| "learning_rate": 9.991726339527596e-06, | |
| "loss": 0.2046, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.0684161199625117, | |
| "grad_norm": 1.5527839660644531, | |
| "learning_rate": 9.991653121293328e-06, | |
| "loss": 0.1972, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.077788191190253, | |
| "grad_norm": 1.4777096509933472, | |
| "learning_rate": 9.99157990305906e-06, | |
| "loss": 0.2086, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.0871602624179943, | |
| "grad_norm": 1.3155533075332642, | |
| "learning_rate": 9.99150668482479e-06, | |
| "loss": 0.1969, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.0965323336457358, | |
| "grad_norm": 1.5277265310287476, | |
| "learning_rate": 9.99143346659052e-06, | |
| "loss": 0.1923, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.105904404873477, | |
| "grad_norm": 1.3764179944992065, | |
| "learning_rate": 9.991360248356251e-06, | |
| "loss": 0.1916, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.1152764761012184, | |
| "grad_norm": 1.6024688482284546, | |
| "learning_rate": 9.991287030121982e-06, | |
| "loss": 0.185, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.1246485473289598, | |
| "grad_norm": 1.2752821445465088, | |
| "learning_rate": 9.991213811887713e-06, | |
| "loss": 0.1829, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.134020618556701, | |
| "grad_norm": 1.4704368114471436, | |
| "learning_rate": 9.991140593653444e-06, | |
| "loss": 0.2006, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.1433926897844424, | |
| "grad_norm": 1.3614213466644287, | |
| "learning_rate": 9.991067375419176e-06, | |
| "loss": 0.1776, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.1527647610121836, | |
| "grad_norm": 1.2852075099945068, | |
| "learning_rate": 9.990994157184905e-06, | |
| "loss": 0.2116, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.162136832239925, | |
| "grad_norm": 1.1774332523345947, | |
| "learning_rate": 9.990920938950636e-06, | |
| "loss": 0.1909, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.1715089034676662, | |
| "grad_norm": 1.0442605018615723, | |
| "learning_rate": 9.990847720716368e-06, | |
| "loss": 0.1933, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.1715089034676662, | |
| "eval_loss": 0.08017747104167938, | |
| "eval_pearson_cosine": 0.7703680992126465, | |
| "eval_pearson_dot": 0.6808142066001892, | |
| "eval_pearson_euclidean": 0.7676056623458862, | |
| "eval_pearson_manhattan": 0.7677772045135498, | |
| "eval_runtime": 22.1599, | |
| "eval_samples_per_second": 67.69, | |
| "eval_spearman_cosine": 0.7790172740054649, | |
| "eval_spearman_dot": 0.6796557194170769, | |
| "eval_spearman_euclidean": 0.7739566900498013, | |
| "eval_spearman_manhattan": 0.7741509176342483, | |
| "eval_steps_per_second": 8.484, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.1808809746954076, | |
| "grad_norm": 1.3561466932296753, | |
| "learning_rate": 9.990774502482099e-06, | |
| "loss": 0.1921, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.190253045923149, | |
| "grad_norm": 1.2151105403900146, | |
| "learning_rate": 9.99070128424783e-06, | |
| "loss": 0.1865, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.1996251171508903, | |
| "grad_norm": 1.4363489151000977, | |
| "learning_rate": 9.99062806601356e-06, | |
| "loss": 0.2071, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.2089971883786317, | |
| "grad_norm": 1.1078994274139404, | |
| "learning_rate": 9.990554847779291e-06, | |
| "loss": 0.1984, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.218369259606373, | |
| "grad_norm": 1.4608142375946045, | |
| "learning_rate": 9.990481629545022e-06, | |
| "loss": 0.1926, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.2277413308341143, | |
| "grad_norm": 1.5290361642837524, | |
| "learning_rate": 9.990408411310753e-06, | |
| "loss": 0.1935, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.2371134020618557, | |
| "grad_norm": 1.09344482421875, | |
| "learning_rate": 9.990335193076485e-06, | |
| "loss": 0.2026, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.246485473289597, | |
| "grad_norm": 1.5567576885223389, | |
| "learning_rate": 9.990261974842216e-06, | |
| "loss": 0.1968, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.2558575445173383, | |
| "grad_norm": 1.243221402168274, | |
| "learning_rate": 9.990188756607947e-06, | |
| "loss": 0.1859, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.2652296157450795, | |
| "grad_norm": 1.5287493467330933, | |
| "learning_rate": 9.990115538373678e-06, | |
| "loss": 0.2067, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.274601686972821, | |
| "grad_norm": 1.1587677001953125, | |
| "learning_rate": 9.990042320139408e-06, | |
| "loss": 0.1848, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.2839737582005624, | |
| "grad_norm": 1.3521069288253784, | |
| "learning_rate": 9.989969101905139e-06, | |
| "loss": 0.1975, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.2933458294283038, | |
| "grad_norm": 1.1655584573745728, | |
| "learning_rate": 9.98989588367087e-06, | |
| "loss": 0.1963, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.302717900656045, | |
| "grad_norm": 1.1636890172958374, | |
| "learning_rate": 9.989822665436602e-06, | |
| "loss": 0.1768, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.3120899718837864, | |
| "grad_norm": 1.3106030225753784, | |
| "learning_rate": 9.989749447202333e-06, | |
| "loss": 0.1918, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.3214620431115276, | |
| "grad_norm": 1.314274787902832, | |
| "learning_rate": 9.989676228968062e-06, | |
| "loss": 0.1733, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.330834114339269, | |
| "grad_norm": 1.646234393119812, | |
| "learning_rate": 9.989603010733795e-06, | |
| "loss": 0.1797, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.3402061855670104, | |
| "grad_norm": 1.3321646451950073, | |
| "learning_rate": 9.989529792499525e-06, | |
| "loss": 0.1726, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.3495782567947516, | |
| "grad_norm": 1.3959871530532837, | |
| "learning_rate": 9.989456574265256e-06, | |
| "loss": 0.1889, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.358950328022493, | |
| "grad_norm": 1.1790053844451904, | |
| "learning_rate": 9.989383356030987e-06, | |
| "loss": 0.1779, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.3683223992502342, | |
| "grad_norm": 1.7612881660461426, | |
| "learning_rate": 9.989310137796718e-06, | |
| "loss": 0.1834, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.3776944704779757, | |
| "grad_norm": 1.2366232872009277, | |
| "learning_rate": 9.989236919562448e-06, | |
| "loss": 0.1996, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.387066541705717, | |
| "grad_norm": 1.550465703010559, | |
| "learning_rate": 9.989163701328179e-06, | |
| "loss": 0.1991, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.3964386129334583, | |
| "grad_norm": 1.2935107946395874, | |
| "learning_rate": 9.98909048309391e-06, | |
| "loss": 0.1956, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.4058106841611997, | |
| "grad_norm": 0.9709776639938354, | |
| "learning_rate": 9.989017264859642e-06, | |
| "loss": 0.1872, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4058106841611997, | |
| "eval_loss": 0.07902642339468002, | |
| "eval_pearson_cosine": 0.7684531211853027, | |
| "eval_pearson_dot": 0.6580111980438232, | |
| "eval_pearson_euclidean": 0.768983006477356, | |
| "eval_pearson_manhattan": 0.7692690491676331, | |
| "eval_runtime": 23.5462, | |
| "eval_samples_per_second": 63.704, | |
| "eval_spearman_cosine": 0.7777241764238451, | |
| "eval_spearman_dot": 0.6568945327389543, | |
| "eval_spearman_euclidean": 0.7752386276211667, | |
| "eval_spearman_manhattan": 0.7755204438878311, | |
| "eval_steps_per_second": 7.984, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.415182755388941, | |
| "grad_norm": 1.5001726150512695, | |
| "learning_rate": 9.988944046625373e-06, | |
| "loss": 0.2094, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.4245548266166823, | |
| "grad_norm": 1.1697657108306885, | |
| "learning_rate": 9.988870828391102e-06, | |
| "loss": 0.1862, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.4339268978444237, | |
| "grad_norm": 1.3496723175048828, | |
| "learning_rate": 9.988797610156834e-06, | |
| "loss": 0.1863, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.443298969072165, | |
| "grad_norm": 1.3314088582992554, | |
| "learning_rate": 9.988724391922565e-06, | |
| "loss": 0.1809, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.4526710402999063, | |
| "grad_norm": 1.2966681718826294, | |
| "learning_rate": 9.988651173688296e-06, | |
| "loss": 0.1799, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.4620431115276475, | |
| "grad_norm": 1.141318917274475, | |
| "learning_rate": 9.988577955454027e-06, | |
| "loss": 0.1983, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.471415182755389, | |
| "grad_norm": 1.1170287132263184, | |
| "learning_rate": 9.98850473721976e-06, | |
| "loss": 0.1823, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.4807872539831304, | |
| "grad_norm": 1.4531837701797485, | |
| "learning_rate": 9.988431518985488e-06, | |
| "loss": 0.1693, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.4901593252108716, | |
| "grad_norm": 1.5249556303024292, | |
| "learning_rate": 9.988358300751219e-06, | |
| "loss": 0.2014, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.499531396438613, | |
| "grad_norm": 1.319170594215393, | |
| "learning_rate": 9.988285082516951e-06, | |
| "loss": 0.1841, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.5089034676663542, | |
| "grad_norm": 1.2907928228378296, | |
| "learning_rate": 9.988211864282682e-06, | |
| "loss": 0.1778, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.5182755388940956, | |
| "grad_norm": 1.170284628868103, | |
| "learning_rate": 9.988138646048413e-06, | |
| "loss": 0.1668, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.527647610121837, | |
| "grad_norm": 1.4182498455047607, | |
| "learning_rate": 9.988065427814144e-06, | |
| "loss": 0.1968, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.5370196813495782, | |
| "grad_norm": 1.3137290477752686, | |
| "learning_rate": 9.987992209579874e-06, | |
| "loss": 0.1734, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.5463917525773194, | |
| "grad_norm": 1.458721399307251, | |
| "learning_rate": 9.987918991345605e-06, | |
| "loss": 0.209, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.5557638238050608, | |
| "grad_norm": 1.1368082761764526, | |
| "learning_rate": 9.987845773111336e-06, | |
| "loss": 0.1831, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.5651358950328023, | |
| "grad_norm": 1.0743663311004639, | |
| "learning_rate": 9.987772554877068e-06, | |
| "loss": 0.1883, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.5745079662605437, | |
| "grad_norm": 1.4294681549072266, | |
| "learning_rate": 9.987699336642799e-06, | |
| "loss": 0.1851, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.5838800374882849, | |
| "grad_norm": 1.0537577867507935, | |
| "learning_rate": 9.987626118408528e-06, | |
| "loss": 0.1818, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.5932521087160263, | |
| "grad_norm": 1.3930073976516724, | |
| "learning_rate": 9.98755290017426e-06, | |
| "loss": 0.1876, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.6026241799437675, | |
| "grad_norm": 1.3290959596633911, | |
| "learning_rate": 9.987479681939991e-06, | |
| "loss": 0.1777, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.611996251171509, | |
| "grad_norm": 1.3895900249481201, | |
| "learning_rate": 9.987406463705722e-06, | |
| "loss": 0.1728, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.6213683223992503, | |
| "grad_norm": 1.336679220199585, | |
| "learning_rate": 9.987333245471453e-06, | |
| "loss": 0.202, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.6307403936269915, | |
| "grad_norm": 1.4338617324829102, | |
| "learning_rate": 9.987260027237184e-06, | |
| "loss": 0.1745, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.640112464854733, | |
| "grad_norm": 1.1854125261306763, | |
| "learning_rate": 9.987186809002914e-06, | |
| "loss": 0.1628, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.640112464854733, | |
| "eval_loss": 0.07191870361566544, | |
| "eval_pearson_cosine": 0.7651911973953247, | |
| "eval_pearson_dot": 0.6584045886993408, | |
| "eval_pearson_euclidean": 0.7615811228752136, | |
| "eval_pearson_manhattan": 0.7618914842605591, | |
| "eval_runtime": 22.2177, | |
| "eval_samples_per_second": 67.514, | |
| "eval_spearman_cosine": 0.7733826669765486, | |
| "eval_spearman_dot": 0.6574446699366203, | |
| "eval_spearman_euclidean": 0.7678793093449918, | |
| "eval_spearman_manhattan": 0.7684997409854779, | |
| "eval_steps_per_second": 8.462, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.6494845360824741, | |
| "grad_norm": 1.468126654624939, | |
| "learning_rate": 9.987113590768645e-06, | |
| "loss": 0.1714, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.6588566073102156, | |
| "grad_norm": 1.3639568090438843, | |
| "learning_rate": 9.987040372534378e-06, | |
| "loss": 0.1839, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.668228678537957, | |
| "grad_norm": 1.2494312524795532, | |
| "learning_rate": 9.986967154300108e-06, | |
| "loss": 0.1753, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.6776007497656982, | |
| "grad_norm": 1.2897909879684448, | |
| "learning_rate": 9.986893936065839e-06, | |
| "loss": 0.1704, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.6869728209934396, | |
| "grad_norm": 1.413866400718689, | |
| "learning_rate": 9.98682071783157e-06, | |
| "loss": 0.1868, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.6963448922211808, | |
| "grad_norm": 1.093849778175354, | |
| "learning_rate": 9.9867474995973e-06, | |
| "loss": 0.1889, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.7057169634489222, | |
| "grad_norm": 1.3857814073562622, | |
| "learning_rate": 9.986674281363031e-06, | |
| "loss": 0.1818, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.7150890346766636, | |
| "grad_norm": 1.3772344589233398, | |
| "learning_rate": 9.986601063128762e-06, | |
| "loss": 0.1683, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.7244611059044048, | |
| "grad_norm": 1.3299206495285034, | |
| "learning_rate": 9.986527844894493e-06, | |
| "loss": 0.1865, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.7338331771321462, | |
| "grad_norm": 1.3139843940734863, | |
| "learning_rate": 9.986454626660225e-06, | |
| "loss": 0.169, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.7432052483598874, | |
| "grad_norm": 1.3562296628952026, | |
| "learning_rate": 9.986381408425954e-06, | |
| "loss": 0.2012, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.7525773195876289, | |
| "grad_norm": 1.2332826852798462, | |
| "learning_rate": 9.986308190191685e-06, | |
| "loss": 0.1877, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.7619493908153703, | |
| "grad_norm": 1.083622932434082, | |
| "learning_rate": 9.986234971957418e-06, | |
| "loss": 0.2026, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.7713214620431117, | |
| "grad_norm": 1.6391818523406982, | |
| "learning_rate": 9.986161753723148e-06, | |
| "loss": 0.1902, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.780693533270853, | |
| "grad_norm": 1.0985593795776367, | |
| "learning_rate": 9.986088535488879e-06, | |
| "loss": 0.1845, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.790065604498594, | |
| "grad_norm": 1.609025001525879, | |
| "learning_rate": 9.98601531725461e-06, | |
| "loss": 0.1939, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.7994376757263355, | |
| "grad_norm": 1.0637205839157104, | |
| "learning_rate": 9.98594209902034e-06, | |
| "loss": 0.1775, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.808809746954077, | |
| "grad_norm": 1.159469723701477, | |
| "learning_rate": 9.985868880786071e-06, | |
| "loss": 0.161, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 1.1251918077468872, | |
| "learning_rate": 9.985795662551802e-06, | |
| "loss": 0.1965, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.8275538894095595, | |
| "grad_norm": 1.3804899454116821, | |
| "learning_rate": 9.985722444317534e-06, | |
| "loss": 0.1768, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.8369259606373007, | |
| "grad_norm": 1.194275140762329, | |
| "learning_rate": 9.985649226083265e-06, | |
| "loss": 0.1782, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.8462980318650422, | |
| "grad_norm": 1.5173845291137695, | |
| "learning_rate": 9.985576007848996e-06, | |
| "loss": 0.193, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.8556701030927836, | |
| "grad_norm": 1.7733920812606812, | |
| "learning_rate": 9.985502789614727e-06, | |
| "loss": 0.1804, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.865042174320525, | |
| "grad_norm": 1.1430355310440063, | |
| "learning_rate": 9.985429571380457e-06, | |
| "loss": 0.1869, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.8744142455482662, | |
| "grad_norm": 1.3633067607879639, | |
| "learning_rate": 9.985356353146188e-06, | |
| "loss": 0.1983, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.8744142455482662, | |
| "eval_loss": 0.07371454685926437, | |
| "eval_pearson_cosine": 0.7772414684295654, | |
| "eval_pearson_dot": 0.660416841506958, | |
| "eval_pearson_euclidean": 0.7648824453353882, | |
| "eval_pearson_manhattan": 0.7654331922531128, | |
| "eval_runtime": 22.1973, | |
| "eval_samples_per_second": 67.576, | |
| "eval_spearman_cosine": 0.7863920785446639, | |
| "eval_spearman_dot": 0.6607574545837009, | |
| "eval_spearman_euclidean": 0.7740511645049805, | |
| "eval_spearman_manhattan": 0.7747616492851076, | |
| "eval_steps_per_second": 8.47, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.8837863167760074, | |
| "grad_norm": 1.116107702255249, | |
| "learning_rate": 9.985283134911919e-06, | |
| "loss": 0.1775, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.8931583880037488, | |
| "grad_norm": 1.280927300453186, | |
| "learning_rate": 9.985209916677651e-06, | |
| "loss": 0.1853, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.9025304592314902, | |
| "grad_norm": 1.419044852256775, | |
| "learning_rate": 9.98513669844338e-06, | |
| "loss": 0.1767, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.9119025304592316, | |
| "grad_norm": 1.4140015840530396, | |
| "learning_rate": 9.985063480209111e-06, | |
| "loss": 0.1968, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.9212746016869728, | |
| "grad_norm": 1.23015296459198, | |
| "learning_rate": 9.984990261974844e-06, | |
| "loss": 0.1559, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.930646672914714, | |
| "grad_norm": 1.4209731817245483, | |
| "learning_rate": 9.984917043740574e-06, | |
| "loss": 0.18, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.9400187441424555, | |
| "grad_norm": 1.5270899534225464, | |
| "learning_rate": 9.984843825506305e-06, | |
| "loss": 0.1858, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.9493908153701969, | |
| "grad_norm": 2.0037920475006104, | |
| "learning_rate": 9.984770607272036e-06, | |
| "loss": 0.1812, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.9587628865979383, | |
| "grad_norm": 1.4397103786468506, | |
| "learning_rate": 9.984697389037767e-06, | |
| "loss": 0.1853, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.9681349578256795, | |
| "grad_norm": 1.555161476135254, | |
| "learning_rate": 9.984624170803497e-06, | |
| "loss": 0.1758, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.9775070290534207, | |
| "grad_norm": 1.1453354358673096, | |
| "learning_rate": 9.984550952569228e-06, | |
| "loss": 0.1821, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.986879100281162, | |
| "grad_norm": 1.3050484657287598, | |
| "learning_rate": 9.984477734334959e-06, | |
| "loss": 0.1828, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.9962511715089035, | |
| "grad_norm": 1.1858463287353516, | |
| "learning_rate": 9.984404516100691e-06, | |
| "loss": 0.1801, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.005623242736645, | |
| "grad_norm": 1.2467753887176514, | |
| "learning_rate": 9.984331297866422e-06, | |
| "loss": 0.1651, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.014995313964386, | |
| "grad_norm": 1.9730074405670166, | |
| "learning_rate": 9.984258079632151e-06, | |
| "loss": 0.1654, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.0243673851921273, | |
| "grad_norm": 1.384181261062622, | |
| "learning_rate": 9.984184861397884e-06, | |
| "loss": 0.151, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.0337394564198688, | |
| "grad_norm": 1.2262136936187744, | |
| "learning_rate": 9.984111643163614e-06, | |
| "loss": 0.1338, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.04311152764761, | |
| "grad_norm": 1.3417856693267822, | |
| "learning_rate": 9.984038424929345e-06, | |
| "loss": 0.1445, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.0524835988753516, | |
| "grad_norm": 1.3032526969909668, | |
| "learning_rate": 9.983965206695076e-06, | |
| "loss": 0.1675, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.0618556701030926, | |
| "grad_norm": 1.4586397409439087, | |
| "learning_rate": 9.983891988460808e-06, | |
| "loss": 0.1503, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.071227741330834, | |
| "grad_norm": 1.8017582893371582, | |
| "learning_rate": 9.983818770226537e-06, | |
| "loss": 0.1614, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.0805998125585754, | |
| "grad_norm": 1.1136542558670044, | |
| "learning_rate": 9.983745551992268e-06, | |
| "loss": 0.1385, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.089971883786317, | |
| "grad_norm": 1.48130202293396, | |
| "learning_rate": 9.983672333758e-06, | |
| "loss": 0.1448, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.0993439550140582, | |
| "grad_norm": 1.1847114562988281, | |
| "learning_rate": 9.983599115523731e-06, | |
| "loss": 0.1263, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.108716026241799, | |
| "grad_norm": 1.068515419960022, | |
| "learning_rate": 9.983525897289462e-06, | |
| "loss": 0.1448, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.108716026241799, | |
| "eval_loss": 0.0637284442782402, | |
| "eval_pearson_cosine": 0.766581654548645, | |
| "eval_pearson_dot": 0.652958333492279, | |
| "eval_pearson_euclidean": 0.76385897397995, | |
| "eval_pearson_manhattan": 0.7643536329269409, | |
| "eval_runtime": 24.9836, | |
| "eval_samples_per_second": 60.039, | |
| "eval_spearman_cosine": 0.7736502023043434, | |
| "eval_spearman_dot": 0.6506365364740643, | |
| "eval_spearman_euclidean": 0.7701725336122238, | |
| "eval_spearman_manhattan": 0.7705851416924343, | |
| "eval_steps_per_second": 7.525, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.1180880974695406, | |
| "grad_norm": 1.2607600688934326, | |
| "learning_rate": 9.983452679055193e-06, | |
| "loss": 0.1405, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.127460168697282, | |
| "grad_norm": 1.3096617460250854, | |
| "learning_rate": 9.983379460820924e-06, | |
| "loss": 0.159, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.1368322399250235, | |
| "grad_norm": 1.4220956563949585, | |
| "learning_rate": 9.983306242586654e-06, | |
| "loss": 0.1634, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.146204311152765, | |
| "grad_norm": 1.5565595626831055, | |
| "learning_rate": 9.983233024352385e-06, | |
| "loss": 0.1549, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.155576382380506, | |
| "grad_norm": 1.357906460762024, | |
| "learning_rate": 9.983159806118118e-06, | |
| "loss": 0.1503, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.1649484536082473, | |
| "grad_norm": 1.0181514024734497, | |
| "learning_rate": 9.983086587883848e-06, | |
| "loss": 0.1242, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.1743205248359887, | |
| "grad_norm": 1.2936785221099854, | |
| "learning_rate": 9.983013369649577e-06, | |
| "loss": 0.1516, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.18369259606373, | |
| "grad_norm": 1.353125810623169, | |
| "learning_rate": 9.98294015141531e-06, | |
| "loss": 0.1576, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.1930646672914715, | |
| "grad_norm": 1.5978926420211792, | |
| "learning_rate": 9.98286693318104e-06, | |
| "loss": 0.143, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.2024367385192125, | |
| "grad_norm": 1.643609642982483, | |
| "learning_rate": 9.982793714946771e-06, | |
| "loss": 0.1509, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.211808809746954, | |
| "grad_norm": 1.2868740558624268, | |
| "learning_rate": 9.982720496712502e-06, | |
| "loss": 0.1407, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.2211808809746953, | |
| "grad_norm": 1.662234902381897, | |
| "learning_rate": 9.982647278478233e-06, | |
| "loss": 0.1499, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.2305529522024368, | |
| "grad_norm": 1.7390748262405396, | |
| "learning_rate": 9.982574060243964e-06, | |
| "loss": 0.139, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.239925023430178, | |
| "grad_norm": 1.2645044326782227, | |
| "learning_rate": 9.982500842009694e-06, | |
| "loss": 0.1541, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.2492970946579196, | |
| "grad_norm": 1.5143808126449585, | |
| "learning_rate": 9.982427623775425e-06, | |
| "loss": 0.15, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.2586691658856606, | |
| "grad_norm": 1.516233205795288, | |
| "learning_rate": 9.982354405541158e-06, | |
| "loss": 0.1387, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.268041237113402, | |
| "grad_norm": 1.607926368713379, | |
| "learning_rate": 9.982281187306888e-06, | |
| "loss": 0.1459, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.2774133083411434, | |
| "grad_norm": 1.433325171470642, | |
| "learning_rate": 9.982207969072617e-06, | |
| "loss": 0.145, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.286785379568885, | |
| "grad_norm": 1.4051145315170288, | |
| "learning_rate": 9.98213475083835e-06, | |
| "loss": 0.1433, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.296157450796626, | |
| "grad_norm": 1.5076231956481934, | |
| "learning_rate": 9.98206153260408e-06, | |
| "loss": 0.1514, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.3055295220243672, | |
| "grad_norm": 1.185927152633667, | |
| "learning_rate": 9.981988314369811e-06, | |
| "loss": 0.1315, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.3149015932521086, | |
| "grad_norm": 1.1687299013137817, | |
| "learning_rate": 9.981915096135542e-06, | |
| "loss": 0.1611, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.32427366447985, | |
| "grad_norm": 1.205338716506958, | |
| "learning_rate": 9.981841877901274e-06, | |
| "loss": 0.1587, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.3336457357075915, | |
| "grad_norm": 1.1079684495925903, | |
| "learning_rate": 9.981768659667004e-06, | |
| "loss": 0.142, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.3430178069353325, | |
| "grad_norm": 1.1689645051956177, | |
| "learning_rate": 9.981695441432734e-06, | |
| "loss": 0.1449, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.3430178069353325, | |
| "eval_loss": 0.05785529315471649, | |
| "eval_pearson_cosine": 0.7640599012374878, | |
| "eval_pearson_dot": 0.6659318208694458, | |
| "eval_pearson_euclidean": 0.7584241628646851, | |
| "eval_pearson_manhattan": 0.7589800357818604, | |
| "eval_runtime": 27.3942, | |
| "eval_samples_per_second": 54.756, | |
| "eval_spearman_cosine": 0.7698402659202235, | |
| "eval_spearman_dot": 0.6637382071207051, | |
| "eval_spearman_euclidean": 0.765183939076614, | |
| "eval_spearman_manhattan": 0.7654494135153407, | |
| "eval_steps_per_second": 6.863, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.352389878163074, | |
| "grad_norm": 1.1410503387451172, | |
| "learning_rate": 9.981622223198467e-06, | |
| "loss": 0.1253, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.3617619493908153, | |
| "grad_norm": 1.6562408208847046, | |
| "learning_rate": 9.981549004964197e-06, | |
| "loss": 0.1363, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.3711340206185567, | |
| "grad_norm": 1.3503327369689941, | |
| "learning_rate": 9.981475786729928e-06, | |
| "loss": 0.141, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.380506091846298, | |
| "grad_norm": 1.4653688669204712, | |
| "learning_rate": 9.981402568495659e-06, | |
| "loss": 0.1452, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.3898781630740396, | |
| "grad_norm": 1.4135221242904663, | |
| "learning_rate": 9.98132935026139e-06, | |
| "loss": 0.1387, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.3992502343017805, | |
| "grad_norm": 1.1758474111557007, | |
| "learning_rate": 9.98125613202712e-06, | |
| "loss": 0.1402, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.408622305529522, | |
| "grad_norm": 1.6394227743148804, | |
| "learning_rate": 9.981182913792851e-06, | |
| "loss": 0.1434, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.4179943767572634, | |
| "grad_norm": 1.5223402976989746, | |
| "learning_rate": 9.981109695558584e-06, | |
| "loss": 0.1433, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.427366447985005, | |
| "grad_norm": 1.3722361326217651, | |
| "learning_rate": 9.981036477324314e-06, | |
| "loss": 0.145, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.436738519212746, | |
| "grad_norm": 1.4288251399993896, | |
| "learning_rate": 9.980963259090045e-06, | |
| "loss": 0.1419, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.446110590440487, | |
| "grad_norm": 1.3789891004562378, | |
| "learning_rate": 9.980890040855776e-06, | |
| "loss": 0.1428, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.4554826616682286, | |
| "grad_norm": 1.3833218812942505, | |
| "learning_rate": 9.980816822621507e-06, | |
| "loss": 0.163, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.46485473289597, | |
| "grad_norm": 1.2749391794204712, | |
| "learning_rate": 9.980743604387237e-06, | |
| "loss": 0.1457, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.4742268041237114, | |
| "grad_norm": 1.3677037954330444, | |
| "learning_rate": 9.980670386152968e-06, | |
| "loss": 0.1393, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.483598875351453, | |
| "grad_norm": 1.2386823892593384, | |
| "learning_rate": 9.980597167918699e-06, | |
| "loss": 0.1446, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.492970946579194, | |
| "grad_norm": 1.6553146839141846, | |
| "learning_rate": 9.98052394968443e-06, | |
| "loss": 0.1399, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.5023430178069352, | |
| "grad_norm": 1.2258574962615967, | |
| "learning_rate": 9.98045073145016e-06, | |
| "loss": 0.1557, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.5117150890346767, | |
| "grad_norm": 1.1680238246917725, | |
| "learning_rate": 9.980377513215891e-06, | |
| "loss": 0.14, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.521087160262418, | |
| "grad_norm": 1.3764533996582031, | |
| "learning_rate": 9.980304294981624e-06, | |
| "loss": 0.1429, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.530459231490159, | |
| "grad_norm": 1.1607757806777954, | |
| "learning_rate": 9.980231076747354e-06, | |
| "loss": 0.156, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.539831302717901, | |
| "grad_norm": 1.30258309841156, | |
| "learning_rate": 9.980157858513085e-06, | |
| "loss": 0.1334, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.549203373945642, | |
| "grad_norm": 1.3965803384780884, | |
| "learning_rate": 9.980084640278816e-06, | |
| "loss": 0.1532, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.5585754451733833, | |
| "grad_norm": 1.2492479085922241, | |
| "learning_rate": 9.980011422044547e-06, | |
| "loss": 0.1538, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.5679475164011247, | |
| "grad_norm": 1.5879229307174683, | |
| "learning_rate": 9.979938203810277e-06, | |
| "loss": 0.1393, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.5773195876288657, | |
| "grad_norm": 1.5499955415725708, | |
| "learning_rate": 9.979864985576008e-06, | |
| "loss": 0.1443, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.5773195876288657, | |
| "eval_loss": 0.059572458267211914, | |
| "eval_pearson_cosine": 0.7583234310150146, | |
| "eval_pearson_dot": 0.6585268378257751, | |
| "eval_pearson_euclidean": 0.7594324946403503, | |
| "eval_pearson_manhattan": 0.7599164843559265, | |
| "eval_runtime": 25.1198, | |
| "eval_samples_per_second": 59.714, | |
| "eval_spearman_cosine": 0.7658877891929784, | |
| "eval_spearman_dot": 0.6550703356470525, | |
| "eval_spearman_euclidean": 0.7651954936870381, | |
| "eval_spearman_manhattan": 0.7656066832066194, | |
| "eval_steps_per_second": 7.484, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.5866916588566076, | |
| "grad_norm": 1.1182575225830078, | |
| "learning_rate": 9.97979176734174e-06, | |
| "loss": 0.1449, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.5960637300843485, | |
| "grad_norm": 1.3228731155395508, | |
| "learning_rate": 9.979718549107471e-06, | |
| "loss": 0.1339, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.60543580131209, | |
| "grad_norm": 1.3763021230697632, | |
| "learning_rate": 9.9796453308732e-06, | |
| "loss": 0.1379, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.6148078725398314, | |
| "grad_norm": 1.6708637475967407, | |
| "learning_rate": 9.979572112638933e-06, | |
| "loss": 0.1491, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.624179943767573, | |
| "grad_norm": 1.0826717615127563, | |
| "learning_rate": 9.979498894404664e-06, | |
| "loss": 0.1447, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.633552014995314, | |
| "grad_norm": 1.4416155815124512, | |
| "learning_rate": 9.979425676170394e-06, | |
| "loss": 0.1398, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.642924086223055, | |
| "grad_norm": 1.3966304063796997, | |
| "learning_rate": 9.979352457936125e-06, | |
| "loss": 0.1332, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.6522961574507966, | |
| "grad_norm": 1.5255811214447021, | |
| "learning_rate": 9.979279239701856e-06, | |
| "loss": 0.1423, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.661668228678538, | |
| "grad_norm": 1.3866652250289917, | |
| "learning_rate": 9.979206021467587e-06, | |
| "loss": 0.1554, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.6710402999062794, | |
| "grad_norm": 1.3477802276611328, | |
| "learning_rate": 9.979132803233317e-06, | |
| "loss": 0.1547, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.680412371134021, | |
| "grad_norm": 1.540963053703308, | |
| "learning_rate": 9.97905958499905e-06, | |
| "loss": 0.1229, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.689784442361762, | |
| "grad_norm": 1.697350025177002, | |
| "learning_rate": 9.97898636676478e-06, | |
| "loss": 0.153, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.6991565135895033, | |
| "grad_norm": 1.6020257472991943, | |
| "learning_rate": 9.978913148530511e-06, | |
| "loss": 0.1334, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.7085285848172447, | |
| "grad_norm": 1.7637958526611328, | |
| "learning_rate": 9.978839930296242e-06, | |
| "loss": 0.1513, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.717900656044986, | |
| "grad_norm": 1.2917182445526123, | |
| "learning_rate": 9.978766712061973e-06, | |
| "loss": 0.1296, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 1.42876136302948, | |
| "learning_rate": 9.978693493827704e-06, | |
| "loss": 0.1276, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.7366447985004685, | |
| "grad_norm": 1.340184211730957, | |
| "learning_rate": 9.978620275593434e-06, | |
| "loss": 0.164, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.74601686972821, | |
| "grad_norm": 1.1638396978378296, | |
| "learning_rate": 9.978547057359165e-06, | |
| "loss": 0.1372, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.7553889409559513, | |
| "grad_norm": 1.5060447454452515, | |
| "learning_rate": 9.978473839124897e-06, | |
| "loss": 0.1489, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.7647610121836927, | |
| "grad_norm": 1.3632638454437256, | |
| "learning_rate": 9.978400620890627e-06, | |
| "loss": 0.1242, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.774133083411434, | |
| "grad_norm": 1.6402980089187622, | |
| "learning_rate": 9.978327402656359e-06, | |
| "loss": 0.1395, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.783505154639175, | |
| "grad_norm": 1.8350452184677124, | |
| "learning_rate": 9.97825418442209e-06, | |
| "loss": 0.1501, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.7928772258669166, | |
| "grad_norm": 1.6517874002456665, | |
| "learning_rate": 9.97818096618782e-06, | |
| "loss": 0.1596, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.802249297094658, | |
| "grad_norm": 1.7441259622573853, | |
| "learning_rate": 9.978107747953551e-06, | |
| "loss": 0.1344, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.8116213683223994, | |
| "grad_norm": 1.4474517107009888, | |
| "learning_rate": 9.978034529719282e-06, | |
| "loss": 0.1363, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.8116213683223994, | |
| "eval_loss": 0.05750729516148567, | |
| "eval_pearson_cosine": 0.767126202583313, | |
| "eval_pearson_dot": 0.676889181137085, | |
| "eval_pearson_euclidean": 0.756407618522644, | |
| "eval_pearson_manhattan": 0.7570176124572754, | |
| "eval_runtime": 25.3699, | |
| "eval_samples_per_second": 59.125, | |
| "eval_spearman_cosine": 0.7727339030438767, | |
| "eval_spearman_dot": 0.6755843192398268, | |
| "eval_spearman_euclidean": 0.7624238185076594, | |
| "eval_spearman_manhattan": 0.7629469399526556, | |
| "eval_steps_per_second": 7.41, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.820993439550141, | |
| "grad_norm": 1.4202260971069336, | |
| "learning_rate": 9.977961311485013e-06, | |
| "loss": 0.1456, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.830365510777882, | |
| "grad_norm": 1.3678419589996338, | |
| "learning_rate": 9.977888093250743e-06, | |
| "loss": 0.1445, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.839737582005623, | |
| "grad_norm": 1.168271541595459, | |
| "learning_rate": 9.977814875016474e-06, | |
| "loss": 0.1428, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.8491096532333646, | |
| "grad_norm": 1.5929275751113892, | |
| "learning_rate": 9.977741656782207e-06, | |
| "loss": 0.1593, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.858481724461106, | |
| "grad_norm": 1.265101432800293, | |
| "learning_rate": 9.977668438547937e-06, | |
| "loss": 0.1519, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.8678537956888475, | |
| "grad_norm": 1.1187818050384521, | |
| "learning_rate": 9.977595220313666e-06, | |
| "loss": 0.1454, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.8772258669165884, | |
| "grad_norm": 1.1976639032363892, | |
| "learning_rate": 9.977522002079399e-06, | |
| "loss": 0.1321, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.88659793814433, | |
| "grad_norm": 1.7162209749221802, | |
| "learning_rate": 9.97744878384513e-06, | |
| "loss": 0.147, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.8959700093720713, | |
| "grad_norm": 1.3301661014556885, | |
| "learning_rate": 9.97737556561086e-06, | |
| "loss": 0.1341, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.9053420805998127, | |
| "grad_norm": 1.279984951019287, | |
| "learning_rate": 9.977302347376591e-06, | |
| "loss": 0.1342, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.914714151827554, | |
| "grad_norm": 1.6548879146575928, | |
| "learning_rate": 9.977229129142324e-06, | |
| "loss": 0.1429, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.924086223055295, | |
| "grad_norm": 0.9662721753120422, | |
| "learning_rate": 9.977155910908053e-06, | |
| "loss": 0.1524, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.9334582942830365, | |
| "grad_norm": 1.5336380004882812, | |
| "learning_rate": 9.977082692673783e-06, | |
| "loss": 0.1445, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.942830365510778, | |
| "grad_norm": 1.4380927085876465, | |
| "learning_rate": 9.977009474439516e-06, | |
| "loss": 0.1371, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.9522024367385193, | |
| "grad_norm": 1.551700472831726, | |
| "learning_rate": 9.976936256205247e-06, | |
| "loss": 0.135, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.9615745079662608, | |
| "grad_norm": 1.32683265209198, | |
| "learning_rate": 9.976863037970977e-06, | |
| "loss": 0.1444, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.9709465791940017, | |
| "grad_norm": 1.3574503660202026, | |
| "learning_rate": 9.976789819736708e-06, | |
| "loss": 0.1391, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.980318650421743, | |
| "grad_norm": 1.506625771522522, | |
| "learning_rate": 9.976716601502439e-06, | |
| "loss": 0.1552, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.9896907216494846, | |
| "grad_norm": 1.3970105648040771, | |
| "learning_rate": 9.97664338326817e-06, | |
| "loss": 0.147, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.999062792877226, | |
| "grad_norm": 1.4303011894226074, | |
| "learning_rate": 9.9765701650339e-06, | |
| "loss": 0.1559, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.0084348641049674, | |
| "grad_norm": 1.377488613128662, | |
| "learning_rate": 9.976496946799633e-06, | |
| "loss": 0.1187, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 3.0178069353327084, | |
| "grad_norm": 1.1664360761642456, | |
| "learning_rate": 9.976423728565364e-06, | |
| "loss": 0.1101, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 3.02717900656045, | |
| "grad_norm": 0.9129014015197754, | |
| "learning_rate": 9.976350510331093e-06, | |
| "loss": 0.111, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 3.036551077788191, | |
| "grad_norm": 1.2628843784332275, | |
| "learning_rate": 9.976277292096825e-06, | |
| "loss": 0.1141, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.0459231490159326, | |
| "grad_norm": 1.1534360647201538, | |
| "learning_rate": 9.976204073862556e-06, | |
| "loss": 0.1227, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.0459231490159326, | |
| "eval_loss": 0.051736850291490555, | |
| "eval_pearson_cosine": 0.763727605342865, | |
| "eval_pearson_dot": 0.673626720905304, | |
| "eval_pearson_euclidean": 0.756030797958374, | |
| "eval_pearson_manhattan": 0.7567305564880371, | |
| "eval_runtime": 21.997, | |
| "eval_samples_per_second": 68.191, | |
| "eval_spearman_cosine": 0.7669834916269708, | |
| "eval_spearman_dot": 0.6714383880600381, | |
| "eval_spearman_euclidean": 0.7611960037220876, | |
| "eval_spearman_manhattan": 0.7615680957541558, | |
| "eval_steps_per_second": 8.547, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.055295220243674, | |
| "grad_norm": 1.4779927730560303, | |
| "learning_rate": 9.976130855628287e-06, | |
| "loss": 0.1186, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 3.064667291471415, | |
| "grad_norm": 1.2425293922424316, | |
| "learning_rate": 9.976057637394017e-06, | |
| "loss": 0.1213, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 3.0740393626991565, | |
| "grad_norm": 1.6161679029464722, | |
| "learning_rate": 9.975984419159748e-06, | |
| "loss": 0.1127, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 3.083411433926898, | |
| "grad_norm": 1.199263334274292, | |
| "learning_rate": 9.975911200925479e-06, | |
| "loss": 0.0971, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 3.0927835051546393, | |
| "grad_norm": 1.5749520063400269, | |
| "learning_rate": 9.97583798269121e-06, | |
| "loss": 0.1162, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.1021555763823807, | |
| "grad_norm": 1.558112382888794, | |
| "learning_rate": 9.97576476445694e-06, | |
| "loss": 0.125, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 3.1115276476101217, | |
| "grad_norm": 1.5197752714157104, | |
| "learning_rate": 9.975691546222673e-06, | |
| "loss": 0.1199, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 3.120899718837863, | |
| "grad_norm": 1.1978933811187744, | |
| "learning_rate": 9.975618327988404e-06, | |
| "loss": 0.0975, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 3.1302717900656045, | |
| "grad_norm": 1.0790154933929443, | |
| "learning_rate": 9.975545109754134e-06, | |
| "loss": 0.1078, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 3.139643861293346, | |
| "grad_norm": 1.7810611724853516, | |
| "learning_rate": 9.975471891519865e-06, | |
| "loss": 0.1065, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 3.1490159325210874, | |
| "grad_norm": 1.2899665832519531, | |
| "learning_rate": 9.975398673285596e-06, | |
| "loss": 0.1104, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 3.1583880037488283, | |
| "grad_norm": 1.1923859119415283, | |
| "learning_rate": 9.975325455051327e-06, | |
| "loss": 0.1143, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 3.1677600749765698, | |
| "grad_norm": 1.428306221961975, | |
| "learning_rate": 9.975252236817057e-06, | |
| "loss": 0.101, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 3.177132146204311, | |
| "grad_norm": 1.323941946029663, | |
| "learning_rate": 9.97517901858279e-06, | |
| "loss": 0.1115, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 3.1865042174320526, | |
| "grad_norm": 1.4079722166061401, | |
| "learning_rate": 9.97510580034852e-06, | |
| "loss": 0.1032, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.195876288659794, | |
| "grad_norm": 1.2919671535491943, | |
| "learning_rate": 9.97503258211425e-06, | |
| "loss": 0.1145, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 3.205248359887535, | |
| "grad_norm": 1.1800559759140015, | |
| "learning_rate": 9.974959363879982e-06, | |
| "loss": 0.106, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 3.2146204311152764, | |
| "grad_norm": 1.5425052642822266, | |
| "learning_rate": 9.974886145645713e-06, | |
| "loss": 0.1156, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 3.223992502343018, | |
| "grad_norm": 1.7271355390548706, | |
| "learning_rate": 9.974812927411443e-06, | |
| "loss": 0.1121, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 3.2333645735707592, | |
| "grad_norm": 1.3295711278915405, | |
| "learning_rate": 9.974739709177174e-06, | |
| "loss": 0.1072, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 3.2427366447985007, | |
| "grad_norm": 1.658498764038086, | |
| "learning_rate": 9.974666490942905e-06, | |
| "loss": 0.1131, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 3.2521087160262416, | |
| "grad_norm": 1.6077649593353271, | |
| "learning_rate": 9.974593272708636e-06, | |
| "loss": 0.1143, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 3.261480787253983, | |
| "grad_norm": 1.4552775621414185, | |
| "learning_rate": 9.974520054474366e-06, | |
| "loss": 0.1065, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 3.2708528584817245, | |
| "grad_norm": 1.586267113685608, | |
| "learning_rate": 9.974446836240099e-06, | |
| "loss": 0.1137, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 3.280224929709466, | |
| "grad_norm": 0.9890511631965637, | |
| "learning_rate": 9.97437361800583e-06, | |
| "loss": 0.103, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.280224929709466, | |
| "eval_loss": 0.04644956439733505, | |
| "eval_pearson_cosine": 0.760254442691803, | |
| "eval_pearson_dot": 0.6812557578086853, | |
| "eval_pearson_euclidean": 0.7475454807281494, | |
| "eval_pearson_manhattan": 0.7483712434768677, | |
| "eval_runtime": 22.2407, | |
| "eval_samples_per_second": 67.444, | |
| "eval_spearman_cosine": 0.7642516190492565, | |
| "eval_spearman_dot": 0.6795590047108491, | |
| "eval_spearman_euclidean": 0.7527436591109528, | |
| "eval_spearman_manhattan": 0.7534967017417152, | |
| "eval_steps_per_second": 8.453, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.2895970009372073, | |
| "grad_norm": 1.4361557960510254, | |
| "learning_rate": 9.97430039977156e-06, | |
| "loss": 0.1078, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 3.2989690721649483, | |
| "grad_norm": 1.307634949684143, | |
| "learning_rate": 9.974227181537291e-06, | |
| "loss": 0.105, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 3.3083411433926897, | |
| "grad_norm": 1.103812336921692, | |
| "learning_rate": 9.974153963303022e-06, | |
| "loss": 0.1021, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 3.317713214620431, | |
| "grad_norm": 1.485766887664795, | |
| "learning_rate": 9.974080745068753e-06, | |
| "loss": 0.1055, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 3.3270852858481725, | |
| "grad_norm": 1.4017934799194336, | |
| "learning_rate": 9.974007526834483e-06, | |
| "loss": 0.0991, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 3.336457357075914, | |
| "grad_norm": 1.1994048357009888, | |
| "learning_rate": 9.973934308600214e-06, | |
| "loss": 0.1176, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 3.345829428303655, | |
| "grad_norm": 1.0661845207214355, | |
| "learning_rate": 9.973861090365947e-06, | |
| "loss": 0.1036, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 3.3552014995313963, | |
| "grad_norm": 1.273992896080017, | |
| "learning_rate": 9.973787872131676e-06, | |
| "loss": 0.1069, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 3.3645735707591378, | |
| "grad_norm": 1.157599687576294, | |
| "learning_rate": 9.973714653897406e-06, | |
| "loss": 0.1154, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 3.373945641986879, | |
| "grad_norm": 1.567265272140503, | |
| "learning_rate": 9.973641435663139e-06, | |
| "loss": 0.1104, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.3833177132146206, | |
| "grad_norm": 1.509450078010559, | |
| "learning_rate": 9.97356821742887e-06, | |
| "loss": 0.1123, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 3.3926897844423616, | |
| "grad_norm": 1.6206624507904053, | |
| "learning_rate": 9.9734949991946e-06, | |
| "loss": 0.0915, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 3.402061855670103, | |
| "grad_norm": 1.3384416103363037, | |
| "learning_rate": 9.973421780960331e-06, | |
| "loss": 0.1286, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 3.4114339268978444, | |
| "grad_norm": 1.4834225177764893, | |
| "learning_rate": 9.973348562726062e-06, | |
| "loss": 0.1129, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 3.420805998125586, | |
| "grad_norm": 1.486007809638977, | |
| "learning_rate": 9.973275344491793e-06, | |
| "loss": 0.1037, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 3.4301780693533273, | |
| "grad_norm": 1.5038363933563232, | |
| "learning_rate": 9.973202126257523e-06, | |
| "loss": 0.104, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 3.4395501405810682, | |
| "grad_norm": 1.3018808364868164, | |
| "learning_rate": 9.973128908023256e-06, | |
| "loss": 0.1068, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 3.4489222118088096, | |
| "grad_norm": 1.733067512512207, | |
| "learning_rate": 9.973055689788987e-06, | |
| "loss": 0.1011, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 3.458294283036551, | |
| "grad_norm": 1.3246439695358276, | |
| "learning_rate": 9.972982471554716e-06, | |
| "loss": 0.0989, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 3.4676663542642925, | |
| "grad_norm": 1.7354522943496704, | |
| "learning_rate": 9.972909253320448e-06, | |
| "loss": 0.1174, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.477038425492034, | |
| "grad_norm": 1.5907713174819946, | |
| "learning_rate": 9.972836035086179e-06, | |
| "loss": 0.1067, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 3.486410496719775, | |
| "grad_norm": 1.4252599477767944, | |
| "learning_rate": 9.97276281685191e-06, | |
| "loss": 0.1064, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 3.4957825679475163, | |
| "grad_norm": 1.3505686521530151, | |
| "learning_rate": 9.97268959861764e-06, | |
| "loss": 0.1168, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 3.5051546391752577, | |
| "grad_norm": 1.3022727966308594, | |
| "learning_rate": 9.972616380383373e-06, | |
| "loss": 0.1111, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 3.514526710402999, | |
| "grad_norm": 1.080246090888977, | |
| "learning_rate": 9.972543162149102e-06, | |
| "loss": 0.0982, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.514526710402999, | |
| "eval_loss": 0.04514094442129135, | |
| "eval_pearson_cosine": 0.7656620144844055, | |
| "eval_pearson_dot": 0.6821019649505615, | |
| "eval_pearson_euclidean": 0.7441372871398926, | |
| "eval_pearson_manhattan": 0.7452259659767151, | |
| "eval_runtime": 22.4556, | |
| "eval_samples_per_second": 66.798, | |
| "eval_spearman_cosine": 0.7694518035767811, | |
| "eval_spearman_dot": 0.6821838150409313, | |
| "eval_spearman_euclidean": 0.7516165395512334, | |
| "eval_spearman_manhattan": 0.7527176854515762, | |
| "eval_steps_per_second": 8.372, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.5238987816307406, | |
| "grad_norm": 1.3396129608154297, | |
| "learning_rate": 9.972469943914833e-06, | |
| "loss": 0.1145, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 3.5332708528584815, | |
| "grad_norm": 1.5277647972106934, | |
| "learning_rate": 9.972396725680565e-06, | |
| "loss": 0.1101, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 3.542642924086223, | |
| "grad_norm": 1.8469972610473633, | |
| "learning_rate": 9.972323507446296e-06, | |
| "loss": 0.1129, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 3.5520149953139644, | |
| "grad_norm": 1.2464599609375, | |
| "learning_rate": 9.972250289212027e-06, | |
| "loss": 0.1103, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 3.561387066541706, | |
| "grad_norm": 1.7863965034484863, | |
| "learning_rate": 9.972177070977757e-06, | |
| "loss": 0.1084, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.570759137769447, | |
| "grad_norm": 1.3085591793060303, | |
| "learning_rate": 9.972103852743488e-06, | |
| "loss": 0.11, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 3.580131208997188, | |
| "grad_norm": 1.5875599384307861, | |
| "learning_rate": 9.972030634509219e-06, | |
| "loss": 0.1213, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 3.5895032802249296, | |
| "grad_norm": 1.2654856443405151, | |
| "learning_rate": 9.97195741627495e-06, | |
| "loss": 0.1045, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 3.598875351452671, | |
| "grad_norm": 1.4713581800460815, | |
| "learning_rate": 9.97188419804068e-06, | |
| "loss": 0.1123, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 3.6082474226804124, | |
| "grad_norm": 1.3559589385986328, | |
| "learning_rate": 9.971810979806413e-06, | |
| "loss": 0.1171, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 3.617619493908154, | |
| "grad_norm": 1.7482990026474, | |
| "learning_rate": 9.971737761572142e-06, | |
| "loss": 0.1141, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 3.626991565135895, | |
| "grad_norm": 1.7189960479736328, | |
| "learning_rate": 9.971664543337873e-06, | |
| "loss": 0.107, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 1.8246538639068604, | |
| "learning_rate": 9.971591325103605e-06, | |
| "loss": 0.1161, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 3.6457357075913777, | |
| "grad_norm": 1.0778300762176514, | |
| "learning_rate": 9.971518106869336e-06, | |
| "loss": 0.1084, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 3.655107778819119, | |
| "grad_norm": 1.5588942766189575, | |
| "learning_rate": 9.971444888635066e-06, | |
| "loss": 0.1038, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.6644798500468605, | |
| "grad_norm": 1.3670451641082764, | |
| "learning_rate": 9.971371670400797e-06, | |
| "loss": 0.1069, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 3.6738519212746015, | |
| "grad_norm": 1.437696099281311, | |
| "learning_rate": 9.971298452166528e-06, | |
| "loss": 0.1129, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 3.683223992502343, | |
| "grad_norm": 1.39695143699646, | |
| "learning_rate": 9.971225233932259e-06, | |
| "loss": 0.1113, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 3.6925960637300843, | |
| "grad_norm": 1.3372693061828613, | |
| "learning_rate": 9.97115201569799e-06, | |
| "loss": 0.1042, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 3.7019681349578257, | |
| "grad_norm": 1.4336313009262085, | |
| "learning_rate": 9.971078797463722e-06, | |
| "loss": 0.1224, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 3.711340206185567, | |
| "grad_norm": 1.3641144037246704, | |
| "learning_rate": 9.971005579229453e-06, | |
| "loss": 0.1082, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 3.720712277413308, | |
| "grad_norm": 1.1231974363327026, | |
| "learning_rate": 9.970932360995183e-06, | |
| "loss": 0.1108, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 3.7300843486410495, | |
| "grad_norm": 1.0743800401687622, | |
| "learning_rate": 9.970859142760914e-06, | |
| "loss": 0.1148, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 3.739456419868791, | |
| "grad_norm": 1.5260711908340454, | |
| "learning_rate": 9.970785924526645e-06, | |
| "loss": 0.1248, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 3.7488284910965324, | |
| "grad_norm": 1.1183910369873047, | |
| "learning_rate": 9.970712706292376e-06, | |
| "loss": 0.0987, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.7488284910965324, | |
| "eval_loss": 0.046661876142024994, | |
| "eval_pearson_cosine": 0.7576525807380676, | |
| "eval_pearson_dot": 0.6644298434257507, | |
| "eval_pearson_euclidean": 0.7384845614433289, | |
| "eval_pearson_manhattan": 0.7396556735038757, | |
| "eval_runtime": 23.8808, | |
| "eval_samples_per_second": 62.812, | |
| "eval_spearman_cosine": 0.7607075839895016, | |
| "eval_spearman_dot": 0.6622737418861694, | |
| "eval_spearman_euclidean": 0.7433752629911805, | |
| "eval_spearman_manhattan": 0.7446298314535014, | |
| "eval_steps_per_second": 7.872, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.758200562324274, | |
| "grad_norm": 1.3608311414718628, | |
| "learning_rate": 9.970639488058106e-06, | |
| "loss": 0.1179, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 3.7675726335520148, | |
| "grad_norm": 1.6313430070877075, | |
| "learning_rate": 9.970566269823839e-06, | |
| "loss": 0.1186, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 3.776944704779756, | |
| "grad_norm": 1.4092051982879639, | |
| "learning_rate": 9.970493051589568e-06, | |
| "loss": 0.1048, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 3.7863167760074976, | |
| "grad_norm": 1.4106525182724, | |
| "learning_rate": 9.970419833355299e-06, | |
| "loss": 0.1233, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 3.795688847235239, | |
| "grad_norm": 1.498146891593933, | |
| "learning_rate": 9.970346615121031e-06, | |
| "loss": 0.1164, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 3.8050609184629804, | |
| "grad_norm": 1.68582284450531, | |
| "learning_rate": 9.970273396886762e-06, | |
| "loss": 0.1194, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 3.8144329896907214, | |
| "grad_norm": 1.329270362854004, | |
| "learning_rate": 9.970200178652493e-06, | |
| "loss": 0.1001, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 3.823805060918463, | |
| "grad_norm": 1.6010513305664062, | |
| "learning_rate": 9.970126960418223e-06, | |
| "loss": 0.107, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 3.8331771321462043, | |
| "grad_norm": 1.213576078414917, | |
| "learning_rate": 9.970053742183954e-06, | |
| "loss": 0.1108, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 3.8425492033739457, | |
| "grad_norm": 1.585524320602417, | |
| "learning_rate": 9.969980523949685e-06, | |
| "loss": 0.1079, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.851921274601687, | |
| "grad_norm": 1.6043713092803955, | |
| "learning_rate": 9.969907305715416e-06, | |
| "loss": 0.1141, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 3.861293345829428, | |
| "grad_norm": 1.3566473722457886, | |
| "learning_rate": 9.969834087481146e-06, | |
| "loss": 0.1148, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 3.8706654170571695, | |
| "grad_norm": 1.390787124633789, | |
| "learning_rate": 9.969760869246879e-06, | |
| "loss": 0.1024, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 3.880037488284911, | |
| "grad_norm": 1.689005970954895, | |
| "learning_rate": 9.96968765101261e-06, | |
| "loss": 0.111, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 3.8894095595126523, | |
| "grad_norm": 1.850071907043457, | |
| "learning_rate": 9.96961443277834e-06, | |
| "loss": 0.1097, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 3.8987816307403937, | |
| "grad_norm": 1.4834603071212769, | |
| "learning_rate": 9.969541214544071e-06, | |
| "loss": 0.1084, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 3.9081537019681347, | |
| "grad_norm": 1.3408997058868408, | |
| "learning_rate": 9.969467996309802e-06, | |
| "loss": 0.1194, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 3.917525773195876, | |
| "grad_norm": 1.3920304775238037, | |
| "learning_rate": 9.969394778075533e-06, | |
| "loss": 0.1091, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 3.9268978444236176, | |
| "grad_norm": 1.0026508569717407, | |
| "learning_rate": 9.969321559841263e-06, | |
| "loss": 0.119, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 3.936269915651359, | |
| "grad_norm": 1.7984665632247925, | |
| "learning_rate": 9.969248341606996e-06, | |
| "loss": 0.1065, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.9456419868791004, | |
| "grad_norm": 1.6500909328460693, | |
| "learning_rate": 9.969175123372725e-06, | |
| "loss": 0.1083, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 3.9550140581068414, | |
| "grad_norm": 1.7580713033676147, | |
| "learning_rate": 9.969101905138456e-06, | |
| "loss": 0.1237, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 3.964386129334583, | |
| "grad_norm": 1.8374171257019043, | |
| "learning_rate": 9.969028686904188e-06, | |
| "loss": 0.1003, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 3.973758200562324, | |
| "grad_norm": 1.5857341289520264, | |
| "learning_rate": 9.968955468669919e-06, | |
| "loss": 0.1012, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 3.9831302717900656, | |
| "grad_norm": 1.627947211265564, | |
| "learning_rate": 9.96888225043565e-06, | |
| "loss": 0.1111, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 3.9831302717900656, | |
| "eval_loss": 0.04063473269343376, | |
| "eval_pearson_cosine": 0.7690664529800415, | |
| "eval_pearson_dot": 0.6998196840286255, | |
| "eval_pearson_euclidean": 0.7456687092781067, | |
| "eval_pearson_manhattan": 0.7471497058868408, | |
| "eval_runtime": 23.0817, | |
| "eval_samples_per_second": 64.986, | |
| "eval_spearman_cosine": 0.7702784084250337, | |
| "eval_spearman_dot": 0.7005907360024843, | |
| "eval_spearman_euclidean": 0.7509877657044322, | |
| "eval_spearman_manhattan": 0.7524785559548752, | |
| "eval_steps_per_second": 8.145, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 3.992502343017807, | |
| "grad_norm": 1.3161486387252808, | |
| "learning_rate": 9.96880903220138e-06, | |
| "loss": 0.1114, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 4.001874414245548, | |
| "grad_norm": 0.9556475281715393, | |
| "learning_rate": 9.968735813967111e-06, | |
| "loss": 0.1141, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 4.01124648547329, | |
| "grad_norm": 1.0041595697402954, | |
| "learning_rate": 9.968662595732842e-06, | |
| "loss": 0.0807, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 4.020618556701031, | |
| "grad_norm": 1.1500684022903442, | |
| "learning_rate": 9.968589377498573e-06, | |
| "loss": 0.0701, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 4.029990627928772, | |
| "grad_norm": 1.3963230848312378, | |
| "learning_rate": 9.968516159264305e-06, | |
| "loss": 0.0863, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 4.039362699156514, | |
| "grad_norm": 1.4251878261566162, | |
| "learning_rate": 9.968442941030036e-06, | |
| "loss": 0.0746, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 4.048734770384255, | |
| "grad_norm": 1.0674968957901, | |
| "learning_rate": 9.968369722795765e-06, | |
| "loss": 0.0667, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 4.0581068416119965, | |
| "grad_norm": 1.2465558052062988, | |
| "learning_rate": 9.968296504561497e-06, | |
| "loss": 0.0773, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 4.0674789128397375, | |
| "grad_norm": 1.409511923789978, | |
| "learning_rate": 9.968223286327228e-06, | |
| "loss": 0.0775, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 4.0768509840674785, | |
| "grad_norm": 1.2048633098602295, | |
| "learning_rate": 9.968150068092959e-06, | |
| "loss": 0.0885, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 4.08622305529522, | |
| "grad_norm": 1.3504215478897095, | |
| "learning_rate": 9.96807684985869e-06, | |
| "loss": 0.0802, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 4.095595126522961, | |
| "grad_norm": 1.5094915628433228, | |
| "learning_rate": 9.96800363162442e-06, | |
| "loss": 0.0889, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 4.104967197750703, | |
| "grad_norm": 1.2075692415237427, | |
| "learning_rate": 9.967930413390151e-06, | |
| "loss": 0.0718, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 4.114339268978444, | |
| "grad_norm": 1.476462960243225, | |
| "learning_rate": 9.967857195155882e-06, | |
| "loss": 0.0809, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 4.123711340206185, | |
| "grad_norm": 1.4811893701553345, | |
| "learning_rate": 9.967783976921614e-06, | |
| "loss": 0.082, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.133083411433927, | |
| "grad_norm": 1.3016406297683716, | |
| "learning_rate": 9.967710758687345e-06, | |
| "loss": 0.0867, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 4.142455482661668, | |
| "grad_norm": 1.3254297971725464, | |
| "learning_rate": 9.967637540453076e-06, | |
| "loss": 0.0783, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 4.15182755388941, | |
| "grad_norm": 1.7814503908157349, | |
| "learning_rate": 9.967564322218806e-06, | |
| "loss": 0.0812, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 4.161199625117151, | |
| "grad_norm": 1.3375070095062256, | |
| "learning_rate": 9.967491103984537e-06, | |
| "loss": 0.0835, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 4.170571696344892, | |
| "grad_norm": 1.3573247194290161, | |
| "learning_rate": 9.967417885750268e-06, | |
| "loss": 0.0772, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 4.179943767572634, | |
| "grad_norm": 1.601321816444397, | |
| "learning_rate": 9.967344667515999e-06, | |
| "loss": 0.0785, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 4.189315838800375, | |
| "grad_norm": 1.0777158737182617, | |
| "learning_rate": 9.96727144928173e-06, | |
| "loss": 0.0789, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 4.1986879100281165, | |
| "grad_norm": 1.717281699180603, | |
| "learning_rate": 9.967198231047462e-06, | |
| "loss": 0.0876, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 4.2080599812558575, | |
| "grad_norm": 1.6537655591964722, | |
| "learning_rate": 9.967125012813191e-06, | |
| "loss": 0.0859, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 4.217432052483598, | |
| "grad_norm": 1.3347113132476807, | |
| "learning_rate": 9.967051794578922e-06, | |
| "loss": 0.0888, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.217432052483598, | |
| "eval_loss": 0.042121224105358124, | |
| "eval_pearson_cosine": 0.7580196857452393, | |
| "eval_pearson_dot": 0.6874213814735413, | |
| "eval_pearson_euclidean": 0.740117073059082, | |
| "eval_pearson_manhattan": 0.7411655187606812, | |
| "eval_runtime": 22.046, | |
| "eval_samples_per_second": 68.04, | |
| "eval_spearman_cosine": 0.7598083870591178, | |
| "eval_spearman_dot": 0.6866180590359211, | |
| "eval_spearman_euclidean": 0.7457408658977246, | |
| "eval_spearman_manhattan": 0.7467901472090236, | |
| "eval_steps_per_second": 8.528, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.22680412371134, | |
| "grad_norm": 1.283334732055664, | |
| "learning_rate": 9.966978576344654e-06, | |
| "loss": 0.0824, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 4.236176194939081, | |
| "grad_norm": 1.4807559251785278, | |
| "learning_rate": 9.966905358110385e-06, | |
| "loss": 0.0812, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 4.245548266166823, | |
| "grad_norm": 1.1873483657836914, | |
| "learning_rate": 9.966832139876116e-06, | |
| "loss": 0.0788, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 4.254920337394564, | |
| "grad_norm": 1.27379310131073, | |
| "learning_rate": 9.966758921641846e-06, | |
| "loss": 0.0802, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 4.264292408622305, | |
| "grad_norm": 1.3721706867218018, | |
| "learning_rate": 9.966685703407577e-06, | |
| "loss": 0.0776, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 4.273664479850047, | |
| "grad_norm": 1.4129197597503662, | |
| "learning_rate": 9.966612485173308e-06, | |
| "loss": 0.0924, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 4.283036551077788, | |
| "grad_norm": 1.453730821609497, | |
| "learning_rate": 9.966539266939039e-06, | |
| "loss": 0.0823, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 4.29240862230553, | |
| "grad_norm": 1.4608802795410156, | |
| "learning_rate": 9.966466048704771e-06, | |
| "loss": 0.0806, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 4.301780693533271, | |
| "grad_norm": 1.0814175605773926, | |
| "learning_rate": 9.966392830470502e-06, | |
| "loss": 0.0781, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 4.311152764761012, | |
| "grad_norm": 1.9891834259033203, | |
| "learning_rate": 9.966319612236233e-06, | |
| "loss": 0.0792, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.320524835988754, | |
| "grad_norm": 0.7774847745895386, | |
| "learning_rate": 9.966246394001963e-06, | |
| "loss": 0.0734, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 4.329896907216495, | |
| "grad_norm": 2.0921082496643066, | |
| "learning_rate": 9.966173175767694e-06, | |
| "loss": 0.0789, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 4.339268978444236, | |
| "grad_norm": 1.4378306865692139, | |
| "learning_rate": 9.966099957533425e-06, | |
| "loss": 0.0829, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 4.348641049671977, | |
| "grad_norm": 1.5577812194824219, | |
| "learning_rate": 9.966026739299156e-06, | |
| "loss": 0.0782, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 4.358013120899718, | |
| "grad_norm": 1.8791301250457764, | |
| "learning_rate": 9.965953521064888e-06, | |
| "loss": 0.088, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 4.36738519212746, | |
| "grad_norm": 0.8537359833717346, | |
| "learning_rate": 9.965880302830617e-06, | |
| "loss": 0.0766, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 4.376757263355201, | |
| "grad_norm": 1.258042573928833, | |
| "learning_rate": 9.965807084596348e-06, | |
| "loss": 0.0877, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 4.386129334582943, | |
| "grad_norm": 1.5519142150878906, | |
| "learning_rate": 9.96573386636208e-06, | |
| "loss": 0.0881, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 4.395501405810684, | |
| "grad_norm": 1.1437076330184937, | |
| "learning_rate": 9.965660648127811e-06, | |
| "loss": 0.0816, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 4.404873477038425, | |
| "grad_norm": 1.3333864212036133, | |
| "learning_rate": 9.965587429893542e-06, | |
| "loss": 0.0818, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 4.414245548266167, | |
| "grad_norm": 1.403075098991394, | |
| "learning_rate": 9.965514211659273e-06, | |
| "loss": 0.0771, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 4.423617619493908, | |
| "grad_norm": 1.3652963638305664, | |
| "learning_rate": 9.965440993425003e-06, | |
| "loss": 0.0692, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 4.43298969072165, | |
| "grad_norm": 1.4429869651794434, | |
| "learning_rate": 9.965367775190734e-06, | |
| "loss": 0.0846, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 4.442361761949391, | |
| "grad_norm": 1.291710376739502, | |
| "learning_rate": 9.965294556956465e-06, | |
| "loss": 0.0796, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 4.451733833177133, | |
| "grad_norm": 1.4110385179519653, | |
| "learning_rate": 9.965221338722196e-06, | |
| "loss": 0.0756, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 4.451733833177133, | |
| "eval_loss": 0.039456192404031754, | |
| "eval_pearson_cosine": 0.7664028406143188, | |
| "eval_pearson_dot": 0.7008457779884338, | |
| "eval_pearson_euclidean": 0.7418538928031921, | |
| "eval_pearson_manhattan": 0.7431594133377075, | |
| "eval_runtime": 23.3602, | |
| "eval_samples_per_second": 64.212, | |
| "eval_spearman_cosine": 0.7673929323503452, | |
| "eval_spearman_dot": 0.7011750025269451, | |
| "eval_spearman_euclidean": 0.7464768579915497, | |
| "eval_spearman_manhattan": 0.7479944496608657, | |
| "eval_steps_per_second": 8.048, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 4.4611059044048735, | |
| "grad_norm": 1.1584782600402832, | |
| "learning_rate": 9.965148120487928e-06, | |
| "loss": 0.0834, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 4.4704779756326145, | |
| "grad_norm": 1.2065712213516235, | |
| "learning_rate": 9.965074902253659e-06, | |
| "loss": 0.0865, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 4.479850046860356, | |
| "grad_norm": 1.3458271026611328, | |
| "learning_rate": 9.965001684019388e-06, | |
| "loss": 0.0764, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 4.489222118088097, | |
| "grad_norm": 2.0091888904571533, | |
| "learning_rate": 9.96492846578512e-06, | |
| "loss": 0.0773, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 4.498594189315839, | |
| "grad_norm": 1.3832370042800903, | |
| "learning_rate": 9.964855247550851e-06, | |
| "loss": 0.0806, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 4.50796626054358, | |
| "grad_norm": 1.4656741619110107, | |
| "learning_rate": 9.964782029316582e-06, | |
| "loss": 0.0852, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 4.517338331771321, | |
| "grad_norm": 1.3915668725967407, | |
| "learning_rate": 9.964708811082312e-06, | |
| "loss": 0.086, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 4.526710402999063, | |
| "grad_norm": 1.2182085514068604, | |
| "learning_rate": 9.964635592848043e-06, | |
| "loss": 0.0777, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 4.536082474226804, | |
| "grad_norm": 1.2041029930114746, | |
| "learning_rate": 9.964562374613774e-06, | |
| "loss": 0.0738, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 4.545454545454545, | |
| "grad_norm": 1.289475917816162, | |
| "learning_rate": 9.964489156379505e-06, | |
| "loss": 0.0723, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 4.554826616682287, | |
| "grad_norm": 1.8206441402435303, | |
| "learning_rate": 9.964415938145237e-06, | |
| "loss": 0.0823, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 4.564198687910028, | |
| "grad_norm": 1.393254280090332, | |
| "learning_rate": 9.964342719910968e-06, | |
| "loss": 0.0869, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 4.57357075913777, | |
| "grad_norm": 1.6424909830093384, | |
| "learning_rate": 9.964269501676699e-06, | |
| "loss": 0.0721, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 4.582942830365511, | |
| "grad_norm": 1.6760517358779907, | |
| "learning_rate": 9.96419628344243e-06, | |
| "loss": 0.0849, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 4.592314901593252, | |
| "grad_norm": 1.4797537326812744, | |
| "learning_rate": 9.96412306520816e-06, | |
| "loss": 0.0815, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 4.6016869728209935, | |
| "grad_norm": 1.3184549808502197, | |
| "learning_rate": 9.964049846973891e-06, | |
| "loss": 0.0875, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 4.6110590440487345, | |
| "grad_norm": 1.0524438619613647, | |
| "learning_rate": 9.963976628739622e-06, | |
| "loss": 0.0821, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 4.620431115276476, | |
| "grad_norm": 0.8284000158309937, | |
| "learning_rate": 9.963903410505354e-06, | |
| "loss": 0.0737, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 4.629803186504217, | |
| "grad_norm": 1.2979810237884521, | |
| "learning_rate": 9.963830192271085e-06, | |
| "loss": 0.1031, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 4.639175257731958, | |
| "grad_norm": 1.2484486103057861, | |
| "learning_rate": 9.963756974036814e-06, | |
| "loss": 0.0853, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 4.6485473289597, | |
| "grad_norm": 1.4267854690551758, | |
| "learning_rate": 9.963683755802546e-06, | |
| "loss": 0.0784, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 4.657919400187441, | |
| "grad_norm": 1.2631357908248901, | |
| "learning_rate": 9.963610537568277e-06, | |
| "loss": 0.0814, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 4.667291471415183, | |
| "grad_norm": 1.5679900646209717, | |
| "learning_rate": 9.963537319334008e-06, | |
| "loss": 0.0851, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 4.676663542642924, | |
| "grad_norm": 1.216604471206665, | |
| "learning_rate": 9.963464101099739e-06, | |
| "loss": 0.0747, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 4.686035613870665, | |
| "grad_norm": 1.3772624731063843, | |
| "learning_rate": 9.96339088286547e-06, | |
| "loss": 0.0871, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.686035613870665, | |
| "eval_loss": 0.041086822748184204, | |
| "eval_pearson_cosine": 0.7587878704071045, | |
| "eval_pearson_dot": 0.6872098445892334, | |
| "eval_pearson_euclidean": 0.7388917207717896, | |
| "eval_pearson_manhattan": 0.7404583692550659, | |
| "eval_runtime": 22.5042, | |
| "eval_samples_per_second": 66.654, | |
| "eval_spearman_cosine": 0.7603871650644157, | |
| "eval_spearman_dot": 0.6866960900397536, | |
| "eval_spearman_euclidean": 0.7440960862957542, | |
| "eval_spearman_manhattan": 0.745568766414613, | |
| "eval_steps_per_second": 8.354, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.695407685098407, | |
| "grad_norm": 1.6077407598495483, | |
| "learning_rate": 9.9633176646312e-06, | |
| "loss": 0.0993, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 4.704779756326148, | |
| "grad_norm": 1.206281065940857, | |
| "learning_rate": 9.963244446396931e-06, | |
| "loss": 0.082, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 4.71415182755389, | |
| "grad_norm": 1.168562650680542, | |
| "learning_rate": 9.963171228162662e-06, | |
| "loss": 0.075, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 4.723523898781631, | |
| "grad_norm": 1.0943313837051392, | |
| "learning_rate": 9.963098009928394e-06, | |
| "loss": 0.0907, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 4.7328959700093725, | |
| "grad_norm": 1.1832613945007324, | |
| "learning_rate": 9.963024791694125e-06, | |
| "loss": 0.0776, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 4.742268041237113, | |
| "grad_norm": 1.1568524837493896, | |
| "learning_rate": 9.962951573459856e-06, | |
| "loss": 0.0956, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 4.751640112464854, | |
| "grad_norm": 1.4179660081863403, | |
| "learning_rate": 9.962878355225586e-06, | |
| "loss": 0.079, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 4.761012183692596, | |
| "grad_norm": 1.56465744972229, | |
| "learning_rate": 9.962805136991317e-06, | |
| "loss": 0.0708, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 4.770384254920337, | |
| "grad_norm": 1.47963547706604, | |
| "learning_rate": 9.962731918757048e-06, | |
| "loss": 0.0817, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 4.779756326148079, | |
| "grad_norm": 1.4979149103164673, | |
| "learning_rate": 9.962658700522779e-06, | |
| "loss": 0.0859, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 4.78912839737582, | |
| "grad_norm": 1.0254287719726562, | |
| "learning_rate": 9.962585482288511e-06, | |
| "loss": 0.077, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 4.798500468603561, | |
| "grad_norm": 1.5644149780273438, | |
| "learning_rate": 9.96251226405424e-06, | |
| "loss": 0.0775, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 4.807872539831303, | |
| "grad_norm": 1.2777773141860962, | |
| "learning_rate": 9.962439045819971e-06, | |
| "loss": 0.0734, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 4.817244611059044, | |
| "grad_norm": 1.130614995956421, | |
| "learning_rate": 9.962365827585703e-06, | |
| "loss": 0.082, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 4.826616682286786, | |
| "grad_norm": 0.9016211032867432, | |
| "learning_rate": 9.962292609351434e-06, | |
| "loss": 0.08, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 4.835988753514527, | |
| "grad_norm": 1.4159069061279297, | |
| "learning_rate": 9.962219391117165e-06, | |
| "loss": 0.0841, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 4.845360824742268, | |
| "grad_norm": 1.600085973739624, | |
| "learning_rate": 9.962146172882896e-06, | |
| "loss": 0.0766, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 4.85473289597001, | |
| "grad_norm": 1.4401110410690308, | |
| "learning_rate": 9.962072954648626e-06, | |
| "loss": 0.0869, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 4.8641049671977505, | |
| "grad_norm": 1.4603939056396484, | |
| "learning_rate": 9.961999736414357e-06, | |
| "loss": 0.077, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 4.873477038425492, | |
| "grad_norm": 1.0498592853546143, | |
| "learning_rate": 9.961926518180088e-06, | |
| "loss": 0.0673, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 4.882849109653233, | |
| "grad_norm": 1.9157027006149292, | |
| "learning_rate": 9.96185329994582e-06, | |
| "loss": 0.0865, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 4.892221180880974, | |
| "grad_norm": 1.0183812379837036, | |
| "learning_rate": 9.961780081711551e-06, | |
| "loss": 0.0809, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 4.901593252108716, | |
| "grad_norm": 1.4563605785369873, | |
| "learning_rate": 9.96170686347728e-06, | |
| "loss": 0.086, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 4.910965323336457, | |
| "grad_norm": 1.1856083869934082, | |
| "learning_rate": 9.961633645243013e-06, | |
| "loss": 0.0802, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 4.920337394564199, | |
| "grad_norm": 1.3724653720855713, | |
| "learning_rate": 9.961560427008743e-06, | |
| "loss": 0.0839, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 4.920337394564199, | |
| "eval_loss": 0.04000931978225708, | |
| "eval_pearson_cosine": 0.7643105387687683, | |
| "eval_pearson_dot": 0.6954823732376099, | |
| "eval_pearson_euclidean": 0.7297146320343018, | |
| "eval_pearson_manhattan": 0.7310500144958496, | |
| "eval_runtime": 21.985, | |
| "eval_samples_per_second": 68.228, | |
| "eval_spearman_cosine": 0.7658903505068073, | |
| "eval_spearman_dot": 0.6968591888025883, | |
| "eval_spearman_euclidean": 0.7350736410651904, | |
| "eval_spearman_manhattan": 0.7366836781540181, | |
| "eval_steps_per_second": 8.551, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 4.92970946579194, | |
| "grad_norm": 1.7151585817337036, | |
| "learning_rate": 9.961487208774474e-06, | |
| "loss": 0.0791, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 4.939081537019681, | |
| "grad_norm": 1.6940653324127197, | |
| "learning_rate": 9.961413990540205e-06, | |
| "loss": 0.0893, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 4.948453608247423, | |
| "grad_norm": 1.5087528228759766, | |
| "learning_rate": 9.961340772305936e-06, | |
| "loss": 0.0801, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 4.957825679475164, | |
| "grad_norm": 1.2038474082946777, | |
| "learning_rate": 9.961267554071666e-06, | |
| "loss": 0.0791, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 4.967197750702906, | |
| "grad_norm": 1.4044734239578247, | |
| "learning_rate": 9.961194335837397e-06, | |
| "loss": 0.0832, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 4.976569821930647, | |
| "grad_norm": 1.057298183441162, | |
| "learning_rate": 9.96112111760313e-06, | |
| "loss": 0.0869, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 4.985941893158388, | |
| "grad_norm": 1.4192899465560913, | |
| "learning_rate": 9.96104789936886e-06, | |
| "loss": 0.0837, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 4.9953139643861295, | |
| "grad_norm": 1.7742289304733276, | |
| "learning_rate": 9.960974681134591e-06, | |
| "loss": 0.0858, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 5.0046860356138705, | |
| "grad_norm": 0.9188485741615295, | |
| "learning_rate": 9.960901462900322e-06, | |
| "loss": 0.0684, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 5.014058106841612, | |
| "grad_norm": 1.6541597843170166, | |
| "learning_rate": 9.960828244666052e-06, | |
| "loss": 0.0669, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 5.023430178069353, | |
| "grad_norm": 1.5705071687698364, | |
| "learning_rate": 9.960755026431783e-06, | |
| "loss": 0.0646, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 5.032802249297094, | |
| "grad_norm": 0.9007801413536072, | |
| "learning_rate": 9.960681808197514e-06, | |
| "loss": 0.0721, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 5.042174320524836, | |
| "grad_norm": 1.044138789176941, | |
| "learning_rate": 9.960608589963245e-06, | |
| "loss": 0.0585, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 5.051546391752577, | |
| "grad_norm": 1.455098032951355, | |
| "learning_rate": 9.960535371728977e-06, | |
| "loss": 0.0677, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 5.060918462980319, | |
| "grad_norm": 1.3480255603790283, | |
| "learning_rate": 9.960462153494708e-06, | |
| "loss": 0.0582, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 5.07029053420806, | |
| "grad_norm": 0.9733775854110718, | |
| "learning_rate": 9.960388935260437e-06, | |
| "loss": 0.057, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 5.079662605435801, | |
| "grad_norm": 1.202635645866394, | |
| "learning_rate": 9.96031571702617e-06, | |
| "loss": 0.0642, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 5.089034676663543, | |
| "grad_norm": 1.2410409450531006, | |
| "learning_rate": 9.9602424987919e-06, | |
| "loss": 0.055, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 5.098406747891284, | |
| "grad_norm": 1.341126799583435, | |
| "learning_rate": 9.960169280557631e-06, | |
| "loss": 0.066, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 5.107778819119026, | |
| "grad_norm": 1.070065975189209, | |
| "learning_rate": 9.960096062323362e-06, | |
| "loss": 0.0565, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 5.117150890346767, | |
| "grad_norm": 1.5855072736740112, | |
| "learning_rate": 9.960022844089092e-06, | |
| "loss": 0.0613, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 5.126522961574508, | |
| "grad_norm": 0.7614333629608154, | |
| "learning_rate": 9.959949625854823e-06, | |
| "loss": 0.0572, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 5.1358950328022495, | |
| "grad_norm": 1.0969761610031128, | |
| "learning_rate": 9.959876407620554e-06, | |
| "loss": 0.0557, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 5.14526710402999, | |
| "grad_norm": 1.7454636096954346, | |
| "learning_rate": 9.959803189386286e-06, | |
| "loss": 0.0647, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 5.154639175257732, | |
| "grad_norm": 0.9625281691551208, | |
| "learning_rate": 9.959729971152017e-06, | |
| "loss": 0.0499, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.154639175257732, | |
| "eval_loss": 0.03924967721104622, | |
| "eval_pearson_cosine": 0.7608553767204285, | |
| "eval_pearson_dot": 0.6993385553359985, | |
| "eval_pearson_euclidean": 0.732108473777771, | |
| "eval_pearson_manhattan": 0.7334935069084167, | |
| "eval_runtime": 28.2448, | |
| "eval_samples_per_second": 53.107, | |
| "eval_spearman_cosine": 0.7615678141531256, | |
| "eval_spearman_dot": 0.6999177956469285, | |
| "eval_spearman_euclidean": 0.7378738640113753, | |
| "eval_spearman_manhattan": 0.7392624046122273, | |
| "eval_steps_per_second": 6.656, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.164011246485473, | |
| "grad_norm": 1.4280071258544922, | |
| "learning_rate": 9.959656752917748e-06, | |
| "loss": 0.0557, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 5.173383317713214, | |
| "grad_norm": 1.6271259784698486, | |
| "learning_rate": 9.959583534683479e-06, | |
| "loss": 0.0602, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 5.182755388940956, | |
| "grad_norm": 1.2609021663665771, | |
| "learning_rate": 9.95951031644921e-06, | |
| "loss": 0.0545, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 5.192127460168697, | |
| "grad_norm": 1.2945165634155273, | |
| "learning_rate": 9.95943709821494e-06, | |
| "loss": 0.0592, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 5.201499531396439, | |
| "grad_norm": 1.3600184917449951, | |
| "learning_rate": 9.959363879980671e-06, | |
| "loss": 0.0492, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 5.21087160262418, | |
| "grad_norm": 1.3210471868515015, | |
| "learning_rate": 9.959290661746403e-06, | |
| "loss": 0.0558, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 5.220243673851921, | |
| "grad_norm": 0.8935280442237854, | |
| "learning_rate": 9.959217443512134e-06, | |
| "loss": 0.0566, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 5.229615745079663, | |
| "grad_norm": 0.9014615416526794, | |
| "learning_rate": 9.959144225277863e-06, | |
| "loss": 0.0578, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 5.238987816307404, | |
| "grad_norm": 0.9144461750984192, | |
| "learning_rate": 9.959071007043596e-06, | |
| "loss": 0.0642, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 5.248359887535146, | |
| "grad_norm": 1.1306620836257935, | |
| "learning_rate": 9.958997788809326e-06, | |
| "loss": 0.0645, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 5.257731958762887, | |
| "grad_norm": 1.6353179216384888, | |
| "learning_rate": 9.958924570575057e-06, | |
| "loss": 0.0563, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 5.2671040299906275, | |
| "grad_norm": 1.0438508987426758, | |
| "learning_rate": 9.958851352340788e-06, | |
| "loss": 0.0554, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 5.276476101218369, | |
| "grad_norm": 1.0287367105484009, | |
| "learning_rate": 9.958778134106519e-06, | |
| "loss": 0.0586, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 5.28584817244611, | |
| "grad_norm": 1.0613245964050293, | |
| "learning_rate": 9.95870491587225e-06, | |
| "loss": 0.0634, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 5.295220243673852, | |
| "grad_norm": 1.489405632019043, | |
| "learning_rate": 9.95863169763798e-06, | |
| "loss": 0.0474, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 5.304592314901593, | |
| "grad_norm": 1.4497292041778564, | |
| "learning_rate": 9.95855847940371e-06, | |
| "loss": 0.056, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 5.313964386129334, | |
| "grad_norm": 1.2881600856781006, | |
| "learning_rate": 9.958485261169443e-06, | |
| "loss": 0.0561, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 5.323336457357076, | |
| "grad_norm": 1.4863743782043457, | |
| "learning_rate": 9.958412042935174e-06, | |
| "loss": 0.0562, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 5.332708528584817, | |
| "grad_norm": 1.325191855430603, | |
| "learning_rate": 9.958338824700903e-06, | |
| "loss": 0.0569, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 5.342080599812559, | |
| "grad_norm": 1.0650861263275146, | |
| "learning_rate": 9.958265606466636e-06, | |
| "loss": 0.0574, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 5.3514526710403, | |
| "grad_norm": 1.7255184650421143, | |
| "learning_rate": 9.958192388232366e-06, | |
| "loss": 0.055, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 5.360824742268041, | |
| "grad_norm": 0.8258642554283142, | |
| "learning_rate": 9.958119169998097e-06, | |
| "loss": 0.0509, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 5.370196813495783, | |
| "grad_norm": 1.2811216115951538, | |
| "learning_rate": 9.958045951763828e-06, | |
| "loss": 0.0585, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 5.379568884723524, | |
| "grad_norm": 1.2582824230194092, | |
| "learning_rate": 9.95797273352956e-06, | |
| "loss": 0.0589, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 5.3889409559512655, | |
| "grad_norm": 1.3511929512023926, | |
| "learning_rate": 9.95789951529529e-06, | |
| "loss": 0.0542, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 5.3889409559512655, | |
| "eval_loss": 0.03850702941417694, | |
| "eval_pearson_cosine": 0.7663590312004089, | |
| "eval_pearson_dot": 0.7060524225234985, | |
| "eval_pearson_euclidean": 0.7385671734809875, | |
| "eval_pearson_manhattan": 0.7399072647094727, | |
| "eval_runtime": 27.6896, | |
| "eval_samples_per_second": 54.172, | |
| "eval_spearman_cosine": 0.7668814587849042, | |
| "eval_spearman_dot": 0.706466499232552, | |
| "eval_spearman_euclidean": 0.744533534662993, | |
| "eval_spearman_manhattan": 0.7454034343244123, | |
| "eval_steps_per_second": 6.79, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 5.3983130271790065, | |
| "grad_norm": 1.3905717134475708, | |
| "learning_rate": 9.95782629706102e-06, | |
| "loss": 0.0583, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 5.4076850984067475, | |
| "grad_norm": 1.5047788619995117, | |
| "learning_rate": 9.957753078826752e-06, | |
| "loss": 0.0605, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 5.417057169634489, | |
| "grad_norm": 1.280427098274231, | |
| "learning_rate": 9.957679860592483e-06, | |
| "loss": 0.0584, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 5.42642924086223, | |
| "grad_norm": 1.3530281782150269, | |
| "learning_rate": 9.957606642358214e-06, | |
| "loss": 0.0591, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 5.435801312089972, | |
| "grad_norm": 1.0610909461975098, | |
| "learning_rate": 9.957533424123945e-06, | |
| "loss": 0.0546, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 5.445173383317713, | |
| "grad_norm": 0.9637224674224854, | |
| "learning_rate": 9.957460205889675e-06, | |
| "loss": 0.0641, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 5.454545454545454, | |
| "grad_norm": 1.3324577808380127, | |
| "learning_rate": 9.957386987655406e-06, | |
| "loss": 0.0599, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 5.463917525773196, | |
| "grad_norm": 0.9660161137580872, | |
| "learning_rate": 9.957313769421137e-06, | |
| "loss": 0.0591, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 5.473289597000937, | |
| "grad_norm": 1.128570556640625, | |
| "learning_rate": 9.95724055118687e-06, | |
| "loss": 0.0579, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 5.482661668228679, | |
| "grad_norm": 1.444172739982605, | |
| "learning_rate": 9.9571673329526e-06, | |
| "loss": 0.0636, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 5.49203373945642, | |
| "grad_norm": 1.3510165214538574, | |
| "learning_rate": 9.95709411471833e-06, | |
| "loss": 0.0631, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 5.501405810684162, | |
| "grad_norm": 1.0439740419387817, | |
| "learning_rate": 9.957020896484062e-06, | |
| "loss": 0.0635, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 5.510777881911903, | |
| "grad_norm": 1.15412175655365, | |
| "learning_rate": 9.956947678249792e-06, | |
| "loss": 0.0595, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 5.520149953139644, | |
| "grad_norm": 1.221147894859314, | |
| "learning_rate": 9.956874460015523e-06, | |
| "loss": 0.0552, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 5.5295220243673855, | |
| "grad_norm": 1.4210234880447388, | |
| "learning_rate": 9.956801241781254e-06, | |
| "loss": 0.0593, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 5.5388940955951265, | |
| "grad_norm": 1.1082103252410889, | |
| "learning_rate": 9.956728023546985e-06, | |
| "loss": 0.0535, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 5.548266166822868, | |
| "grad_norm": 0.8931286334991455, | |
| "learning_rate": 9.956654805312715e-06, | |
| "loss": 0.0556, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 5.557638238050609, | |
| "grad_norm": 1.5182912349700928, | |
| "learning_rate": 9.956581587078446e-06, | |
| "loss": 0.0583, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 5.56701030927835, | |
| "grad_norm": 1.2056432962417603, | |
| "learning_rate": 9.956508368844177e-06, | |
| "loss": 0.064, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 5.576382380506092, | |
| "grad_norm": 1.5039522647857666, | |
| "learning_rate": 9.95643515060991e-06, | |
| "loss": 0.0708, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 5.585754451733833, | |
| "grad_norm": 1.2651883363723755, | |
| "learning_rate": 9.95636193237564e-06, | |
| "loss": 0.0596, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 5.595126522961575, | |
| "grad_norm": 1.317690134048462, | |
| "learning_rate": 9.956288714141371e-06, | |
| "loss": 0.0713, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 5.604498594189316, | |
| "grad_norm": 0.9705867767333984, | |
| "learning_rate": 9.956215495907102e-06, | |
| "loss": 0.0699, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 5.613870665417057, | |
| "grad_norm": 1.4250271320343018, | |
| "learning_rate": 9.956142277672832e-06, | |
| "loss": 0.0595, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 5.623242736644799, | |
| "grad_norm": 1.0857118368148804, | |
| "learning_rate": 9.956069059438563e-06, | |
| "loss": 0.0555, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.623242736644799, | |
| "eval_loss": 0.03963544964790344, | |
| "eval_pearson_cosine": 0.7571043968200684, | |
| "eval_pearson_dot": 0.700376570224762, | |
| "eval_pearson_euclidean": 0.7279260158538818, | |
| "eval_pearson_manhattan": 0.729307234287262, | |
| "eval_runtime": 25.5449, | |
| "eval_samples_per_second": 58.72, | |
| "eval_spearman_cosine": 0.7579022153365402, | |
| "eval_spearman_dot": 0.6992710065203335, | |
| "eval_spearman_euclidean": 0.7330627821557505, | |
| "eval_spearman_manhattan": 0.7343750357819732, | |
| "eval_steps_per_second": 7.36, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.63261480787254, | |
| "grad_norm": 1.2122074365615845, | |
| "learning_rate": 9.955995841204294e-06, | |
| "loss": 0.0665, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 5.641986879100282, | |
| "grad_norm": 1.7832310199737549, | |
| "learning_rate": 9.955922622970026e-06, | |
| "loss": 0.063, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 5.651358950328023, | |
| "grad_norm": 1.1854170560836792, | |
| "learning_rate": 9.955849404735755e-06, | |
| "loss": 0.0573, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 5.660731021555764, | |
| "grad_norm": 1.6633968353271484, | |
| "learning_rate": 9.955776186501486e-06, | |
| "loss": 0.0549, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 5.670103092783505, | |
| "grad_norm": 1.31834077835083, | |
| "learning_rate": 9.955702968267219e-06, | |
| "loss": 0.0478, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 5.679475164011246, | |
| "grad_norm": 0.8284873962402344, | |
| "learning_rate": 9.95562975003295e-06, | |
| "loss": 0.0639, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 5.688847235238988, | |
| "grad_norm": 1.2393404245376587, | |
| "learning_rate": 9.95555653179868e-06, | |
| "loss": 0.0593, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 5.698219306466729, | |
| "grad_norm": 1.5327643156051636, | |
| "learning_rate": 9.95548331356441e-06, | |
| "loss": 0.0644, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 5.70759137769447, | |
| "grad_norm": 1.8985389471054077, | |
| "learning_rate": 9.955410095330142e-06, | |
| "loss": 0.0646, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 5.716963448922212, | |
| "grad_norm": 1.5896059274673462, | |
| "learning_rate": 9.955336877095872e-06, | |
| "loss": 0.0716, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 5.726335520149953, | |
| "grad_norm": 1.21624755859375, | |
| "learning_rate": 9.955263658861603e-06, | |
| "loss": 0.0559, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 5.735707591377695, | |
| "grad_norm": 1.3084664344787598, | |
| "learning_rate": 9.955190440627336e-06, | |
| "loss": 0.065, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 5.745079662605436, | |
| "grad_norm": 0.9755469560623169, | |
| "learning_rate": 9.955117222393066e-06, | |
| "loss": 0.0601, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 5.754451733833177, | |
| "grad_norm": 1.1662402153015137, | |
| "learning_rate": 9.955044004158797e-06, | |
| "loss": 0.0588, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 5.763823805060919, | |
| "grad_norm": 1.313323974609375, | |
| "learning_rate": 9.954970785924528e-06, | |
| "loss": 0.0667, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 5.77319587628866, | |
| "grad_norm": 1.4725874662399292, | |
| "learning_rate": 9.954897567690259e-06, | |
| "loss": 0.0619, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 5.782567947516402, | |
| "grad_norm": 1.3176454305648804, | |
| "learning_rate": 9.95482434945599e-06, | |
| "loss": 0.056, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 5.7919400187441425, | |
| "grad_norm": 1.0566222667694092, | |
| "learning_rate": 9.95475113122172e-06, | |
| "loss": 0.0587, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 5.8013120899718835, | |
| "grad_norm": 1.0623878240585327, | |
| "learning_rate": 9.95467791298745e-06, | |
| "loss": 0.0591, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 5.810684161199625, | |
| "grad_norm": 1.6217368841171265, | |
| "learning_rate": 9.954604694753183e-06, | |
| "loss": 0.0536, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 5.820056232427366, | |
| "grad_norm": 1.2574353218078613, | |
| "learning_rate": 9.954531476518912e-06, | |
| "loss": 0.0552, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 5.829428303655108, | |
| "grad_norm": 1.2605924606323242, | |
| "learning_rate": 9.954458258284643e-06, | |
| "loss": 0.0669, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 5.838800374882849, | |
| "grad_norm": 1.8283051252365112, | |
| "learning_rate": 9.954385040050375e-06, | |
| "loss": 0.0631, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 5.84817244611059, | |
| "grad_norm": 1.2457951307296753, | |
| "learning_rate": 9.954311821816106e-06, | |
| "loss": 0.0578, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 5.857544517338332, | |
| "grad_norm": 1.1618739366531372, | |
| "learning_rate": 9.954238603581837e-06, | |
| "loss": 0.0547, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 5.857544517338332, | |
| "eval_loss": 0.03839369863271713, | |
| "eval_pearson_cosine": 0.7663547396659851, | |
| "eval_pearson_dot": 0.7110079526901245, | |
| "eval_pearson_euclidean": 0.7369804978370667, | |
| "eval_pearson_manhattan": 0.738224983215332, | |
| "eval_runtime": 28.702, | |
| "eval_samples_per_second": 52.261, | |
| "eval_spearman_cosine": 0.766680322110213, | |
| "eval_spearman_dot": 0.7118792296635837, | |
| "eval_spearman_euclidean": 0.7420173359570077, | |
| "eval_spearman_manhattan": 0.7431811125331302, | |
| "eval_steps_per_second": 6.55, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 5.866916588566073, | |
| "grad_norm": 1.565491795539856, | |
| "learning_rate": 9.954165385347568e-06, | |
| "loss": 0.0634, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 5.876288659793815, | |
| "grad_norm": 1.412607192993164, | |
| "learning_rate": 9.954092167113298e-06, | |
| "loss": 0.0641, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 5.885660731021556, | |
| "grad_norm": 1.5475645065307617, | |
| "learning_rate": 9.95401894887903e-06, | |
| "loss": 0.058, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 5.895032802249297, | |
| "grad_norm": 1.6942791938781738, | |
| "learning_rate": 9.95394573064476e-06, | |
| "loss": 0.0668, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 5.904404873477039, | |
| "grad_norm": 1.286224603652954, | |
| "learning_rate": 9.953872512410492e-06, | |
| "loss": 0.058, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 5.91377694470478, | |
| "grad_norm": 1.5031893253326416, | |
| "learning_rate": 9.953799294176223e-06, | |
| "loss": 0.062, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 5.9231490159325215, | |
| "grad_norm": 1.416455864906311, | |
| "learning_rate": 9.953726075941952e-06, | |
| "loss": 0.0596, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 5.9325210871602625, | |
| "grad_norm": 1.3160662651062012, | |
| "learning_rate": 9.953652857707685e-06, | |
| "loss": 0.062, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 5.9418931583880035, | |
| "grad_norm": 0.9542105793952942, | |
| "learning_rate": 9.953579639473415e-06, | |
| "loss": 0.0645, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 5.951265229615745, | |
| "grad_norm": 1.4458489418029785, | |
| "learning_rate": 9.953506421239146e-06, | |
| "loss": 0.0563, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 5.960637300843486, | |
| "grad_norm": 1.0310072898864746, | |
| "learning_rate": 9.953433203004877e-06, | |
| "loss": 0.0567, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 5.970009372071228, | |
| "grad_norm": 1.4674971103668213, | |
| "learning_rate": 9.95335998477061e-06, | |
| "loss": 0.0579, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 5.979381443298969, | |
| "grad_norm": 1.229636311531067, | |
| "learning_rate": 9.953286766536338e-06, | |
| "loss": 0.0589, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 5.98875351452671, | |
| "grad_norm": 1.4654268026351929, | |
| "learning_rate": 9.95321354830207e-06, | |
| "loss": 0.0519, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 5.998125585754452, | |
| "grad_norm": 1.276367425918579, | |
| "learning_rate": 9.953140330067802e-06, | |
| "loss": 0.066, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 6.007497656982193, | |
| "grad_norm": 1.0710258483886719, | |
| "learning_rate": 9.953067111833532e-06, | |
| "loss": 0.0462, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 6.016869728209935, | |
| "grad_norm": 0.9316133856773376, | |
| "learning_rate": 9.952993893599263e-06, | |
| "loss": 0.044, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 6.026241799437676, | |
| "grad_norm": 0.8318607211112976, | |
| "learning_rate": 9.952920675364994e-06, | |
| "loss": 0.0399, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 6.035613870665417, | |
| "grad_norm": 0.9682859182357788, | |
| "learning_rate": 9.952847457130725e-06, | |
| "loss": 0.0371, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 6.044985941893159, | |
| "grad_norm": 0.8720560669898987, | |
| "learning_rate": 9.952774238896455e-06, | |
| "loss": 0.0453, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 6.0543580131209, | |
| "grad_norm": 0.7835734486579895, | |
| "learning_rate": 9.952701020662186e-06, | |
| "loss": 0.0475, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 6.0637300843486415, | |
| "grad_norm": 1.4373115301132202, | |
| "learning_rate": 9.952627802427917e-06, | |
| "loss": 0.0416, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 6.073102155576382, | |
| "grad_norm": 1.317517638206482, | |
| "learning_rate": 9.95255458419365e-06, | |
| "loss": 0.0425, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 6.082474226804123, | |
| "grad_norm": 1.1831910610198975, | |
| "learning_rate": 9.952481365959378e-06, | |
| "loss": 0.0471, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 6.091846298031865, | |
| "grad_norm": 1.0449994802474976, | |
| "learning_rate": 9.95240814772511e-06, | |
| "loss": 0.0476, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.091846298031865, | |
| "eval_loss": 0.03876839950680733, | |
| "eval_pearson_cosine": 0.7637665867805481, | |
| "eval_pearson_dot": 0.7007623910903931, | |
| "eval_pearson_euclidean": 0.7322614192962646, | |
| "eval_pearson_manhattan": 0.7338271141052246, | |
| "eval_runtime": 22.3296, | |
| "eval_samples_per_second": 67.175, | |
| "eval_spearman_cosine": 0.7641548541194557, | |
| "eval_spearman_dot": 0.7012776165056044, | |
| "eval_spearman_euclidean": 0.7377602855270703, | |
| "eval_spearman_manhattan": 0.73918298594716, | |
| "eval_steps_per_second": 8.419, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.101218369259606, | |
| "grad_norm": 0.7369022965431213, | |
| "learning_rate": 9.952334929490842e-06, | |
| "loss": 0.0364, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 6.110590440487348, | |
| "grad_norm": 0.8673484325408936, | |
| "learning_rate": 9.952261711256572e-06, | |
| "loss": 0.0498, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 6.119962511715089, | |
| "grad_norm": 1.5341424942016602, | |
| "learning_rate": 9.952188493022303e-06, | |
| "loss": 0.045, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 6.12933458294283, | |
| "grad_norm": 0.8899186253547668, | |
| "learning_rate": 9.952115274788034e-06, | |
| "loss": 0.0441, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 6.138706654170572, | |
| "grad_norm": 1.0708824396133423, | |
| "learning_rate": 9.952042056553765e-06, | |
| "loss": 0.0458, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 6.148078725398313, | |
| "grad_norm": 1.1551895141601562, | |
| "learning_rate": 9.951968838319495e-06, | |
| "loss": 0.0421, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 6.157450796626055, | |
| "grad_norm": 1.0832526683807373, | |
| "learning_rate": 9.951895620085226e-06, | |
| "loss": 0.0462, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 6.166822867853796, | |
| "grad_norm": 1.303536295890808, | |
| "learning_rate": 9.951822401850959e-06, | |
| "loss": 0.0423, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 6.176194939081537, | |
| "grad_norm": 1.2826794385910034, | |
| "learning_rate": 9.95174918361669e-06, | |
| "loss": 0.0463, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 6.185567010309279, | |
| "grad_norm": 1.0724890232086182, | |
| "learning_rate": 9.95167596538242e-06, | |
| "loss": 0.043, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 6.1949390815370196, | |
| "grad_norm": 0.9407768249511719, | |
| "learning_rate": 9.95160274714815e-06, | |
| "loss": 0.045, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 6.204311152764761, | |
| "grad_norm": 1.1686878204345703, | |
| "learning_rate": 9.951529528913882e-06, | |
| "loss": 0.0407, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 6.213683223992502, | |
| "grad_norm": 1.5972820520401, | |
| "learning_rate": 9.951456310679612e-06, | |
| "loss": 0.0449, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 6.223055295220243, | |
| "grad_norm": 0.7610195875167847, | |
| "learning_rate": 9.951383092445343e-06, | |
| "loss": 0.0397, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 6.232427366447985, | |
| "grad_norm": 1.02704656124115, | |
| "learning_rate": 9.951309874211075e-06, | |
| "loss": 0.0448, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 6.241799437675726, | |
| "grad_norm": 0.8035688400268555, | |
| "learning_rate": 9.951236655976805e-06, | |
| "loss": 0.0445, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 6.251171508903468, | |
| "grad_norm": 1.019539475440979, | |
| "learning_rate": 9.951163437742535e-06, | |
| "loss": 0.0452, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 6.260543580131209, | |
| "grad_norm": 1.662574291229248, | |
| "learning_rate": 9.951090219508268e-06, | |
| "loss": 0.0517, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 6.26991565135895, | |
| "grad_norm": 1.1599600315093994, | |
| "learning_rate": 9.951017001273998e-06, | |
| "loss": 0.0493, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 6.279287722586692, | |
| "grad_norm": 0.7756074070930481, | |
| "learning_rate": 9.95094378303973e-06, | |
| "loss": 0.048, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 6.288659793814433, | |
| "grad_norm": 1.0959285497665405, | |
| "learning_rate": 9.95087056480546e-06, | |
| "loss": 0.0501, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 6.298031865042175, | |
| "grad_norm": 1.2311910390853882, | |
| "learning_rate": 9.95079734657119e-06, | |
| "loss": 0.0486, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 6.307403936269916, | |
| "grad_norm": 1.2149254083633423, | |
| "learning_rate": 9.950724128336921e-06, | |
| "loss": 0.0389, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 6.316776007497657, | |
| "grad_norm": 1.5355291366577148, | |
| "learning_rate": 9.950650910102652e-06, | |
| "loss": 0.0472, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 6.3261480787253985, | |
| "grad_norm": 1.1264081001281738, | |
| "learning_rate": 9.950577691868385e-06, | |
| "loss": 0.043, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 6.3261480787253985, | |
| "eval_loss": 0.03764544054865837, | |
| "eval_pearson_cosine": 0.7692497968673706, | |
| "eval_pearson_dot": 0.7138222455978394, | |
| "eval_pearson_euclidean": 0.7343003749847412, | |
| "eval_pearson_manhattan": 0.7356712818145752, | |
| "eval_runtime": 22.6897, | |
| "eval_samples_per_second": 66.109, | |
| "eval_spearman_cosine": 0.7695765922931803, | |
| "eval_spearman_dot": 0.7152262336240688, | |
| "eval_spearman_euclidean": 0.739557951171161, | |
| "eval_spearman_manhattan": 0.7408550126908494, | |
| "eval_steps_per_second": 8.286, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 6.3355201499531395, | |
| "grad_norm": 0.6277545690536499, | |
| "learning_rate": 9.950504473634115e-06, | |
| "loss": 0.0406, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 6.344892221180881, | |
| "grad_norm": 1.3999137878417969, | |
| "learning_rate": 9.950431255399846e-06, | |
| "loss": 0.0447, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 6.354264292408622, | |
| "grad_norm": 0.7465086579322815, | |
| "learning_rate": 9.950358037165577e-06, | |
| "loss": 0.0502, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 6.363636363636363, | |
| "grad_norm": 1.1154383420944214, | |
| "learning_rate": 9.950284818931308e-06, | |
| "loss": 0.05, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 6.373008434864105, | |
| "grad_norm": 1.1133472919464111, | |
| "learning_rate": 9.950211600697038e-06, | |
| "loss": 0.0473, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 6.382380506091846, | |
| "grad_norm": 1.0995352268218994, | |
| "learning_rate": 9.95013838246277e-06, | |
| "loss": 0.0414, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 6.391752577319588, | |
| "grad_norm": 0.9666862487792969, | |
| "learning_rate": 9.9500651642285e-06, | |
| "loss": 0.049, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 6.401124648547329, | |
| "grad_norm": 1.1517918109893799, | |
| "learning_rate": 9.94999194599423e-06, | |
| "loss": 0.0413, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 6.41049671977507, | |
| "grad_norm": 0.5381759405136108, | |
| "learning_rate": 9.949918727759961e-06, | |
| "loss": 0.0418, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 6.419868791002812, | |
| "grad_norm": 0.973006546497345, | |
| "learning_rate": 9.949845509525692e-06, | |
| "loss": 0.0495, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 6.429240862230553, | |
| "grad_norm": 1.126633882522583, | |
| "learning_rate": 9.949772291291425e-06, | |
| "loss": 0.0493, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 6.438612933458295, | |
| "grad_norm": 0.7894268035888672, | |
| "learning_rate": 9.949699073057155e-06, | |
| "loss": 0.0436, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 6.447985004686036, | |
| "grad_norm": 0.7125422358512878, | |
| "learning_rate": 9.949625854822886e-06, | |
| "loss": 0.0433, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 6.457357075913777, | |
| "grad_norm": 0.9013342261314392, | |
| "learning_rate": 9.949552636588617e-06, | |
| "loss": 0.0376, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 6.4667291471415185, | |
| "grad_norm": 1.132384181022644, | |
| "learning_rate": 9.949479418354348e-06, | |
| "loss": 0.0482, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 6.4761012183692594, | |
| "grad_norm": 1.0104179382324219, | |
| "learning_rate": 9.949406200120078e-06, | |
| "loss": 0.0485, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 6.485473289597001, | |
| "grad_norm": 1.233464241027832, | |
| "learning_rate": 9.949332981885809e-06, | |
| "loss": 0.0478, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 6.494845360824742, | |
| "grad_norm": 0.7077954411506653, | |
| "learning_rate": 9.949259763651542e-06, | |
| "loss": 0.0464, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 6.504217432052483, | |
| "grad_norm": 1.5273882150650024, | |
| "learning_rate": 9.949186545417272e-06, | |
| "loss": 0.0404, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 6.513589503280225, | |
| "grad_norm": 1.2204720973968506, | |
| "learning_rate": 9.949113327183001e-06, | |
| "loss": 0.0375, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 6.522961574507966, | |
| "grad_norm": 0.9539759755134583, | |
| "learning_rate": 9.949040108948734e-06, | |
| "loss": 0.0397, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 6.532333645735708, | |
| "grad_norm": 1.949201226234436, | |
| "learning_rate": 9.948966890714465e-06, | |
| "loss": 0.0476, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 6.541705716963449, | |
| "grad_norm": 1.046915888786316, | |
| "learning_rate": 9.948893672480195e-06, | |
| "loss": 0.0445, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 6.55107778819119, | |
| "grad_norm": 0.8392923474311829, | |
| "learning_rate": 9.948820454245926e-06, | |
| "loss": 0.0502, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 6.560449859418932, | |
| "grad_norm": 1.357014536857605, | |
| "learning_rate": 9.948747236011659e-06, | |
| "loss": 0.0436, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.560449859418932, | |
| "eval_loss": 0.03813355416059494, | |
| "eval_pearson_cosine": 0.7662351131439209, | |
| "eval_pearson_dot": 0.7104849219322205, | |
| "eval_pearson_euclidean": 0.7334129810333252, | |
| "eval_pearson_manhattan": 0.7350986003875732, | |
| "eval_runtime": 22.7512, | |
| "eval_samples_per_second": 65.931, | |
| "eval_spearman_cosine": 0.7662226343415417, | |
| "eval_spearman_dot": 0.7115825441503862, | |
| "eval_spearman_euclidean": 0.7384103552275764, | |
| "eval_spearman_manhattan": 0.7397995971405482, | |
| "eval_steps_per_second": 8.263, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.569821930646673, | |
| "grad_norm": 1.1269482374191284, | |
| "learning_rate": 9.948674017777388e-06, | |
| "loss": 0.0395, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 6.579194001874415, | |
| "grad_norm": 0.8978859782218933, | |
| "learning_rate": 9.948600799543118e-06, | |
| "loss": 0.0438, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 6.588566073102156, | |
| "grad_norm": 1.3999450206756592, | |
| "learning_rate": 9.94852758130885e-06, | |
| "loss": 0.0466, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 6.597938144329897, | |
| "grad_norm": 0.985998272895813, | |
| "learning_rate": 9.948454363074582e-06, | |
| "loss": 0.0474, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 6.607310215557638, | |
| "grad_norm": 0.7843828797340393, | |
| "learning_rate": 9.948381144840312e-06, | |
| "loss": 0.0417, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 6.616682286785379, | |
| "grad_norm": 1.64656400680542, | |
| "learning_rate": 9.948307926606043e-06, | |
| "loss": 0.045, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 6.626054358013121, | |
| "grad_norm": 0.6348075866699219, | |
| "learning_rate": 9.948234708371774e-06, | |
| "loss": 0.0501, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 6.635426429240862, | |
| "grad_norm": 1.8781590461730957, | |
| "learning_rate": 9.948161490137505e-06, | |
| "loss": 0.0445, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 6.644798500468603, | |
| "grad_norm": 1.0441402196884155, | |
| "learning_rate": 9.948088271903235e-06, | |
| "loss": 0.0457, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 6.654170571696345, | |
| "grad_norm": 1.2460689544677734, | |
| "learning_rate": 9.948015053668966e-06, | |
| "loss": 0.0471, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 6.663542642924086, | |
| "grad_norm": 0.993414580821991, | |
| "learning_rate": 9.947941835434698e-06, | |
| "loss": 0.0423, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.672914714151828, | |
| "grad_norm": 1.2848552465438843, | |
| "learning_rate": 9.947868617200428e-06, | |
| "loss": 0.0414, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 6.682286785379569, | |
| "grad_norm": 1.2903103828430176, | |
| "learning_rate": 9.947795398966158e-06, | |
| "loss": 0.0402, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 6.69165885660731, | |
| "grad_norm": 1.2319235801696777, | |
| "learning_rate": 9.94772218073189e-06, | |
| "loss": 0.0504, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 6.701030927835052, | |
| "grad_norm": 0.8465273976325989, | |
| "learning_rate": 9.947648962497621e-06, | |
| "loss": 0.0409, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 6.710402999062793, | |
| "grad_norm": 1.186928153038025, | |
| "learning_rate": 9.947575744263352e-06, | |
| "loss": 0.0458, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 6.719775070290535, | |
| "grad_norm": 1.3528752326965332, | |
| "learning_rate": 9.947502526029083e-06, | |
| "loss": 0.0433, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 6.7291471415182755, | |
| "grad_norm": 0.8908892273902893, | |
| "learning_rate": 9.947429307794814e-06, | |
| "loss": 0.0456, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 6.7385192127460165, | |
| "grad_norm": 1.1235069036483765, | |
| "learning_rate": 9.947356089560544e-06, | |
| "loss": 0.0481, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 6.747891283973758, | |
| "grad_norm": 1.6809895038604736, | |
| "learning_rate": 9.947282871326275e-06, | |
| "loss": 0.0454, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 6.757263355201499, | |
| "grad_norm": 0.8632039427757263, | |
| "learning_rate": 9.947209653092008e-06, | |
| "loss": 0.0481, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 6.766635426429241, | |
| "grad_norm": 1.2185996770858765, | |
| "learning_rate": 9.947136434857738e-06, | |
| "loss": 0.0383, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 6.776007497656982, | |
| "grad_norm": 0.6979696154594421, | |
| "learning_rate": 9.947063216623467e-06, | |
| "loss": 0.0435, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 6.785379568884723, | |
| "grad_norm": 1.459441065788269, | |
| "learning_rate": 9.9469899983892e-06, | |
| "loss": 0.0449, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 6.794751640112465, | |
| "grad_norm": 1.0957977771759033, | |
| "learning_rate": 9.94691678015493e-06, | |
| "loss": 0.032, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 6.794751640112465, | |
| "eval_loss": 0.03765299916267395, | |
| "eval_pearson_cosine": 0.7692482471466064, | |
| "eval_pearson_dot": 0.722366452217102, | |
| "eval_pearson_euclidean": 0.7316011190414429, | |
| "eval_pearson_manhattan": 0.7333144545555115, | |
| "eval_runtime": 22.5438, | |
| "eval_samples_per_second": 66.537, | |
| "eval_spearman_cosine": 0.7695046405395065, | |
| "eval_spearman_dot": 0.7242050912795406, | |
| "eval_spearman_euclidean": 0.7356828429817377, | |
| "eval_spearman_manhattan": 0.737487116385034, | |
| "eval_steps_per_second": 8.339, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 6.804123711340206, | |
| "grad_norm": 1.377066731452942, | |
| "learning_rate": 9.946843561920661e-06, | |
| "loss": 0.0529, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 6.813495782567948, | |
| "grad_norm": 0.714728057384491, | |
| "learning_rate": 9.946770343686392e-06, | |
| "loss": 0.0432, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 6.822867853795689, | |
| "grad_norm": 1.4324384927749634, | |
| "learning_rate": 9.946697125452125e-06, | |
| "loss": 0.046, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 6.83223992502343, | |
| "grad_norm": 1.2564704418182373, | |
| "learning_rate": 9.946623907217854e-06, | |
| "loss": 0.046, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 6.841611996251172, | |
| "grad_norm": 0.8522197008132935, | |
| "learning_rate": 9.946550688983584e-06, | |
| "loss": 0.0393, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 6.850984067478913, | |
| "grad_norm": 0.8751912117004395, | |
| "learning_rate": 9.946477470749317e-06, | |
| "loss": 0.0426, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 6.8603561387066545, | |
| "grad_norm": 0.8960391879081726, | |
| "learning_rate": 9.946404252515048e-06, | |
| "loss": 0.0445, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 6.8697282099343955, | |
| "grad_norm": 1.092128872871399, | |
| "learning_rate": 9.946331034280778e-06, | |
| "loss": 0.0459, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 6.8791002811621365, | |
| "grad_norm": 1.1840777397155762, | |
| "learning_rate": 9.946257816046509e-06, | |
| "loss": 0.0387, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 6.888472352389878, | |
| "grad_norm": 1.0283764600753784, | |
| "learning_rate": 9.94618459781224e-06, | |
| "loss": 0.0577, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 6.897844423617619, | |
| "grad_norm": 0.749761164188385, | |
| "learning_rate": 9.94611137957797e-06, | |
| "loss": 0.0414, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 6.907216494845361, | |
| "grad_norm": 0.8442000150680542, | |
| "learning_rate": 9.946038161343701e-06, | |
| "loss": 0.046, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 6.916588566073102, | |
| "grad_norm": 1.2296583652496338, | |
| "learning_rate": 9.945964943109432e-06, | |
| "loss": 0.0412, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 6.925960637300843, | |
| "grad_norm": 0.6515626311302185, | |
| "learning_rate": 9.945891724875165e-06, | |
| "loss": 0.0481, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 6.935332708528585, | |
| "grad_norm": 1.8992091417312622, | |
| "learning_rate": 9.945818506640895e-06, | |
| "loss": 0.0431, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 6.944704779756326, | |
| "grad_norm": 1.1663875579833984, | |
| "learning_rate": 9.945745288406624e-06, | |
| "loss": 0.0459, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 6.954076850984068, | |
| "grad_norm": 0.6695976853370667, | |
| "learning_rate": 9.945672070172357e-06, | |
| "loss": 0.0448, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 6.963448922211809, | |
| "grad_norm": 1.158563494682312, | |
| "learning_rate": 9.945598851938088e-06, | |
| "loss": 0.0398, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 6.97282099343955, | |
| "grad_norm": 1.2068713903427124, | |
| "learning_rate": 9.945525633703818e-06, | |
| "loss": 0.0443, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 6.982193064667292, | |
| "grad_norm": 0.9688456654548645, | |
| "learning_rate": 9.945452415469549e-06, | |
| "loss": 0.0452, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 6.991565135895033, | |
| "grad_norm": 1.5483156442642212, | |
| "learning_rate": 9.94537919723528e-06, | |
| "loss": 0.0498, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 7.0009372071227745, | |
| "grad_norm": 1.18287193775177, | |
| "learning_rate": 9.94530597900101e-06, | |
| "loss": 0.0445, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 7.010309278350515, | |
| "grad_norm": 0.7765620946884155, | |
| "learning_rate": 9.945232760766741e-06, | |
| "loss": 0.0346, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 7.019681349578256, | |
| "grad_norm": 0.948760986328125, | |
| "learning_rate": 9.945159542532474e-06, | |
| "loss": 0.0348, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 7.029053420805998, | |
| "grad_norm": 0.9965664744377136, | |
| "learning_rate": 9.945086324298205e-06, | |
| "loss": 0.0342, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.029053420805998, | |
| "eval_loss": 0.03782695531845093, | |
| "eval_pearson_cosine": 0.768491804599762, | |
| "eval_pearson_dot": 0.7183945775032043, | |
| "eval_pearson_euclidean": 0.7320147752761841, | |
| "eval_pearson_manhattan": 0.7333334684371948, | |
| "eval_runtime": 21.6515, | |
| "eval_samples_per_second": 69.279, | |
| "eval_spearman_cosine": 0.7677979499645443, | |
| "eval_spearman_dot": 0.7186610110098233, | |
| "eval_spearman_euclidean": 0.7364530110375347, | |
| "eval_spearman_manhattan": 0.737620665225201, | |
| "eval_steps_per_second": 8.683, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.038425492033739, | |
| "grad_norm": 0.8594346046447754, | |
| "learning_rate": 9.945013106063935e-06, | |
| "loss": 0.0318, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 7.047797563261481, | |
| "grad_norm": 1.62812340259552, | |
| "learning_rate": 9.944939887829666e-06, | |
| "loss": 0.0414, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 7.057169634489222, | |
| "grad_norm": 1.1017098426818848, | |
| "learning_rate": 9.944866669595397e-06, | |
| "loss": 0.0327, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 7.066541705716963, | |
| "grad_norm": 0.8536505699157715, | |
| "learning_rate": 9.944793451361128e-06, | |
| "loss": 0.0286, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 7.075913776944705, | |
| "grad_norm": 1.0389901399612427, | |
| "learning_rate": 9.944720233126858e-06, | |
| "loss": 0.0365, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 7.085285848172446, | |
| "grad_norm": 1.0682491064071655, | |
| "learning_rate": 9.94464701489259e-06, | |
| "loss": 0.034, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 7.094657919400188, | |
| "grad_norm": 0.8786489963531494, | |
| "learning_rate": 9.944573796658321e-06, | |
| "loss": 0.0373, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 7.104029990627929, | |
| "grad_norm": 1.3642008304595947, | |
| "learning_rate": 9.94450057842405e-06, | |
| "loss": 0.0314, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 7.11340206185567, | |
| "grad_norm": 0.7243325114250183, | |
| "learning_rate": 9.944427360189783e-06, | |
| "loss": 0.0299, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 7.122774133083412, | |
| "grad_norm": 0.6696385145187378, | |
| "learning_rate": 9.944354141955514e-06, | |
| "loss": 0.0311, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 7.1321462043111525, | |
| "grad_norm": 1.03152334690094, | |
| "learning_rate": 9.944280923721244e-06, | |
| "loss": 0.0355, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 7.141518275538894, | |
| "grad_norm": 0.8586616516113281, | |
| "learning_rate": 9.944207705486975e-06, | |
| "loss": 0.0394, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 7.150890346766635, | |
| "grad_norm": 0.9514285922050476, | |
| "learning_rate": 9.944134487252706e-06, | |
| "loss": 0.035, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 7.160262417994376, | |
| "grad_norm": 0.8053460717201233, | |
| "learning_rate": 9.944061269018437e-06, | |
| "loss": 0.0312, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 7.169634489222118, | |
| "grad_norm": 1.0056674480438232, | |
| "learning_rate": 9.943988050784167e-06, | |
| "loss": 0.0371, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 7.179006560449859, | |
| "grad_norm": 0.7738359570503235, | |
| "learning_rate": 9.943914832549898e-06, | |
| "loss": 0.0302, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 7.188378631677601, | |
| "grad_norm": 1.039197325706482, | |
| "learning_rate": 9.94384161431563e-06, | |
| "loss": 0.0316, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 7.197750702905342, | |
| "grad_norm": 1.578165888786316, | |
| "learning_rate": 9.943768396081361e-06, | |
| "loss": 0.0388, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 7.207122774133083, | |
| "grad_norm": 1.1753205060958862, | |
| "learning_rate": 9.943695177847092e-06, | |
| "loss": 0.0387, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 7.216494845360825, | |
| "grad_norm": 1.295299768447876, | |
| "learning_rate": 9.943621959612823e-06, | |
| "loss": 0.0417, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 7.225866916588566, | |
| "grad_norm": 0.9477363228797913, | |
| "learning_rate": 9.943548741378554e-06, | |
| "loss": 0.0305, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 7.235238987816308, | |
| "grad_norm": 1.0547223091125488, | |
| "learning_rate": 9.943475523144284e-06, | |
| "loss": 0.0314, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 7.244611059044049, | |
| "grad_norm": 1.4873117208480835, | |
| "learning_rate": 9.943402304910015e-06, | |
| "loss": 0.0302, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 7.25398313027179, | |
| "grad_norm": 0.9882778525352478, | |
| "learning_rate": 9.943329086675748e-06, | |
| "loss": 0.0328, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 7.2633552014995315, | |
| "grad_norm": 1.3187719583511353, | |
| "learning_rate": 9.943255868441477e-06, | |
| "loss": 0.0341, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 7.2633552014995315, | |
| "eval_loss": 0.03773624449968338, | |
| "eval_pearson_cosine": 0.7699387073516846, | |
| "eval_pearson_dot": 0.7237234115600586, | |
| "eval_pearson_euclidean": 0.7316513061523438, | |
| "eval_pearson_manhattan": 0.7335678339004517, | |
| "eval_runtime": 22.1612, | |
| "eval_samples_per_second": 67.686, | |
| "eval_spearman_cosine": 0.7694615753118931, | |
| "eval_spearman_dot": 0.7243788947148158, | |
| "eval_spearman_euclidean": 0.7361849268567764, | |
| "eval_spearman_manhattan": 0.7377945356892571, | |
| "eval_steps_per_second": 8.483, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 7.2727272727272725, | |
| "grad_norm": 1.0984870195388794, | |
| "learning_rate": 9.943182650207207e-06, | |
| "loss": 0.0329, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 7.282099343955014, | |
| "grad_norm": 0.7666100263595581, | |
| "learning_rate": 9.94310943197294e-06, | |
| "loss": 0.0358, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 7.291471415182755, | |
| "grad_norm": 0.9941838383674622, | |
| "learning_rate": 9.94303621373867e-06, | |
| "loss": 0.0351, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 7.300843486410496, | |
| "grad_norm": 1.3012335300445557, | |
| "learning_rate": 9.942962995504401e-06, | |
| "loss": 0.0296, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 7.310215557638238, | |
| "grad_norm": 1.1914719343185425, | |
| "learning_rate": 9.942889777270132e-06, | |
| "loss": 0.0333, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 7.319587628865979, | |
| "grad_norm": 1.1405929327011108, | |
| "learning_rate": 9.942816559035863e-06, | |
| "loss": 0.0408, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 7.328959700093721, | |
| "grad_norm": 0.665600061416626, | |
| "learning_rate": 9.942743340801594e-06, | |
| "loss": 0.0314, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 7.338331771321462, | |
| "grad_norm": 1.2029966115951538, | |
| "learning_rate": 9.942670122567324e-06, | |
| "loss": 0.041, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 7.347703842549203, | |
| "grad_norm": 0.44810751080513, | |
| "learning_rate": 9.942596904333057e-06, | |
| "loss": 0.0317, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 7.357075913776945, | |
| "grad_norm": 1.565082311630249, | |
| "learning_rate": 9.942523686098788e-06, | |
| "loss": 0.035, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 7.366447985004686, | |
| "grad_norm": 1.6850316524505615, | |
| "learning_rate": 9.942450467864517e-06, | |
| "loss": 0.0365, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 7.375820056232428, | |
| "grad_norm": 1.0027261972427368, | |
| "learning_rate": 9.942377249630249e-06, | |
| "loss": 0.0309, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 7.385192127460169, | |
| "grad_norm": 0.51674485206604, | |
| "learning_rate": 9.94230403139598e-06, | |
| "loss": 0.0321, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 7.39456419868791, | |
| "grad_norm": 1.0429599285125732, | |
| "learning_rate": 9.94223081316171e-06, | |
| "loss": 0.033, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 7.4039362699156515, | |
| "grad_norm": 0.618232250213623, | |
| "learning_rate": 9.942157594927441e-06, | |
| "loss": 0.0353, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 7.413308341143392, | |
| "grad_norm": 0.9780518412590027, | |
| "learning_rate": 9.942084376693174e-06, | |
| "loss": 0.0354, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 7.422680412371134, | |
| "grad_norm": 1.214362621307373, | |
| "learning_rate": 9.942011158458903e-06, | |
| "loss": 0.0338, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 7.432052483598875, | |
| "grad_norm": 1.202986240386963, | |
| "learning_rate": 9.941937940224634e-06, | |
| "loss": 0.0387, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 7.441424554826616, | |
| "grad_norm": 1.4128488302230835, | |
| "learning_rate": 9.941864721990366e-06, | |
| "loss": 0.0315, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 7.450796626054358, | |
| "grad_norm": 0.7198026180267334, | |
| "learning_rate": 9.941791503756097e-06, | |
| "loss": 0.0338, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 7.460168697282099, | |
| "grad_norm": 1.1124250888824463, | |
| "learning_rate": 9.941718285521828e-06, | |
| "loss": 0.0352, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 7.469540768509841, | |
| "grad_norm": 1.0420817136764526, | |
| "learning_rate": 9.941645067287558e-06, | |
| "loss": 0.0338, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 7.478912839737582, | |
| "grad_norm": 0.9638373255729675, | |
| "learning_rate": 9.941571849053289e-06, | |
| "loss": 0.0356, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 7.488284910965323, | |
| "grad_norm": 0.8584896922111511, | |
| "learning_rate": 9.94149863081902e-06, | |
| "loss": 0.0353, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 7.497656982193065, | |
| "grad_norm": 0.7161556482315063, | |
| "learning_rate": 9.94142541258475e-06, | |
| "loss": 0.0329, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.497656982193065, | |
| "eval_loss": 0.03753030672669411, | |
| "eval_pearson_cosine": 0.7705868482589722, | |
| "eval_pearson_dot": 0.7248358726501465, | |
| "eval_pearson_euclidean": 0.734631359577179, | |
| "eval_pearson_manhattan": 0.7363988161087036, | |
| "eval_runtime": 22.3628, | |
| "eval_samples_per_second": 67.076, | |
| "eval_spearman_cosine": 0.769708288306187, | |
| "eval_spearman_dot": 0.7249767839130733, | |
| "eval_spearman_euclidean": 0.7394619718544255, | |
| "eval_spearman_manhattan": 0.7409361299302836, | |
| "eval_steps_per_second": 8.407, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.507029053420806, | |
| "grad_norm": 0.443439781665802, | |
| "learning_rate": 9.941352194350481e-06, | |
| "loss": 0.0301, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 7.516401124648548, | |
| "grad_norm": 0.5801528692245483, | |
| "learning_rate": 9.941278976116214e-06, | |
| "loss": 0.0379, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 7.525773195876289, | |
| "grad_norm": 0.9093418717384338, | |
| "learning_rate": 9.941205757881943e-06, | |
| "loss": 0.0376, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 7.5351452671040295, | |
| "grad_norm": 0.7593823671340942, | |
| "learning_rate": 9.941132539647674e-06, | |
| "loss": 0.0444, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 7.544517338331771, | |
| "grad_norm": 0.706062376499176, | |
| "learning_rate": 9.941059321413406e-06, | |
| "loss": 0.0365, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 7.553889409559512, | |
| "grad_norm": 0.9754658937454224, | |
| "learning_rate": 9.940986103179137e-06, | |
| "loss": 0.0333, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 7.563261480787254, | |
| "grad_norm": 0.8546915054321289, | |
| "learning_rate": 9.940912884944867e-06, | |
| "loss": 0.0365, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 7.572633552014995, | |
| "grad_norm": 1.0958435535430908, | |
| "learning_rate": 9.940839666710598e-06, | |
| "loss": 0.0371, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 7.582005623242736, | |
| "grad_norm": 0.9083812832832336, | |
| "learning_rate": 9.940766448476329e-06, | |
| "loss": 0.0355, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 7.591377694470478, | |
| "grad_norm": 0.8183301091194153, | |
| "learning_rate": 9.94069323024206e-06, | |
| "loss": 0.0366, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 7.600749765698219, | |
| "grad_norm": 1.1571640968322754, | |
| "learning_rate": 9.94062001200779e-06, | |
| "loss": 0.0357, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 7.610121836925961, | |
| "grad_norm": 0.47001174092292786, | |
| "learning_rate": 9.940546793773523e-06, | |
| "loss": 0.0366, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 7.619493908153702, | |
| "grad_norm": 0.7864421010017395, | |
| "learning_rate": 9.940473575539254e-06, | |
| "loss": 0.0354, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 7.628865979381443, | |
| "grad_norm": 1.7657727003097534, | |
| "learning_rate": 9.940400357304984e-06, | |
| "loss": 0.0353, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 7.638238050609185, | |
| "grad_norm": 0.9494844079017639, | |
| "learning_rate": 9.940327139070715e-06, | |
| "loss": 0.0358, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 7.647610121836926, | |
| "grad_norm": 1.1095364093780518, | |
| "learning_rate": 9.940253920836446e-06, | |
| "loss": 0.0338, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 7.6569821930646675, | |
| "grad_norm": 0.5973043441772461, | |
| "learning_rate": 9.940180702602177e-06, | |
| "loss": 0.0332, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 7.6663542642924085, | |
| "grad_norm": 0.5820950865745544, | |
| "learning_rate": 9.940107484367907e-06, | |
| "loss": 0.0398, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 7.6757263355201495, | |
| "grad_norm": 0.8826543688774109, | |
| "learning_rate": 9.94003426613364e-06, | |
| "loss": 0.0363, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 7.685098406747891, | |
| "grad_norm": 1.2651371955871582, | |
| "learning_rate": 9.93996104789937e-06, | |
| "loss": 0.041, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 7.694470477975632, | |
| "grad_norm": 0.4515238106250763, | |
| "learning_rate": 9.9398878296651e-06, | |
| "loss": 0.0375, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 7.703842549203374, | |
| "grad_norm": 1.2343902587890625, | |
| "learning_rate": 9.939814611430832e-06, | |
| "loss": 0.0362, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 7.713214620431115, | |
| "grad_norm": 0.9942644238471985, | |
| "learning_rate": 9.939741393196563e-06, | |
| "loss": 0.029, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 7.722586691658856, | |
| "grad_norm": 1.327783226966858, | |
| "learning_rate": 9.939668174962294e-06, | |
| "loss": 0.0392, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 7.731958762886598, | |
| "grad_norm": 1.4785791635513306, | |
| "learning_rate": 9.939594956728024e-06, | |
| "loss": 0.035, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 7.731958762886598, | |
| "eval_loss": 0.037988826632499695, | |
| "eval_pearson_cosine": 0.7700406312942505, | |
| "eval_pearson_dot": 0.7271457314491272, | |
| "eval_pearson_euclidean": 0.7288488745689392, | |
| "eval_pearson_manhattan": 0.7308281660079956, | |
| "eval_runtime": 23.4237, | |
| "eval_samples_per_second": 64.038, | |
| "eval_spearman_cosine": 0.7690641250527666, | |
| "eval_spearman_dot": 0.72759972168602, | |
| "eval_spearman_euclidean": 0.7335219335323239, | |
| "eval_spearman_manhattan": 0.7351665552942261, | |
| "eval_steps_per_second": 8.026, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 7.741330834114339, | |
| "grad_norm": 0.9368901252746582, | |
| "learning_rate": 9.939521738493755e-06, | |
| "loss": 0.0354, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 7.750702905342081, | |
| "grad_norm": 0.924701452255249, | |
| "learning_rate": 9.939448520259486e-06, | |
| "loss": 0.0308, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 7.760074976569822, | |
| "grad_norm": 0.6925562620162964, | |
| "learning_rate": 9.939375302025217e-06, | |
| "loss": 0.0379, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 7.769447047797563, | |
| "grad_norm": 1.1450366973876953, | |
| "learning_rate": 9.939302083790947e-06, | |
| "loss": 0.035, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 7.778819119025305, | |
| "grad_norm": 1.4248292446136475, | |
| "learning_rate": 9.93922886555668e-06, | |
| "loss": 0.0425, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 7.788191190253046, | |
| "grad_norm": 1.1555083990097046, | |
| "learning_rate": 9.93915564732241e-06, | |
| "loss": 0.035, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 7.7975632614807875, | |
| "grad_norm": 0.8950551152229309, | |
| "learning_rate": 9.93908242908814e-06, | |
| "loss": 0.0371, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 7.8069353327085285, | |
| "grad_norm": 0.9402216076850891, | |
| "learning_rate": 9.939009210853872e-06, | |
| "loss": 0.0325, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 7.816307403936269, | |
| "grad_norm": 0.7723280191421509, | |
| "learning_rate": 9.938935992619603e-06, | |
| "loss": 0.0335, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 7.825679475164011, | |
| "grad_norm": 1.1138160228729248, | |
| "learning_rate": 9.938862774385334e-06, | |
| "loss": 0.0392, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 7.835051546391752, | |
| "grad_norm": 1.1937012672424316, | |
| "learning_rate": 9.938789556151064e-06, | |
| "loss": 0.0349, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 7.844423617619494, | |
| "grad_norm": 0.8927692174911499, | |
| "learning_rate": 9.938716337916797e-06, | |
| "loss": 0.0339, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 7.853795688847235, | |
| "grad_norm": 1.1513832807540894, | |
| "learning_rate": 9.938643119682526e-06, | |
| "loss": 0.039, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 7.863167760074976, | |
| "grad_norm": 0.6757535338401794, | |
| "learning_rate": 9.938569901448257e-06, | |
| "loss": 0.0331, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 7.872539831302718, | |
| "grad_norm": 0.64778071641922, | |
| "learning_rate": 9.938496683213989e-06, | |
| "loss": 0.0357, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 7.881911902530459, | |
| "grad_norm": 0.8938049674034119, | |
| "learning_rate": 9.93842346497972e-06, | |
| "loss": 0.0342, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 7.891283973758201, | |
| "grad_norm": 1.0501271486282349, | |
| "learning_rate": 9.93835024674545e-06, | |
| "loss": 0.0335, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 7.900656044985942, | |
| "grad_norm": 0.8977199792861938, | |
| "learning_rate": 9.938277028511181e-06, | |
| "loss": 0.0352, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 7.910028116213683, | |
| "grad_norm": 1.1958116292953491, | |
| "learning_rate": 9.938203810276912e-06, | |
| "loss": 0.0349, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 7.919400187441425, | |
| "grad_norm": 0.9677138328552246, | |
| "learning_rate": 9.938130592042643e-06, | |
| "loss": 0.0368, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 7.928772258669166, | |
| "grad_norm": 0.6786054372787476, | |
| "learning_rate": 9.938057373808374e-06, | |
| "loss": 0.0312, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 7.938144329896907, | |
| "grad_norm": 0.8180833458900452, | |
| "learning_rate": 9.937984155574106e-06, | |
| "loss": 0.0351, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 7.947516401124648, | |
| "grad_norm": 0.9622411727905273, | |
| "learning_rate": 9.937910937339837e-06, | |
| "loss": 0.0312, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 7.956888472352389, | |
| "grad_norm": 0.7947582006454468, | |
| "learning_rate": 9.937837719105566e-06, | |
| "loss": 0.0309, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 7.966260543580131, | |
| "grad_norm": 0.663296103477478, | |
| "learning_rate": 9.937764500871298e-06, | |
| "loss": 0.0361, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.966260543580131, | |
| "eval_loss": 0.03769104555249214, | |
| "eval_pearson_cosine": 0.7716894745826721, | |
| "eval_pearson_dot": 0.7308681011199951, | |
| "eval_pearson_euclidean": 0.7253518104553223, | |
| "eval_pearson_manhattan": 0.727583646774292, | |
| "eval_runtime": 21.789, | |
| "eval_samples_per_second": 68.842, | |
| "eval_spearman_cosine": 0.7708559308843369, | |
| "eval_spearman_dot": 0.7317227014854395, | |
| "eval_spearman_euclidean": 0.729650509473576, | |
| "eval_spearman_manhattan": 0.7317616874018321, | |
| "eval_steps_per_second": 8.628, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.975632614807872, | |
| "grad_norm": 0.4781196415424347, | |
| "learning_rate": 9.937691282637029e-06, | |
| "loss": 0.0322, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 7.985004686035614, | |
| "grad_norm": 1.5688908100128174, | |
| "learning_rate": 9.93761806440276e-06, | |
| "loss": 0.0385, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 7.994376757263355, | |
| "grad_norm": 0.9491916298866272, | |
| "learning_rate": 9.93754484616849e-06, | |
| "loss": 0.0349, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 8.003748828491096, | |
| "grad_norm": 0.5889357924461365, | |
| "learning_rate": 9.937471627934221e-06, | |
| "loss": 0.0282, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 8.013120899718837, | |
| "grad_norm": 0.7906449437141418, | |
| "learning_rate": 9.937398409699952e-06, | |
| "loss": 0.0236, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 8.02249297094658, | |
| "grad_norm": 1.4013662338256836, | |
| "learning_rate": 9.937325191465683e-06, | |
| "loss": 0.0303, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 8.03186504217432, | |
| "grad_norm": 1.186049461364746, | |
| "learning_rate": 9.937251973231414e-06, | |
| "loss": 0.0283, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 8.041237113402062, | |
| "grad_norm": 0.9762454628944397, | |
| "learning_rate": 9.937178754997146e-06, | |
| "loss": 0.0235, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 8.050609184629803, | |
| "grad_norm": 0.8854254484176636, | |
| "learning_rate": 9.937105536762877e-06, | |
| "loss": 0.0269, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 8.059981255857544, | |
| "grad_norm": 1.2090007066726685, | |
| "learning_rate": 9.937032318528607e-06, | |
| "loss": 0.0254, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 8.069353327085286, | |
| "grad_norm": 0.5176217555999756, | |
| "learning_rate": 9.936959100294338e-06, | |
| "loss": 0.0317, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 8.078725398313027, | |
| "grad_norm": 0.4938619136810303, | |
| "learning_rate": 9.936885882060069e-06, | |
| "loss": 0.0245, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 8.088097469540768, | |
| "grad_norm": 1.6035066843032837, | |
| "learning_rate": 9.9368126638258e-06, | |
| "loss": 0.0296, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 8.09746954076851, | |
| "grad_norm": 0.6895983815193176, | |
| "learning_rate": 9.93673944559153e-06, | |
| "loss": 0.0292, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 8.10684161199625, | |
| "grad_norm": 0.6980400085449219, | |
| "learning_rate": 9.936666227357263e-06, | |
| "loss": 0.0299, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 8.116213683223993, | |
| "grad_norm": 1.0714101791381836, | |
| "learning_rate": 9.936593009122992e-06, | |
| "loss": 0.0258, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 8.125585754451734, | |
| "grad_norm": 0.6729503273963928, | |
| "learning_rate": 9.936519790888723e-06, | |
| "loss": 0.0279, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 8.134957825679475, | |
| "grad_norm": 0.8938456177711487, | |
| "learning_rate": 9.936446572654455e-06, | |
| "loss": 0.0245, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 8.144329896907216, | |
| "grad_norm": 1.2066154479980469, | |
| "learning_rate": 9.936373354420186e-06, | |
| "loss": 0.0334, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 8.153701968134957, | |
| "grad_norm": 0.7639226913452148, | |
| "learning_rate": 9.936300136185917e-06, | |
| "loss": 0.0245, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 8.1630740393627, | |
| "grad_norm": 1.4429128170013428, | |
| "learning_rate": 9.936226917951647e-06, | |
| "loss": 0.0278, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 8.17244611059044, | |
| "grad_norm": 0.8992042541503906, | |
| "learning_rate": 9.936153699717378e-06, | |
| "loss": 0.0267, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 8.181818181818182, | |
| "grad_norm": 0.598173975944519, | |
| "learning_rate": 9.936080481483109e-06, | |
| "loss": 0.0258, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 8.191190253045923, | |
| "grad_norm": 0.42205601930618286, | |
| "learning_rate": 9.93600726324884e-06, | |
| "loss": 0.0323, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 8.200562324273664, | |
| "grad_norm": 0.584039568901062, | |
| "learning_rate": 9.935934045014572e-06, | |
| "loss": 0.0224, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 8.200562324273664, | |
| "eval_loss": 0.037737876176834106, | |
| "eval_pearson_cosine": 0.7710561156272888, | |
| "eval_pearson_dot": 0.7243790626525879, | |
| "eval_pearson_euclidean": 0.7310018539428711, | |
| "eval_pearson_manhattan": 0.7328372001647949, | |
| "eval_runtime": 24.3532, | |
| "eval_samples_per_second": 61.593, | |
| "eval_spearman_cosine": 0.7703050511110383, | |
| "eval_spearman_dot": 0.725368343860831, | |
| "eval_spearman_euclidean": 0.7355669919591825, | |
| "eval_spearman_manhattan": 0.7369211933770833, | |
| "eval_steps_per_second": 7.72, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 8.209934395501406, | |
| "grad_norm": 0.8525517582893372, | |
| "learning_rate": 9.935860826780303e-06, | |
| "loss": 0.0268, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 8.219306466729147, | |
| "grad_norm": 0.7080439329147339, | |
| "learning_rate": 9.935787608546034e-06, | |
| "loss": 0.0237, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 8.228678537956888, | |
| "grad_norm": 0.7084332704544067, | |
| "learning_rate": 9.935714390311764e-06, | |
| "loss": 0.0232, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 8.23805060918463, | |
| "grad_norm": 1.2140733003616333, | |
| "learning_rate": 9.935641172077495e-06, | |
| "loss": 0.028, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 8.24742268041237, | |
| "grad_norm": 0.6614952087402344, | |
| "learning_rate": 9.935567953843226e-06, | |
| "loss": 0.025, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 8.256794751640113, | |
| "grad_norm": 0.642755925655365, | |
| "learning_rate": 9.935494735608957e-06, | |
| "loss": 0.0259, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 8.266166822867854, | |
| "grad_norm": 1.1676636934280396, | |
| "learning_rate": 9.935421517374687e-06, | |
| "loss": 0.0292, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 8.275538894095595, | |
| "grad_norm": 0.4561503529548645, | |
| "learning_rate": 9.935348299140418e-06, | |
| "loss": 0.026, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 8.284910965323336, | |
| "grad_norm": 0.5693290829658508, | |
| "learning_rate": 9.935275080906149e-06, | |
| "loss": 0.0283, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 8.294283036551079, | |
| "grad_norm": 1.2574779987335205, | |
| "learning_rate": 9.935201862671881e-06, | |
| "loss": 0.0275, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 8.30365510777882, | |
| "grad_norm": 0.9662300944328308, | |
| "learning_rate": 9.935128644437612e-06, | |
| "loss": 0.0257, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 8.31302717900656, | |
| "grad_norm": 0.5467878580093384, | |
| "learning_rate": 9.935055426203343e-06, | |
| "loss": 0.0264, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 8.322399250234302, | |
| "grad_norm": 1.0672435760498047, | |
| "learning_rate": 9.934982207969074e-06, | |
| "loss": 0.0334, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 8.331771321462043, | |
| "grad_norm": 1.155970573425293, | |
| "learning_rate": 9.934908989734804e-06, | |
| "loss": 0.029, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 8.341143392689784, | |
| "grad_norm": 0.9163686037063599, | |
| "learning_rate": 9.934835771500535e-06, | |
| "loss": 0.0295, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 8.350515463917526, | |
| "grad_norm": 0.6844992637634277, | |
| "learning_rate": 9.934762553266266e-06, | |
| "loss": 0.0228, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 8.359887535145267, | |
| "grad_norm": 0.6449628472328186, | |
| "learning_rate": 9.934689335031997e-06, | |
| "loss": 0.0272, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 8.369259606373008, | |
| "grad_norm": 1.0157432556152344, | |
| "learning_rate": 9.934616116797729e-06, | |
| "loss": 0.0251, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 8.37863167760075, | |
| "grad_norm": 0.9558159112930298, | |
| "learning_rate": 9.93454289856346e-06, | |
| "loss": 0.0262, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 8.388003748828492, | |
| "grad_norm": 1.2592884302139282, | |
| "learning_rate": 9.934469680329189e-06, | |
| "loss": 0.0317, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 8.397375820056233, | |
| "grad_norm": 0.8466887474060059, | |
| "learning_rate": 9.934396462094921e-06, | |
| "loss": 0.0333, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 8.406747891283974, | |
| "grad_norm": 0.8453270792961121, | |
| "learning_rate": 9.934323243860652e-06, | |
| "loss": 0.0276, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 8.416119962511715, | |
| "grad_norm": 0.6024593710899353, | |
| "learning_rate": 9.934250025626383e-06, | |
| "loss": 0.0269, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 8.425492033739456, | |
| "grad_norm": 0.8663728833198547, | |
| "learning_rate": 9.934176807392114e-06, | |
| "loss": 0.0289, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 8.434864104967197, | |
| "grad_norm": 0.8765361905097961, | |
| "learning_rate": 9.934103589157846e-06, | |
| "loss": 0.0256, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.434864104967197, | |
| "eval_loss": 0.038624610751867294, | |
| "eval_pearson_cosine": 0.7652055025100708, | |
| "eval_pearson_dot": 0.7185550928115845, | |
| "eval_pearson_euclidean": 0.7254422903060913, | |
| "eval_pearson_manhattan": 0.7273893356323242, | |
| "eval_runtime": 25.8439, | |
| "eval_samples_per_second": 58.041, | |
| "eval_spearman_cosine": 0.7646832614130892, | |
| "eval_spearman_dot": 0.7190565869110545, | |
| "eval_spearman_euclidean": 0.7303235144121284, | |
| "eval_spearman_manhattan": 0.7319318616566108, | |
| "eval_steps_per_second": 7.274, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.44423617619494, | |
| "grad_norm": 0.6332679986953735, | |
| "learning_rate": 9.934030370923575e-06, | |
| "loss": 0.027, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 8.45360824742268, | |
| "grad_norm": 0.6109747886657715, | |
| "learning_rate": 9.933957152689306e-06, | |
| "loss": 0.0242, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 8.462980318650422, | |
| "grad_norm": 1.127426266670227, | |
| "learning_rate": 9.933883934455038e-06, | |
| "loss": 0.035, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 8.472352389878163, | |
| "grad_norm": 0.7529722452163696, | |
| "learning_rate": 9.933810716220769e-06, | |
| "loss": 0.0303, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 8.481724461105905, | |
| "grad_norm": 0.6331318020820618, | |
| "learning_rate": 9.9337374979865e-06, | |
| "loss": 0.0301, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 8.491096532333646, | |
| "grad_norm": 0.9451204538345337, | |
| "learning_rate": 9.93366427975223e-06, | |
| "loss": 0.0301, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 8.500468603561387, | |
| "grad_norm": 1.0673385858535767, | |
| "learning_rate": 9.933591061517961e-06, | |
| "loss": 0.0311, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 8.509840674789128, | |
| "grad_norm": 0.5267199873924255, | |
| "learning_rate": 9.933517843283692e-06, | |
| "loss": 0.0258, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 8.51921274601687, | |
| "grad_norm": 1.0747129917144775, | |
| "learning_rate": 9.933444625049423e-06, | |
| "loss": 0.0308, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 8.52858481724461, | |
| "grad_norm": 0.5183865427970886, | |
| "learning_rate": 9.933371406815155e-06, | |
| "loss": 0.0272, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 8.537956888472353, | |
| "grad_norm": 0.8063677549362183, | |
| "learning_rate": 9.933298188580886e-06, | |
| "loss": 0.0256, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 8.547328959700094, | |
| "grad_norm": 0.7497850656509399, | |
| "learning_rate": 9.933224970346615e-06, | |
| "loss": 0.0272, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 8.556701030927835, | |
| "grad_norm": 1.4813112020492554, | |
| "learning_rate": 9.933151752112347e-06, | |
| "loss": 0.0271, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 8.566073102155576, | |
| "grad_norm": 0.9482595920562744, | |
| "learning_rate": 9.933078533878078e-06, | |
| "loss": 0.0256, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 8.575445173383319, | |
| "grad_norm": 0.5539655089378357, | |
| "learning_rate": 9.933005315643809e-06, | |
| "loss": 0.0274, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 8.58481724461106, | |
| "grad_norm": 0.7821139097213745, | |
| "learning_rate": 9.93293209740954e-06, | |
| "loss": 0.0284, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 8.5941893158388, | |
| "grad_norm": 0.9729026556015015, | |
| "learning_rate": 9.93285887917527e-06, | |
| "loss": 0.0256, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 8.603561387066541, | |
| "grad_norm": 1.1433371305465698, | |
| "learning_rate": 9.932785660941001e-06, | |
| "loss": 0.0274, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 8.612933458294282, | |
| "grad_norm": 1.211930751800537, | |
| "learning_rate": 9.932712442706732e-06, | |
| "loss": 0.0325, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 8.622305529522023, | |
| "grad_norm": 1.3734978437423706, | |
| "learning_rate": 9.932639224472463e-06, | |
| "loss": 0.0311, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 8.631677600749766, | |
| "grad_norm": 1.3476920127868652, | |
| "learning_rate": 9.932566006238195e-06, | |
| "loss": 0.0281, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 8.641049671977507, | |
| "grad_norm": 0.720197856426239, | |
| "learning_rate": 9.932492788003926e-06, | |
| "loss": 0.0233, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 8.650421743205248, | |
| "grad_norm": 1.2147605419158936, | |
| "learning_rate": 9.932419569769655e-06, | |
| "loss": 0.0308, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 8.65979381443299, | |
| "grad_norm": 0.5273356437683105, | |
| "learning_rate": 9.932346351535387e-06, | |
| "loss": 0.0278, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 8.669165885660732, | |
| "grad_norm": 1.316347360610962, | |
| "learning_rate": 9.932273133301118e-06, | |
| "loss": 0.0283, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 8.669165885660732, | |
| "eval_loss": 0.037036340683698654, | |
| "eval_pearson_cosine": 0.773975670337677, | |
| "eval_pearson_dot": 0.7285434007644653, | |
| "eval_pearson_euclidean": 0.7271639108657837, | |
| "eval_pearson_manhattan": 0.7293847799301147, | |
| "eval_runtime": 21.5505, | |
| "eval_samples_per_second": 69.604, | |
| "eval_spearman_cosine": 0.773229338598899, | |
| "eval_spearman_dot": 0.7297658810725091, | |
| "eval_spearman_euclidean": 0.7311555468063519, | |
| "eval_spearman_manhattan": 0.7331183382723726, | |
| "eval_steps_per_second": 8.724, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 8.678537956888473, | |
| "grad_norm": 0.6502562165260315, | |
| "learning_rate": 9.932199915066849e-06, | |
| "loss": 0.0255, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 8.687910028116214, | |
| "grad_norm": 1.172356128692627, | |
| "learning_rate": 9.93212669683258e-06, | |
| "loss": 0.0293, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 8.697282099343955, | |
| "grad_norm": 0.6329541206359863, | |
| "learning_rate": 9.932053478598312e-06, | |
| "loss": 0.0299, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 8.706654170571696, | |
| "grad_norm": 1.1246780157089233, | |
| "learning_rate": 9.931980260364041e-06, | |
| "loss": 0.0322, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 8.716026241799437, | |
| "grad_norm": 0.7996613383293152, | |
| "learning_rate": 9.931907042129772e-06, | |
| "loss": 0.0279, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 8.72539831302718, | |
| "grad_norm": 1.0772420167922974, | |
| "learning_rate": 9.931833823895504e-06, | |
| "loss": 0.0268, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 8.73477038425492, | |
| "grad_norm": 1.3459417819976807, | |
| "learning_rate": 9.931760605661235e-06, | |
| "loss": 0.0361, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 8.744142455482661, | |
| "grad_norm": 0.901692271232605, | |
| "learning_rate": 9.931687387426966e-06, | |
| "loss": 0.0317, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 8.753514526710402, | |
| "grad_norm": 1.1700392961502075, | |
| "learning_rate": 9.931614169192697e-06, | |
| "loss": 0.0257, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 8.762886597938145, | |
| "grad_norm": 1.1746001243591309, | |
| "learning_rate": 9.931540950958427e-06, | |
| "loss": 0.0261, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 8.772258669165886, | |
| "grad_norm": 1.250924825668335, | |
| "learning_rate": 9.931467732724158e-06, | |
| "loss": 0.0272, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 8.781630740393627, | |
| "grad_norm": 0.922290027141571, | |
| "learning_rate": 9.931394514489889e-06, | |
| "loss": 0.0295, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 8.791002811621368, | |
| "grad_norm": 0.6809844970703125, | |
| "learning_rate": 9.931321296255621e-06, | |
| "loss": 0.0269, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 8.800374882849109, | |
| "grad_norm": 0.8787119388580322, | |
| "learning_rate": 9.931248078021352e-06, | |
| "loss": 0.031, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 8.80974695407685, | |
| "grad_norm": 0.5186774134635925, | |
| "learning_rate": 9.931174859787083e-06, | |
| "loss": 0.0322, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 8.819119025304593, | |
| "grad_norm": 0.8100725412368774, | |
| "learning_rate": 9.931101641552814e-06, | |
| "loss": 0.026, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 8.828491096532334, | |
| "grad_norm": 0.7274125218391418, | |
| "learning_rate": 9.931028423318544e-06, | |
| "loss": 0.0291, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 8.837863167760075, | |
| "grad_norm": 1.1390098333358765, | |
| "learning_rate": 9.930955205084275e-06, | |
| "loss": 0.031, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 8.847235238987816, | |
| "grad_norm": 0.8184690475463867, | |
| "learning_rate": 9.930881986850006e-06, | |
| "loss": 0.0312, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 8.856607310215558, | |
| "grad_norm": 0.4963175356388092, | |
| "learning_rate": 9.930808768615737e-06, | |
| "loss": 0.0212, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 8.8659793814433, | |
| "grad_norm": 1.4110792875289917, | |
| "learning_rate": 9.930735550381467e-06, | |
| "loss": 0.0284, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 8.87535145267104, | |
| "grad_norm": 0.9356960654258728, | |
| "learning_rate": 9.930662332147198e-06, | |
| "loss": 0.0273, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 8.884723523898781, | |
| "grad_norm": 1.2740856409072876, | |
| "learning_rate": 9.930589113912929e-06, | |
| "loss": 0.0293, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 8.894095595126522, | |
| "grad_norm": 1.2273004055023193, | |
| "learning_rate": 9.930515895678661e-06, | |
| "loss": 0.0322, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 8.903467666354265, | |
| "grad_norm": 0.8036444187164307, | |
| "learning_rate": 9.930442677444392e-06, | |
| "loss": 0.0274, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.903467666354265, | |
| "eval_loss": 0.037216756492853165, | |
| "eval_pearson_cosine": 0.7742361426353455, | |
| "eval_pearson_dot": 0.7297594547271729, | |
| "eval_pearson_euclidean": 0.7265840172767639, | |
| "eval_pearson_manhattan": 0.7287671566009521, | |
| "eval_runtime": 27.1822, | |
| "eval_samples_per_second": 55.183, | |
| "eval_spearman_cosine": 0.773949198027488, | |
| "eval_spearman_dot": 0.7317025356234911, | |
| "eval_spearman_euclidean": 0.7328250947435205, | |
| "eval_spearman_manhattan": 0.7345883817446427, | |
| "eval_steps_per_second": 6.916, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.912839737582006, | |
| "grad_norm": 1.1801636219024658, | |
| "learning_rate": 9.930369459210123e-06, | |
| "loss": 0.0298, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 8.922211808809747, | |
| "grad_norm": 0.6167355179786682, | |
| "learning_rate": 9.930296240975853e-06, | |
| "loss": 0.0321, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 8.931583880037488, | |
| "grad_norm": 0.9813573956489563, | |
| "learning_rate": 9.930223022741584e-06, | |
| "loss": 0.0315, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 8.940955951265229, | |
| "grad_norm": 1.0033338069915771, | |
| "learning_rate": 9.930149804507315e-06, | |
| "loss": 0.0288, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 8.950328022492972, | |
| "grad_norm": 1.8989328145980835, | |
| "learning_rate": 9.930076586273046e-06, | |
| "loss": 0.0301, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 8.959700093720713, | |
| "grad_norm": 1.1895250082015991, | |
| "learning_rate": 9.930003368038778e-06, | |
| "loss": 0.0245, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 8.969072164948454, | |
| "grad_norm": 0.5209571719169617, | |
| "learning_rate": 9.929930149804509e-06, | |
| "loss": 0.0292, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 8.978444236176195, | |
| "grad_norm": 0.6561270952224731, | |
| "learning_rate": 9.929856931570238e-06, | |
| "loss": 0.0321, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 8.987816307403936, | |
| "grad_norm": 0.8421456217765808, | |
| "learning_rate": 9.92978371333597e-06, | |
| "loss": 0.0298, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 8.997188378631678, | |
| "grad_norm": 2.0356316566467285, | |
| "learning_rate": 9.929710495101701e-06, | |
| "loss": 0.0285, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 9.00656044985942, | |
| "grad_norm": 0.9041091799736023, | |
| "learning_rate": 9.929637276867432e-06, | |
| "loss": 0.0266, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 9.01593252108716, | |
| "grad_norm": 1.0879167318344116, | |
| "learning_rate": 9.929564058633163e-06, | |
| "loss": 0.0276, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 9.025304592314901, | |
| "grad_norm": 0.48896804451942444, | |
| "learning_rate": 9.929490840398893e-06, | |
| "loss": 0.0209, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 9.034676663542642, | |
| "grad_norm": 0.3795441687107086, | |
| "learning_rate": 9.929417622164624e-06, | |
| "loss": 0.0202, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 9.044048734770385, | |
| "grad_norm": 0.6517238020896912, | |
| "learning_rate": 9.929344403930355e-06, | |
| "loss": 0.0258, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 9.053420805998126, | |
| "grad_norm": 0.7814950942993164, | |
| "learning_rate": 9.929271185696087e-06, | |
| "loss": 0.0217, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 9.062792877225867, | |
| "grad_norm": 0.8012738823890686, | |
| "learning_rate": 9.929197967461818e-06, | |
| "loss": 0.0187, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 9.072164948453608, | |
| "grad_norm": 0.9685556292533875, | |
| "learning_rate": 9.929124749227549e-06, | |
| "loss": 0.0223, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 9.081537019681349, | |
| "grad_norm": 0.8415644764900208, | |
| "learning_rate": 9.92905153099328e-06, | |
| "loss": 0.0174, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 9.090909090909092, | |
| "grad_norm": 0.5449099540710449, | |
| "learning_rate": 9.92897831275901e-06, | |
| "loss": 0.025, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 9.100281162136833, | |
| "grad_norm": 0.7209439873695374, | |
| "learning_rate": 9.928905094524741e-06, | |
| "loss": 0.0221, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 9.109653233364574, | |
| "grad_norm": 0.5441991090774536, | |
| "learning_rate": 9.928831876290472e-06, | |
| "loss": 0.0217, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 9.119025304592315, | |
| "grad_norm": 0.7726917862892151, | |
| "learning_rate": 9.928758658056203e-06, | |
| "loss": 0.0264, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 9.128397375820056, | |
| "grad_norm": 1.4641560316085815, | |
| "learning_rate": 9.928685439821935e-06, | |
| "loss": 0.0215, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 9.137769447047798, | |
| "grad_norm": 0.7165714502334595, | |
| "learning_rate": 9.928612221587664e-06, | |
| "loss": 0.025, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 9.137769447047798, | |
| "eval_loss": 0.03766760975122452, | |
| "eval_pearson_cosine": 0.7719284296035767, | |
| "eval_pearson_dot": 0.7294802665710449, | |
| "eval_pearson_euclidean": 0.7313249111175537, | |
| "eval_pearson_manhattan": 0.7333976626396179, | |
| "eval_runtime": 27.8656, | |
| "eval_samples_per_second": 53.83, | |
| "eval_spearman_cosine": 0.7718354415047185, | |
| "eval_spearman_dot": 0.730941479257979, | |
| "eval_spearman_euclidean": 0.7371740495785648, | |
| "eval_spearman_manhattan": 0.7388595895844299, | |
| "eval_steps_per_second": 6.747, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 9.14714151827554, | |
| "grad_norm": 1.162800908088684, | |
| "learning_rate": 9.928539003353395e-06, | |
| "loss": 0.0299, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 9.15651358950328, | |
| "grad_norm": 0.826000452041626, | |
| "learning_rate": 9.928465785119127e-06, | |
| "loss": 0.0204, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 9.165885660731021, | |
| "grad_norm": 0.4205090403556824, | |
| "learning_rate": 9.928392566884858e-06, | |
| "loss": 0.0213, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 9.175257731958762, | |
| "grad_norm": 1.4229509830474854, | |
| "learning_rate": 9.928319348650589e-06, | |
| "loss": 0.0232, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 9.184629803186505, | |
| "grad_norm": 0.550862729549408, | |
| "learning_rate": 9.92824613041632e-06, | |
| "loss": 0.0247, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 9.194001874414246, | |
| "grad_norm": 0.6965065598487854, | |
| "learning_rate": 9.92817291218205e-06, | |
| "loss": 0.025, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 9.203373945641987, | |
| "grad_norm": 0.43077608942985535, | |
| "learning_rate": 9.928099693947781e-06, | |
| "loss": 0.0251, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 9.212746016869728, | |
| "grad_norm": 0.450005441904068, | |
| "learning_rate": 9.928026475713512e-06, | |
| "loss": 0.0212, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 9.222118088097469, | |
| "grad_norm": 1.184260368347168, | |
| "learning_rate": 9.927953257479244e-06, | |
| "loss": 0.0222, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 9.231490159325212, | |
| "grad_norm": 0.5146024823188782, | |
| "learning_rate": 9.927880039244975e-06, | |
| "loss": 0.0237, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 9.240862230552953, | |
| "grad_norm": 0.638936460018158, | |
| "learning_rate": 9.927806821010704e-06, | |
| "loss": 0.0313, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 9.250234301780694, | |
| "grad_norm": 0.5175133943557739, | |
| "learning_rate": 9.927733602776437e-06, | |
| "loss": 0.0267, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 9.259606373008435, | |
| "grad_norm": 0.46744242310523987, | |
| "learning_rate": 9.927660384542167e-06, | |
| "loss": 0.0221, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 9.268978444236176, | |
| "grad_norm": 1.0883630514144897, | |
| "learning_rate": 9.927587166307898e-06, | |
| "loss": 0.0209, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 9.278350515463918, | |
| "grad_norm": 0.8785117864608765, | |
| "learning_rate": 9.927513948073629e-06, | |
| "loss": 0.0243, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 9.28772258669166, | |
| "grad_norm": 1.33463716506958, | |
| "learning_rate": 9.927440729839361e-06, | |
| "loss": 0.0231, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 9.2970946579194, | |
| "grad_norm": 0.6693497896194458, | |
| "learning_rate": 9.92736751160509e-06, | |
| "loss": 0.0205, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 9.306466729147141, | |
| "grad_norm": 0.44432297348976135, | |
| "learning_rate": 9.927294293370821e-06, | |
| "loss": 0.0255, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 9.315838800374882, | |
| "grad_norm": 0.9900962710380554, | |
| "learning_rate": 9.927221075136553e-06, | |
| "loss": 0.0262, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 9.325210871602625, | |
| "grad_norm": 0.8196175694465637, | |
| "learning_rate": 9.927147856902284e-06, | |
| "loss": 0.0262, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 9.334582942830366, | |
| "grad_norm": 1.0177077054977417, | |
| "learning_rate": 9.927074638668015e-06, | |
| "loss": 0.0267, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 9.343955014058107, | |
| "grad_norm": 1.218307375907898, | |
| "learning_rate": 9.927001420433746e-06, | |
| "loss": 0.0248, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 9.353327085285848, | |
| "grad_norm": 0.9856002926826477, | |
| "learning_rate": 9.926928202199476e-06, | |
| "loss": 0.0233, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 9.362699156513589, | |
| "grad_norm": 0.6501719355583191, | |
| "learning_rate": 9.926854983965207e-06, | |
| "loss": 0.0271, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 9.372071227741332, | |
| "grad_norm": 0.5562245845794678, | |
| "learning_rate": 9.926781765730938e-06, | |
| "loss": 0.031, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.372071227741332, | |
| "eval_loss": 0.03722027316689491, | |
| "eval_pearson_cosine": 0.7733820676803589, | |
| "eval_pearson_dot": 0.725334644317627, | |
| "eval_pearson_euclidean": 0.7356694936752319, | |
| "eval_pearson_manhattan": 0.7372510433197021, | |
| "eval_runtime": 25.8635, | |
| "eval_samples_per_second": 57.997, | |
| "eval_spearman_cosine": 0.7735257400299028, | |
| "eval_spearman_dot": 0.726586040502744, | |
| "eval_spearman_euclidean": 0.7407176416099474, | |
| "eval_spearman_manhattan": 0.7421316928799319, | |
| "eval_steps_per_second": 7.269, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.381443298969073, | |
| "grad_norm": 0.4994644522666931, | |
| "learning_rate": 9.926708547496669e-06, | |
| "loss": 0.0226, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 9.390815370196814, | |
| "grad_norm": 1.5270389318466187, | |
| "learning_rate": 9.926635329262401e-06, | |
| "loss": 0.0211, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 9.400187441424555, | |
| "grad_norm": 0.47197312116622925, | |
| "learning_rate": 9.92656211102813e-06, | |
| "loss": 0.0235, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 9.409559512652296, | |
| "grad_norm": 1.132454752922058, | |
| "learning_rate": 9.926488892793863e-06, | |
| "loss": 0.023, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 9.418931583880038, | |
| "grad_norm": 0.7693812251091003, | |
| "learning_rate": 9.926415674559593e-06, | |
| "loss": 0.0247, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 9.42830365510778, | |
| "grad_norm": 0.42411306500434875, | |
| "learning_rate": 9.926342456325324e-06, | |
| "loss": 0.0234, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 9.43767572633552, | |
| "grad_norm": 0.9110538959503174, | |
| "learning_rate": 9.926269238091055e-06, | |
| "loss": 0.0256, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 9.447047797563261, | |
| "grad_norm": 0.6932746171951294, | |
| "learning_rate": 9.926196019856786e-06, | |
| "loss": 0.0288, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 9.456419868791002, | |
| "grad_norm": 0.6196317076683044, | |
| "learning_rate": 9.926122801622516e-06, | |
| "loss": 0.0239, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 9.465791940018745, | |
| "grad_norm": 0.6985231637954712, | |
| "learning_rate": 9.926049583388247e-06, | |
| "loss": 0.0194, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 9.475164011246486, | |
| "grad_norm": 0.8828220963478088, | |
| "learning_rate": 9.925976365153978e-06, | |
| "loss": 0.0282, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 9.484536082474227, | |
| "grad_norm": 0.3887142241001129, | |
| "learning_rate": 9.92590314691971e-06, | |
| "loss": 0.0231, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 9.493908153701968, | |
| "grad_norm": 0.696250855922699, | |
| "learning_rate": 9.925829928685441e-06, | |
| "loss": 0.0241, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 9.503280224929709, | |
| "grad_norm": 0.9591291546821594, | |
| "learning_rate": 9.925756710451172e-06, | |
| "loss": 0.0237, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 9.512652296157452, | |
| "grad_norm": 0.6247865557670593, | |
| "learning_rate": 9.925683492216903e-06, | |
| "loss": 0.0225, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 9.522024367385193, | |
| "grad_norm": 0.8061539530754089, | |
| "learning_rate": 9.925610273982633e-06, | |
| "loss": 0.0248, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 9.531396438612934, | |
| "grad_norm": 0.5681460499763489, | |
| "learning_rate": 9.925537055748364e-06, | |
| "loss": 0.0216, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 9.540768509840674, | |
| "grad_norm": 0.7798430323600769, | |
| "learning_rate": 9.925463837514095e-06, | |
| "loss": 0.0205, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 9.550140581068415, | |
| "grad_norm": 0.633307695388794, | |
| "learning_rate": 9.925390619279827e-06, | |
| "loss": 0.0257, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 9.559512652296158, | |
| "grad_norm": 0.5352799892425537, | |
| "learning_rate": 9.925317401045558e-06, | |
| "loss": 0.0214, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 9.5688847235239, | |
| "grad_norm": 1.4367021322250366, | |
| "learning_rate": 9.925244182811287e-06, | |
| "loss": 0.0245, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 9.57825679475164, | |
| "grad_norm": 0.6616729497909546, | |
| "learning_rate": 9.92517096457702e-06, | |
| "loss": 0.0168, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 9.587628865979381, | |
| "grad_norm": 0.5232043862342834, | |
| "learning_rate": 9.92509774634275e-06, | |
| "loss": 0.0229, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 9.597000937207122, | |
| "grad_norm": 0.5471720099449158, | |
| "learning_rate": 9.925024528108481e-06, | |
| "loss": 0.0244, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 9.606373008434865, | |
| "grad_norm": 0.8130425214767456, | |
| "learning_rate": 9.924951309874212e-06, | |
| "loss": 0.0243, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 9.606373008434865, | |
| "eval_loss": 0.037354420870542526, | |
| "eval_pearson_cosine": 0.7731273770332336, | |
| "eval_pearson_dot": 0.7302557826042175, | |
| "eval_pearson_euclidean": 0.7300422191619873, | |
| "eval_pearson_manhattan": 0.7321226596832275, | |
| "eval_runtime": 25.5048, | |
| "eval_samples_per_second": 58.813, | |
| "eval_spearman_cosine": 0.7727287355752905, | |
| "eval_spearman_dot": 0.7305929253470385, | |
| "eval_spearman_euclidean": 0.7346168467659768, | |
| "eval_spearman_manhattan": 0.7364009847987945, | |
| "eval_steps_per_second": 7.371, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 9.615745079662606, | |
| "grad_norm": 0.497060626745224, | |
| "learning_rate": 9.924878091639943e-06, | |
| "loss": 0.0217, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 9.625117150890347, | |
| "grad_norm": 0.985636830329895, | |
| "learning_rate": 9.924804873405673e-06, | |
| "loss": 0.0238, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 9.634489222118088, | |
| "grad_norm": 0.8833957314491272, | |
| "learning_rate": 9.924731655171404e-06, | |
| "loss": 0.0215, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 9.643861293345829, | |
| "grad_norm": 0.7223436832427979, | |
| "learning_rate": 9.924658436937137e-06, | |
| "loss": 0.0257, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 9.653233364573572, | |
| "grad_norm": 1.0917994976043701, | |
| "learning_rate": 9.924585218702867e-06, | |
| "loss": 0.0272, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 9.662605435801312, | |
| "grad_norm": 0.79998779296875, | |
| "learning_rate": 9.924512000468598e-06, | |
| "loss": 0.0232, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 9.671977507029053, | |
| "grad_norm": 0.9708638191223145, | |
| "learning_rate": 9.924438782234329e-06, | |
| "loss": 0.0214, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 9.681349578256794, | |
| "grad_norm": 0.5575175881385803, | |
| "learning_rate": 9.92436556400006e-06, | |
| "loss": 0.0256, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 9.690721649484535, | |
| "grad_norm": 1.2645318508148193, | |
| "learning_rate": 9.92429234576579e-06, | |
| "loss": 0.0276, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 9.700093720712278, | |
| "grad_norm": 0.6546396017074585, | |
| "learning_rate": 9.924219127531521e-06, | |
| "loss": 0.024, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 9.70946579194002, | |
| "grad_norm": 0.8439049124717712, | |
| "learning_rate": 9.924145909297252e-06, | |
| "loss": 0.0259, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 9.71883786316776, | |
| "grad_norm": 0.9637166261672974, | |
| "learning_rate": 9.924072691062984e-06, | |
| "loss": 0.0225, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 9.728209934395501, | |
| "grad_norm": 0.6104253530502319, | |
| "learning_rate": 9.923999472828713e-06, | |
| "loss": 0.0254, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 9.737582005623242, | |
| "grad_norm": 0.5664217472076416, | |
| "learning_rate": 9.923926254594444e-06, | |
| "loss": 0.0192, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 9.746954076850985, | |
| "grad_norm": 0.6904122233390808, | |
| "learning_rate": 9.923853036360176e-06, | |
| "loss": 0.0213, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 9.756326148078726, | |
| "grad_norm": 1.0864416360855103, | |
| "learning_rate": 9.923779818125907e-06, | |
| "loss": 0.0254, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 9.765698219306467, | |
| "grad_norm": 0.791348397731781, | |
| "learning_rate": 9.923706599891638e-06, | |
| "loss": 0.0264, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 9.775070290534208, | |
| "grad_norm": 0.7972745895385742, | |
| "learning_rate": 9.923633381657369e-06, | |
| "loss": 0.0206, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 9.784442361761949, | |
| "grad_norm": 0.6930385231971741, | |
| "learning_rate": 9.9235601634231e-06, | |
| "loss": 0.0283, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 9.793814432989691, | |
| "grad_norm": 0.5096721053123474, | |
| "learning_rate": 9.92348694518883e-06, | |
| "loss": 0.0263, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 9.803186504217432, | |
| "grad_norm": 0.7492228150367737, | |
| "learning_rate": 9.923413726954561e-06, | |
| "loss": 0.0237, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 9.812558575445173, | |
| "grad_norm": 0.8097043037414551, | |
| "learning_rate": 9.923340508720293e-06, | |
| "loss": 0.0225, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 9.821930646672914, | |
| "grad_norm": 0.45464569330215454, | |
| "learning_rate": 9.923267290486024e-06, | |
| "loss": 0.0175, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 9.831302717900655, | |
| "grad_norm": 0.6172147393226624, | |
| "learning_rate": 9.923194072251753e-06, | |
| "loss": 0.0272, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 9.840674789128398, | |
| "grad_norm": 0.9826374650001526, | |
| "learning_rate": 9.923120854017486e-06, | |
| "loss": 0.0233, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.840674789128398, | |
| "eval_loss": 0.03700366988778114, | |
| "eval_pearson_cosine": 0.7760223746299744, | |
| "eval_pearson_dot": 0.7342942953109741, | |
| "eval_pearson_euclidean": 0.7316151857376099, | |
| "eval_pearson_manhattan": 0.7336723804473877, | |
| "eval_runtime": 22.135, | |
| "eval_samples_per_second": 67.766, | |
| "eval_spearman_cosine": 0.7753394120917871, | |
| "eval_spearman_dot": 0.7356003834746606, | |
| "eval_spearman_euclidean": 0.7371167930939387, | |
| "eval_spearman_manhattan": 0.7388623589601665, | |
| "eval_steps_per_second": 8.493, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.850046860356139, | |
| "grad_norm": 0.5944278240203857, | |
| "learning_rate": 9.923047635783216e-06, | |
| "loss": 0.0245, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 9.85941893158388, | |
| "grad_norm": 0.4207167625427246, | |
| "learning_rate": 9.922974417548947e-06, | |
| "loss": 0.0236, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 9.868791002811621, | |
| "grad_norm": 1.185616374015808, | |
| "learning_rate": 9.922901199314678e-06, | |
| "loss": 0.025, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 9.878163074039362, | |
| "grad_norm": 0.6041834354400635, | |
| "learning_rate": 9.92282798108041e-06, | |
| "loss": 0.0229, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 9.887535145267105, | |
| "grad_norm": 1.3135936260223389, | |
| "learning_rate": 9.92275476284614e-06, | |
| "loss": 0.022, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 9.896907216494846, | |
| "grad_norm": 0.7592184543609619, | |
| "learning_rate": 9.92268154461187e-06, | |
| "loss": 0.0251, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 9.906279287722587, | |
| "grad_norm": 0.5679847002029419, | |
| "learning_rate": 9.922608326377603e-06, | |
| "loss": 0.0218, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 9.915651358950328, | |
| "grad_norm": 1.1727142333984375, | |
| "learning_rate": 9.922535108143333e-06, | |
| "loss": 0.0266, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 9.925023430178069, | |
| "grad_norm": 1.2769267559051514, | |
| "learning_rate": 9.922461889909064e-06, | |
| "loss": 0.0237, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 9.934395501405811, | |
| "grad_norm": 0.6604001522064209, | |
| "learning_rate": 9.922388671674795e-06, | |
| "loss": 0.0206, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 9.943767572633552, | |
| "grad_norm": 0.8065370321273804, | |
| "learning_rate": 9.922315453440526e-06, | |
| "loss": 0.0272, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 9.953139643861293, | |
| "grad_norm": 1.0085433721542358, | |
| "learning_rate": 9.922242235206256e-06, | |
| "loss": 0.019, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 9.962511715089034, | |
| "grad_norm": 0.9662045240402222, | |
| "learning_rate": 9.922169016971987e-06, | |
| "loss": 0.0218, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 9.971883786316775, | |
| "grad_norm": 0.49303632974624634, | |
| "learning_rate": 9.922095798737718e-06, | |
| "loss": 0.0223, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 9.981255857544518, | |
| "grad_norm": 0.7215604186058044, | |
| "learning_rate": 9.92202258050345e-06, | |
| "loss": 0.0259, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 9.990627928772259, | |
| "grad_norm": 0.6104753017425537, | |
| "learning_rate": 9.92194936226918e-06, | |
| "loss": 0.0232, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 1.011549949645996, | |
| "learning_rate": 9.92187614403491e-06, | |
| "loss": 0.0234, | |
| "step": 10670 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 10670, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |