| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.751640112464855, | |
| "eval_steps": 250, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01874414245548266, | |
| "grad_norm": 1.0379021167755127, | |
| "learning_rate": 0.0007999941425412586, | |
| "loss": 0.6752, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03748828491096532, | |
| "grad_norm": 0.779513955116272, | |
| "learning_rate": 0.000799988285082517, | |
| "loss": 0.3775, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.056232427366447985, | |
| "grad_norm": 0.7819342017173767, | |
| "learning_rate": 0.0007999824276237754, | |
| "loss": 0.3341, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07497656982193064, | |
| "grad_norm": 0.8172865509986877, | |
| "learning_rate": 0.0007999765701650339, | |
| "loss": 0.3026, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09372071227741331, | |
| "grad_norm": 0.6789480447769165, | |
| "learning_rate": 0.0007999707127062924, | |
| "loss": 0.3091, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11246485473289597, | |
| "grad_norm": 0.6779003143310547, | |
| "learning_rate": 0.0007999648552475509, | |
| "loss": 0.2934, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13120899718837864, | |
| "grad_norm": 0.5085116028785706, | |
| "learning_rate": 0.0007999589977888094, | |
| "loss": 0.2856, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14995313964386128, | |
| "grad_norm": 0.5592414140701294, | |
| "learning_rate": 0.0007999531403300678, | |
| "loss": 0.2736, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16869728209934395, | |
| "grad_norm": 0.5976341366767883, | |
| "learning_rate": 0.0007999472828713263, | |
| "loss": 0.2938, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18744142455482662, | |
| "grad_norm": 0.5462539196014404, | |
| "learning_rate": 0.0007999414254125848, | |
| "loss": 0.2597, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 0.5744641423225403, | |
| "learning_rate": 0.0007999355679538432, | |
| "loss": 0.2784, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22492970946579194, | |
| "grad_norm": 0.6287326812744141, | |
| "learning_rate": 0.0007999297104951018, | |
| "loss": 0.2616, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2436738519212746, | |
| "grad_norm": 0.5516992211341858, | |
| "learning_rate": 0.0007999238530363602, | |
| "loss": 0.2668, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2624179943767573, | |
| "grad_norm": 0.5527953505516052, | |
| "learning_rate": 0.0007999179955776188, | |
| "loss": 0.2623, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.28116213683223995, | |
| "grad_norm": 0.5433118343353271, | |
| "learning_rate": 0.0007999121381188772, | |
| "loss": 0.2692, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29990627928772257, | |
| "grad_norm": 0.5417677760124207, | |
| "learning_rate": 0.0007999062806601356, | |
| "loss": 0.2435, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.31865042174320524, | |
| "grad_norm": 0.523895800113678, | |
| "learning_rate": 0.0007999004232013942, | |
| "loss": 0.2459, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3373945641986879, | |
| "grad_norm": 0.7117316126823425, | |
| "learning_rate": 0.0007998945657426526, | |
| "loss": 0.2638, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3561387066541706, | |
| "grad_norm": 0.777367353439331, | |
| "learning_rate": 0.0007998887082839111, | |
| "loss": 0.2423, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.37488284910965325, | |
| "grad_norm": 0.6237531304359436, | |
| "learning_rate": 0.0007998828508251696, | |
| "loss": 0.2446, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3936269915651359, | |
| "grad_norm": 0.47088104486465454, | |
| "learning_rate": 0.000799876993366428, | |
| "loss": 0.2556, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 0.48523184657096863, | |
| "learning_rate": 0.0007998711359076864, | |
| "loss": 0.2361, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4311152764761012, | |
| "grad_norm": 0.5119248628616333, | |
| "learning_rate": 0.000799865278448945, | |
| "loss": 0.2401, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4498594189315839, | |
| "grad_norm": 0.4833837151527405, | |
| "learning_rate": 0.0007998594209902034, | |
| "loss": 0.2412, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "grad_norm": 0.5014116764068604, | |
| "learning_rate": 0.000799853563531462, | |
| "loss": 0.2475, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "eval_loss": 0.11557532846927643, | |
| "eval_pearson_cosine": 0.7298070192337036, | |
| "eval_pearson_dot": 0.7093910574913025, | |
| "eval_pearson_euclidean": 0.7365932464599609, | |
| "eval_pearson_manhattan": 0.7382453680038452, | |
| "eval_runtime": 27.2701, | |
| "eval_samples_per_second": 55.005, | |
| "eval_spearman_cosine": 0.7577597198343433, | |
| "eval_spearman_dot": 0.7151991550847255, | |
| "eval_spearman_euclidean": 0.7434473510612767, | |
| "eval_spearman_manhattan": 0.747354805702794, | |
| "eval_steps_per_second": 6.894, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4873477038425492, | |
| "grad_norm": 0.4523856043815613, | |
| "learning_rate": 0.0007998477060727204, | |
| "loss": 0.2261, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5060918462980318, | |
| "grad_norm": 0.5232961177825928, | |
| "learning_rate": 0.0007998418486139788, | |
| "loss": 0.2473, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5248359887535146, | |
| "grad_norm": 0.5113686323165894, | |
| "learning_rate": 0.0007998359911552374, | |
| "loss": 0.2354, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5435801312089972, | |
| "grad_norm": 0.458387166261673, | |
| "learning_rate": 0.0007998301336964958, | |
| "loss": 0.2568, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5623242736644799, | |
| "grad_norm": 0.45359304547309875, | |
| "learning_rate": 0.0007998242762377543, | |
| "loss": 0.2415, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5810684161199625, | |
| "grad_norm": 0.46073561906814575, | |
| "learning_rate": 0.0007998184187790128, | |
| "loss": 0.2291, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5998125585754451, | |
| "grad_norm": 0.4503585696220398, | |
| "learning_rate": 0.0007998125613202712, | |
| "loss": 0.2368, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 0.4221174716949463, | |
| "learning_rate": 0.0007998067038615297, | |
| "loss": 0.2322, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6373008434864105, | |
| "grad_norm": 0.42522430419921875, | |
| "learning_rate": 0.0007998008464027882, | |
| "loss": 0.241, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6560449859418932, | |
| "grad_norm": 0.47986653447151184, | |
| "learning_rate": 0.0007997949889440467, | |
| "loss": 0.2252, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6747891283973758, | |
| "grad_norm": 0.6221345067024231, | |
| "learning_rate": 0.0007997891314853051, | |
| "loss": 0.2418, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6935332708528584, | |
| "grad_norm": 0.4737911820411682, | |
| "learning_rate": 0.0007997832740265637, | |
| "loss": 0.2232, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7122774133083412, | |
| "grad_norm": 0.47973355650901794, | |
| "learning_rate": 0.000799777416567822, | |
| "loss": 0.227, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7310215557638238, | |
| "grad_norm": 0.4451119005680084, | |
| "learning_rate": 0.0007997715591090806, | |
| "loss": 0.2206, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7497656982193065, | |
| "grad_norm": 0.4816949963569641, | |
| "learning_rate": 0.0007997657016503391, | |
| "loss": 0.221, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7685098406747891, | |
| "grad_norm": 0.44739213585853577, | |
| "learning_rate": 0.0007997598441915975, | |
| "loss": 0.226, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7872539831302718, | |
| "grad_norm": 0.4036339521408081, | |
| "learning_rate": 0.0007997539867328561, | |
| "loss": 0.2359, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8059981255857545, | |
| "grad_norm": 0.4639866054058075, | |
| "learning_rate": 0.0007997481292741144, | |
| "loss": 0.2251, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 0.4569236636161804, | |
| "learning_rate": 0.0007997422718153729, | |
| "loss": 0.2337, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8434864104967198, | |
| "grad_norm": 0.37712669372558594, | |
| "learning_rate": 0.0007997364143566314, | |
| "loss": 0.2009, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8622305529522024, | |
| "grad_norm": 0.3660425543785095, | |
| "learning_rate": 0.0007997305568978899, | |
| "loss": 0.2217, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8809746954076851, | |
| "grad_norm": 0.37786588072776794, | |
| "learning_rate": 0.0007997246994391483, | |
| "loss": 0.2256, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8997188378631678, | |
| "grad_norm": 0.34985071420669556, | |
| "learning_rate": 0.0007997188419804069, | |
| "loss": 0.2137, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9184629803186504, | |
| "grad_norm": 0.5390796661376953, | |
| "learning_rate": 0.0007997129845216653, | |
| "loss": 0.2164, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "grad_norm": 0.45559704303741455, | |
| "learning_rate": 0.0007997071270629237, | |
| "loss": 0.2267, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "eval_loss": 0.120799720287323, | |
| "eval_pearson_cosine": 0.739181399345398, | |
| "eval_pearson_dot": 0.7062755823135376, | |
| "eval_pearson_euclidean": 0.7443870306015015, | |
| "eval_pearson_manhattan": 0.7475869655609131, | |
| "eval_runtime": 27.6267, | |
| "eval_samples_per_second": 54.295, | |
| "eval_spearman_cosine": 0.7615568395698803, | |
| "eval_spearman_dot": 0.7105088137905408, | |
| "eval_spearman_euclidean": 0.7483334786946333, | |
| "eval_spearman_manhattan": 0.753415014125672, | |
| "eval_steps_per_second": 6.805, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9559512652296157, | |
| "grad_norm": 0.4264763593673706, | |
| "learning_rate": 0.0007997012696041823, | |
| "loss": 0.1925, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9746954076850984, | |
| "grad_norm": 0.3580343425273895, | |
| "learning_rate": 0.0007996954121454407, | |
| "loss": 0.2208, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.993439550140581, | |
| "grad_norm": 0.3899773359298706, | |
| "learning_rate": 0.0007996895546866993, | |
| "loss": 0.2219, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.013120899718838, | |
| "grad_norm": 0.34510987997055054, | |
| "learning_rate": 0.0007996836972279577, | |
| "loss": 0.1681, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0318650421743205, | |
| "grad_norm": 0.52265465259552, | |
| "learning_rate": 0.0007996778397692161, | |
| "loss": 0.1328, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0506091846298031, | |
| "grad_norm": 0.39699018001556396, | |
| "learning_rate": 0.0007996719823104747, | |
| "loss": 0.146, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.069353327085286, | |
| "grad_norm": 0.3806276023387909, | |
| "learning_rate": 0.0007996661248517331, | |
| "loss": 0.1394, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0880974695407686, | |
| "grad_norm": 0.38312238454818726, | |
| "learning_rate": 0.0007996602673929916, | |
| "loss": 0.1296, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.1068416119962512, | |
| "grad_norm": 0.40978288650512695, | |
| "learning_rate": 0.0007996544099342501, | |
| "loss": 0.1328, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1255857544517338, | |
| "grad_norm": 0.36600926518440247, | |
| "learning_rate": 0.0007996485524755086, | |
| "loss": 0.1403, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1443298969072164, | |
| "grad_norm": 0.44099515676498413, | |
| "learning_rate": 0.0007996426950167669, | |
| "loss": 0.133, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.1630740393626993, | |
| "grad_norm": 0.3496938645839691, | |
| "learning_rate": 0.0007996368375580255, | |
| "loss": 0.1334, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 0.3754808306694031, | |
| "learning_rate": 0.0007996309800992839, | |
| "loss": 0.1417, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2005623242736645, | |
| "grad_norm": 0.45285704731941223, | |
| "learning_rate": 0.0007996251226405425, | |
| "loss": 0.1525, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.219306466729147, | |
| "grad_norm": 0.4499760866165161, | |
| "learning_rate": 0.000799619265181801, | |
| "loss": 0.1432, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.2380506091846297, | |
| "grad_norm": 0.3310897648334503, | |
| "learning_rate": 0.0007996134077230593, | |
| "loss": 0.1392, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2567947516401126, | |
| "grad_norm": 0.32931098341941833, | |
| "learning_rate": 0.0007996075502643179, | |
| "loss": 0.1484, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.2755388940955952, | |
| "grad_norm": 0.32448434829711914, | |
| "learning_rate": 0.0007996016928055763, | |
| "loss": 0.1464, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.2942830365510778, | |
| "grad_norm": 0.38759011030197144, | |
| "learning_rate": 0.0007995958353468348, | |
| "loss": 0.1412, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.3130271790065604, | |
| "grad_norm": 0.29216083884239197, | |
| "learning_rate": 0.0007995899778880933, | |
| "loss": 0.1381, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.331771321462043, | |
| "grad_norm": 0.36174365878105164, | |
| "learning_rate": 0.0007995841204293518, | |
| "loss": 0.1429, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3505154639175259, | |
| "grad_norm": 0.3533223569393158, | |
| "learning_rate": 0.0007995782629706102, | |
| "loss": 0.1349, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.3692596063730085, | |
| "grad_norm": 0.34500110149383545, | |
| "learning_rate": 0.0007995724055118687, | |
| "loss": 0.1445, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.388003748828491, | |
| "grad_norm": 0.3347356617450714, | |
| "learning_rate": 0.0007995665480531272, | |
| "loss": 0.1508, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.4067478912839737, | |
| "grad_norm": 0.5160906910896301, | |
| "learning_rate": 0.0007995606905943857, | |
| "loss": 0.156, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.4067478912839737, | |
| "eval_loss": 0.07170082628726959, | |
| "eval_pearson_cosine": 0.7533469200134277, | |
| "eval_pearson_dot": 0.7396403551101685, | |
| "eval_pearson_euclidean": 0.7499958276748657, | |
| "eval_pearson_manhattan": 0.7558424472808838, | |
| "eval_runtime": 27.4077, | |
| "eval_samples_per_second": 54.729, | |
| "eval_spearman_cosine": 0.7671397694369135, | |
| "eval_spearman_dot": 0.7444267819763823, | |
| "eval_spearman_euclidean": 0.753108908424924, | |
| "eval_spearman_manhattan": 0.7608183984789815, | |
| "eval_steps_per_second": 6.859, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.4254920337394563, | |
| "grad_norm": 0.3751141428947449, | |
| "learning_rate": 0.0007995548331356442, | |
| "loss": 0.1606, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.4442361761949392, | |
| "grad_norm": 0.3998653292655945, | |
| "learning_rate": 0.0007995489756769026, | |
| "loss": 0.1447, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4629803186504218, | |
| "grad_norm": 0.32710304856300354, | |
| "learning_rate": 0.0007995431182181611, | |
| "loss": 0.1381, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.4817244611059044, | |
| "grad_norm": 0.3845181167125702, | |
| "learning_rate": 0.0007995372607594195, | |
| "loss": 0.1488, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.5004686035613872, | |
| "grad_norm": 0.39582550525665283, | |
| "learning_rate": 0.000799531403300678, | |
| "loss": 0.1546, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.5192127460168696, | |
| "grad_norm": 0.38061007857322693, | |
| "learning_rate": 0.0007995255458419366, | |
| "loss": 0.1403, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.5379568884723525, | |
| "grad_norm": 0.4833431839942932, | |
| "learning_rate": 0.000799519688383195, | |
| "loss": 0.1582, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.556701030927835, | |
| "grad_norm": 0.36174824833869934, | |
| "learning_rate": 0.0007995138309244534, | |
| "loss": 0.1561, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.5754451733833177, | |
| "grad_norm": 0.4403337836265564, | |
| "learning_rate": 0.0007995079734657119, | |
| "loss": 0.1472, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.5941893158388005, | |
| "grad_norm": 0.384498655796051, | |
| "learning_rate": 0.0007995021160069704, | |
| "loss": 0.1458, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.612933458294283, | |
| "grad_norm": 0.29897651076316833, | |
| "learning_rate": 0.0007994962585482289, | |
| "loss": 0.1528, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.6316776007497658, | |
| "grad_norm": 0.3865436613559723, | |
| "learning_rate": 0.0007994904010894874, | |
| "loss": 0.1497, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.6504217432052484, | |
| "grad_norm": 0.34619590640068054, | |
| "learning_rate": 0.0007994845436307458, | |
| "loss": 0.1425, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.669165885660731, | |
| "grad_norm": 0.2863396108150482, | |
| "learning_rate": 0.0007994786861720043, | |
| "loss": 0.1441, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.6879100281162138, | |
| "grad_norm": 0.371105432510376, | |
| "learning_rate": 0.0007994728287132628, | |
| "loss": 0.1437, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.7066541705716962, | |
| "grad_norm": 0.3657528758049011, | |
| "learning_rate": 0.0007994669712545212, | |
| "loss": 0.1435, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.725398313027179, | |
| "grad_norm": 0.333408385515213, | |
| "learning_rate": 0.0007994611137957798, | |
| "loss": 0.1322, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.7441424554826617, | |
| "grad_norm": 0.34960120916366577, | |
| "learning_rate": 0.0007994552563370382, | |
| "loss": 0.1551, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.7628865979381443, | |
| "grad_norm": 0.30177751183509827, | |
| "learning_rate": 0.0007994493988782967, | |
| "loss": 0.1641, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.7816307403936271, | |
| "grad_norm": 0.39110997319221497, | |
| "learning_rate": 0.0007994435414195552, | |
| "loss": 0.1523, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.8003748828491095, | |
| "grad_norm": 0.30561545491218567, | |
| "learning_rate": 0.0007994376839608136, | |
| "loss": 0.1516, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.8191190253045924, | |
| "grad_norm": 0.32364317774772644, | |
| "learning_rate": 0.0007994318265020722, | |
| "loss": 0.145, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.837863167760075, | |
| "grad_norm": 0.36380302906036377, | |
| "learning_rate": 0.0007994259690433306, | |
| "loss": 0.1576, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.8566073102155576, | |
| "grad_norm": 0.3005361258983612, | |
| "learning_rate": 0.0007994201115845891, | |
| "loss": 0.1535, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.8753514526710404, | |
| "grad_norm": 0.33928126096725464, | |
| "learning_rate": 0.0007994142541258476, | |
| "loss": 0.1741, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8753514526710404, | |
| "eval_loss": 0.08202449977397919, | |
| "eval_pearson_cosine": 0.7594348192214966, | |
| "eval_pearson_dot": 0.7289378643035889, | |
| "eval_pearson_euclidean": 0.7542859315872192, | |
| "eval_pearson_manhattan": 0.7619277238845825, | |
| "eval_runtime": 27.4437, | |
| "eval_samples_per_second": 54.657, | |
| "eval_spearman_cosine": 0.7679965558172381, | |
| "eval_spearman_dot": 0.7298723962118674, | |
| "eval_spearman_euclidean": 0.7535528712822376, | |
| "eval_spearman_manhattan": 0.7621231744319878, | |
| "eval_steps_per_second": 6.85, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8940955951265228, | |
| "grad_norm": 0.3324965834617615, | |
| "learning_rate": 0.000799408396667106, | |
| "loss": 0.1491, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.9128397375820057, | |
| "grad_norm": 0.3112243711948395, | |
| "learning_rate": 0.0007994025392083644, | |
| "loss": 0.1622, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.9315838800374883, | |
| "grad_norm": 0.3381972014904022, | |
| "learning_rate": 0.000799396681749623, | |
| "loss": 0.1462, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.9503280224929709, | |
| "grad_norm": 0.3424859642982483, | |
| "learning_rate": 0.0007993908242908814, | |
| "loss": 0.1651, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.9690721649484537, | |
| "grad_norm": 0.42161494493484497, | |
| "learning_rate": 0.0007993849668321399, | |
| "loss": 0.1521, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9878163074039361, | |
| "grad_norm": 0.3541307747364044, | |
| "learning_rate": 0.0007993791093733984, | |
| "loss": 0.162, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.007497656982193, | |
| "grad_norm": 0.22963856160640717, | |
| "learning_rate": 0.0007993732519146568, | |
| "loss": 0.1297, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.026241799437676, | |
| "grad_norm": 0.28242990374565125, | |
| "learning_rate": 0.0007993673944559154, | |
| "loss": 0.0773, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.044985941893158, | |
| "grad_norm": 0.3516603112220764, | |
| "learning_rate": 0.0007993615369971738, | |
| "loss": 0.0799, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.063730084348641, | |
| "grad_norm": 0.3558428883552551, | |
| "learning_rate": 0.0007993556795384323, | |
| "loss": 0.0885, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.082474226804124, | |
| "grad_norm": 0.3211170732975006, | |
| "learning_rate": 0.0007993498220796908, | |
| "loss": 0.0825, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.1012183692596063, | |
| "grad_norm": 0.20844395458698273, | |
| "learning_rate": 0.0007993439646209492, | |
| "loss": 0.0763, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.119962511715089, | |
| "grad_norm": 0.3156029284000397, | |
| "learning_rate": 0.0007993381071622077, | |
| "loss": 0.0797, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.138706654170572, | |
| "grad_norm": 0.3986193835735321, | |
| "learning_rate": 0.0007993322497034662, | |
| "loss": 0.0852, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.1574507966260543, | |
| "grad_norm": 0.18681703507900238, | |
| "learning_rate": 0.0007993263922447247, | |
| "loss": 0.0779, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.176194939081537, | |
| "grad_norm": 0.2365262657403946, | |
| "learning_rate": 0.0007993205347859831, | |
| "loss": 0.0833, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.1949390815370196, | |
| "grad_norm": 0.25459378957748413, | |
| "learning_rate": 0.0007993146773272417, | |
| "loss": 0.0761, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.2136832239925024, | |
| "grad_norm": 0.39024218916893005, | |
| "learning_rate": 0.0007993088198685, | |
| "loss": 0.0873, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.2324273664479852, | |
| "grad_norm": 0.3662407100200653, | |
| "learning_rate": 0.0007993029624097585, | |
| "loss": 0.0842, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.2511715089034676, | |
| "grad_norm": 0.30686551332473755, | |
| "learning_rate": 0.0007992971049510171, | |
| "loss": 0.0845, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.2699156513589505, | |
| "grad_norm": 0.29860755801200867, | |
| "learning_rate": 0.0007992912474922755, | |
| "loss": 0.0806, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.288659793814433, | |
| "grad_norm": 0.272029310464859, | |
| "learning_rate": 0.0007992853900335341, | |
| "loss": 0.0849, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.3074039362699157, | |
| "grad_norm": 0.23034346103668213, | |
| "learning_rate": 0.0007992795325747924, | |
| "loss": 0.0873, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.3261480787253985, | |
| "grad_norm": 0.38400229811668396, | |
| "learning_rate": 0.0007992736751160509, | |
| "loss": 0.0854, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.344892221180881, | |
| "grad_norm": 0.2619285583496094, | |
| "learning_rate": 0.0007992678176573094, | |
| "loss": 0.0854, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.344892221180881, | |
| "eval_loss": 0.0549059733748436, | |
| "eval_pearson_cosine": 0.7565033435821533, | |
| "eval_pearson_dot": 0.7438405752182007, | |
| "eval_pearson_euclidean": 0.7398021221160889, | |
| "eval_pearson_manhattan": 0.7514023780822754, | |
| "eval_runtime": 27.2363, | |
| "eval_samples_per_second": 55.074, | |
| "eval_spearman_cosine": 0.7657686934808458, | |
| "eval_spearman_dot": 0.7450125999969373, | |
| "eval_spearman_euclidean": 0.7411997174627442, | |
| "eval_spearman_manhattan": 0.754436544283217, | |
| "eval_steps_per_second": 6.903, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 0.2573038935661316, | |
| "learning_rate": 0.0007992619601985679, | |
| "loss": 0.0812, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.382380506091846, | |
| "grad_norm": 0.2684009373188019, | |
| "learning_rate": 0.0007992561027398263, | |
| "loss": 0.0833, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.401124648547329, | |
| "grad_norm": 0.2773861289024353, | |
| "learning_rate": 0.0007992502452810849, | |
| "loss": 0.0902, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.419868791002812, | |
| "grad_norm": 0.3180435001850128, | |
| "learning_rate": 0.0007992443878223433, | |
| "loss": 0.0882, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.438612933458294, | |
| "grad_norm": 0.2758583426475525, | |
| "learning_rate": 0.0007992385303636017, | |
| "loss": 0.0815, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.457357075913777, | |
| "grad_norm": 0.3327929973602295, | |
| "learning_rate": 0.0007992326729048603, | |
| "loss": 0.0949, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.4761012183692594, | |
| "grad_norm": 0.31645268201828003, | |
| "learning_rate": 0.0007992268154461187, | |
| "loss": 0.0942, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.4948453608247423, | |
| "grad_norm": 0.2587279975414276, | |
| "learning_rate": 0.0007992209579873773, | |
| "loss": 0.0889, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.513589503280225, | |
| "grad_norm": 0.29799187183380127, | |
| "learning_rate": 0.0007992151005286357, | |
| "loss": 0.1027, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.5323336457357075, | |
| "grad_norm": 0.3042343258857727, | |
| "learning_rate": 0.0007992092430698941, | |
| "loss": 0.0947, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.5510777881911904, | |
| "grad_norm": 0.36439308524131775, | |
| "learning_rate": 0.0007992033856111527, | |
| "loss": 0.0887, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.5698219306466727, | |
| "grad_norm": 0.24675941467285156, | |
| "learning_rate": 0.0007991975281524111, | |
| "loss": 0.0893, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.5885660731021556, | |
| "grad_norm": 0.3232560157775879, | |
| "learning_rate": 0.0007991916706936696, | |
| "loss": 0.0949, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.6073102155576384, | |
| "grad_norm": 0.3095908463001251, | |
| "learning_rate": 0.0007991858132349281, | |
| "loss": 0.0893, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.626054358013121, | |
| "grad_norm": 0.24996769428253174, | |
| "learning_rate": 0.0007991799557761866, | |
| "loss": 0.0918, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.6447985004686037, | |
| "grad_norm": 0.3013332486152649, | |
| "learning_rate": 0.0007991740983174449, | |
| "loss": 0.0965, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.663542642924086, | |
| "grad_norm": 0.43422290682792664, | |
| "learning_rate": 0.0007991682408587035, | |
| "loss": 0.1144, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.682286785379569, | |
| "grad_norm": 0.3462458848953247, | |
| "learning_rate": 0.0007991623833999619, | |
| "loss": 0.1068, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.7010309278350517, | |
| "grad_norm": 0.2752937078475952, | |
| "learning_rate": 0.0007991565259412205, | |
| "loss": 0.1048, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.719775070290534, | |
| "grad_norm": 0.33038660883903503, | |
| "learning_rate": 0.000799150668482479, | |
| "loss": 0.1055, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.738519212746017, | |
| "grad_norm": 0.28442054986953735, | |
| "learning_rate": 0.0007991448110237373, | |
| "loss": 0.1053, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.7572633552014993, | |
| "grad_norm": 0.25279343128204346, | |
| "learning_rate": 0.0007991389535649959, | |
| "loss": 0.109, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.776007497656982, | |
| "grad_norm": 0.3681808114051819, | |
| "learning_rate": 0.0007991330961062543, | |
| "loss": 0.1092, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.794751640112465, | |
| "grad_norm": 0.3884279429912567, | |
| "learning_rate": 0.0007991272386475128, | |
| "loss": 0.1105, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.8134957825679474, | |
| "grad_norm": 0.3542380928993225, | |
| "learning_rate": 0.0007991213811887713, | |
| "loss": 0.109, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.8134957825679474, | |
| "eval_loss": 0.06194353476166725, | |
| "eval_pearson_cosine": 0.7544945478439331, | |
| "eval_pearson_dot": 0.7297648787498474, | |
| "eval_pearson_euclidean": 0.7457708120346069, | |
| "eval_pearson_manhattan": 0.7537869215011597, | |
| "eval_runtime": 27.28, | |
| "eval_samples_per_second": 54.985, | |
| "eval_spearman_cosine": 0.7677406665753612, | |
| "eval_spearman_dot": 0.7355031880736892, | |
| "eval_spearman_euclidean": 0.752266788615453, | |
| "eval_spearman_manhattan": 0.7620929193607933, | |
| "eval_steps_per_second": 6.892, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.8322399250234302, | |
| "grad_norm": 0.28738659620285034, | |
| "learning_rate": 0.0007991155237300298, | |
| "loss": 0.1043, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.8509840674789126, | |
| "grad_norm": 0.39117714762687683, | |
| "learning_rate": 0.0007991096662712882, | |
| "loss": 0.0993, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.8697282099343955, | |
| "grad_norm": 0.3144415616989136, | |
| "learning_rate": 0.0007991038088125467, | |
| "loss": 0.1145, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.8884723523898783, | |
| "grad_norm": 0.28154823184013367, | |
| "learning_rate": 0.0007990979513538052, | |
| "loss": 0.1128, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.9072164948453607, | |
| "grad_norm": 0.3766768276691437, | |
| "learning_rate": 0.0007990920938950637, | |
| "loss": 0.1033, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.9259606373008435, | |
| "grad_norm": 0.38604792952537537, | |
| "learning_rate": 0.0007990862364363222, | |
| "loss": 0.1044, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.944704779756326, | |
| "grad_norm": 0.36833906173706055, | |
| "learning_rate": 0.0007990803789775806, | |
| "loss": 0.1159, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.963448922211809, | |
| "grad_norm": 0.3357650935649872, | |
| "learning_rate": 0.0007990745215188391, | |
| "loss": 0.1185, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.9821930646672916, | |
| "grad_norm": 0.30260348320007324, | |
| "learning_rate": 0.0007990686640600976, | |
| "loss": 0.1167, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.0018744142455485, | |
| "grad_norm": 0.28110650181770325, | |
| "learning_rate": 0.000799062806601356, | |
| "loss": 0.1115, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.020618556701031, | |
| "grad_norm": 0.32038745284080505, | |
| "learning_rate": 0.0007990569491426146, | |
| "loss": 0.0637, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.0393626991565137, | |
| "grad_norm": 0.29342755675315857, | |
| "learning_rate": 0.000799051091683873, | |
| "loss": 0.0687, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.058106841611996, | |
| "grad_norm": 0.33964619040489197, | |
| "learning_rate": 0.0007990452342251314, | |
| "loss": 0.0611, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.076850984067479, | |
| "grad_norm": 0.23580531775951385, | |
| "learning_rate": 0.0007990393767663899, | |
| "loss": 0.0635, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.0955951265229618, | |
| "grad_norm": 0.2617776393890381, | |
| "learning_rate": 0.0007990335193076484, | |
| "loss": 0.0709, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.114339268978444, | |
| "grad_norm": 0.25627410411834717, | |
| "learning_rate": 0.0007990276618489068, | |
| "loss": 0.0682, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.133083411433927, | |
| "grad_norm": 0.21987001597881317, | |
| "learning_rate": 0.0007990218043901654, | |
| "loss": 0.06, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.1518275538894094, | |
| "grad_norm": 0.2657093405723572, | |
| "learning_rate": 0.0007990159469314238, | |
| "loss": 0.0712, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.170571696344892, | |
| "grad_norm": 0.23929661512374878, | |
| "learning_rate": 0.0007990100894726823, | |
| "loss": 0.0566, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.189315838800375, | |
| "grad_norm": 0.23572145402431488, | |
| "learning_rate": 0.0007990042320139408, | |
| "loss": 0.0571, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.2080599812558575, | |
| "grad_norm": 0.26287132501602173, | |
| "learning_rate": 0.0007989983745551992, | |
| "loss": 0.067, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.2268041237113403, | |
| "grad_norm": 0.24504464864730835, | |
| "learning_rate": 0.0007989925170964578, | |
| "loss": 0.0637, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.2455482661668227, | |
| "grad_norm": 0.17006747424602509, | |
| "learning_rate": 0.0007989866596377162, | |
| "loss": 0.0552, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.2642924086223055, | |
| "grad_norm": 0.2752683460712433, | |
| "learning_rate": 0.0007989808021789747, | |
| "loss": 0.0639, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.2830365510777884, | |
| "grad_norm": 0.2681417465209961, | |
| "learning_rate": 0.0007989749447202332, | |
| "loss": 0.0705, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.2830365510777884, | |
| "eval_loss": 0.0486464686691761, | |
| "eval_pearson_cosine": 0.7632350921630859, | |
| "eval_pearson_dot": 0.7505504488945007, | |
| "eval_pearson_euclidean": 0.7458865642547607, | |
| "eval_pearson_manhattan": 0.7597954273223877, | |
| "eval_runtime": 27.3673, | |
| "eval_samples_per_second": 54.81, | |
| "eval_spearman_cosine": 0.7679814031707208, | |
| "eval_spearman_dot": 0.7517654374212466, | |
| "eval_spearman_euclidean": 0.7467275015139031, | |
| "eval_spearman_manhattan": 0.7607208640788498, | |
| "eval_steps_per_second": 6.87, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.3017806935332707, | |
| "grad_norm": 0.24346262216567993, | |
| "learning_rate": 0.0007989690872614916, | |
| "loss": 0.0658, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.3205248359887536, | |
| "grad_norm": 0.24957306683063507, | |
| "learning_rate": 0.0007989632298027502, | |
| "loss": 0.0643, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.3392689784442364, | |
| "grad_norm": 0.24416255950927734, | |
| "learning_rate": 0.0007989573723440086, | |
| "loss": 0.0626, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.358013120899719, | |
| "grad_norm": 0.2224712073802948, | |
| "learning_rate": 0.0007989515148852671, | |
| "loss": 0.0634, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.3767572633552017, | |
| "grad_norm": 0.27588558197021484, | |
| "learning_rate": 0.0007989456574265256, | |
| "loss": 0.0644, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.395501405810684, | |
| "grad_norm": 0.26377061009407043, | |
| "learning_rate": 0.000798939799967784, | |
| "loss": 0.0585, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.414245548266167, | |
| "grad_norm": 0.23178541660308838, | |
| "learning_rate": 0.0007989339425090424, | |
| "loss": 0.0588, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.4329896907216497, | |
| "grad_norm": 0.1893617808818817, | |
| "learning_rate": 0.000798928085050301, | |
| "loss": 0.0649, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.451733833177132, | |
| "grad_norm": 0.23445335030555725, | |
| "learning_rate": 0.0007989222275915595, | |
| "loss": 0.0629, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.470477975632615, | |
| "grad_norm": 0.457109659910202, | |
| "learning_rate": 0.0007989163701328179, | |
| "loss": 0.0646, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.4892221180880973, | |
| "grad_norm": 0.2316947728395462, | |
| "learning_rate": 0.0007989105126740764, | |
| "loss": 0.0677, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.50796626054358, | |
| "grad_norm": 0.26950669288635254, | |
| "learning_rate": 0.0007989046552153348, | |
| "loss": 0.0732, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.526710402999063, | |
| "grad_norm": 0.25258171558380127, | |
| "learning_rate": 0.0007988987977565933, | |
| "loss": 0.0635, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.5454545454545454, | |
| "grad_norm": 0.2282831370830536, | |
| "learning_rate": 0.0007988929402978518, | |
| "loss": 0.0766, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.5641986879100283, | |
| "grad_norm": 0.3049706220626831, | |
| "learning_rate": 0.0007988870828391103, | |
| "loss": 0.0766, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.5829428303655106, | |
| "grad_norm": 0.21556228399276733, | |
| "learning_rate": 0.0007988812253803688, | |
| "loss": 0.0694, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 3.6016869728209935, | |
| "grad_norm": 0.2859863340854645, | |
| "learning_rate": 0.0007988753679216272, | |
| "loss": 0.0665, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 3.6204311152764763, | |
| "grad_norm": 0.22522784769535065, | |
| "learning_rate": 0.0007988695104628857, | |
| "loss": 0.073, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 3.6391752577319587, | |
| "grad_norm": 0.3301334083080292, | |
| "learning_rate": 0.0007988636530041442, | |
| "loss": 0.0745, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 3.6579194001874415, | |
| "grad_norm": 0.21438319981098175, | |
| "learning_rate": 0.0007988577955454027, | |
| "loss": 0.0713, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.676663542642924, | |
| "grad_norm": 0.3207626938819885, | |
| "learning_rate": 0.0007988519380866611, | |
| "loss": 0.0759, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 3.695407685098407, | |
| "grad_norm": 0.25493231415748596, | |
| "learning_rate": 0.0007988460806279197, | |
| "loss": 0.0722, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 3.7141518275538896, | |
| "grad_norm": 0.2732018530368805, | |
| "learning_rate": 0.0007988402231691781, | |
| "loss": 0.0773, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 3.732895970009372, | |
| "grad_norm": 0.19611899554729462, | |
| "learning_rate": 0.0007988343657104365, | |
| "loss": 0.0773, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 3.751640112464855, | |
| "grad_norm": 0.2664394676685333, | |
| "learning_rate": 0.0007988285082516951, | |
| "loss": 0.072, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.751640112464855, | |
| "eval_loss": 0.05059043690562248, | |
| "eval_pearson_cosine": 0.7549334764480591, | |
| "eval_pearson_dot": 0.7364022731781006, | |
| "eval_pearson_euclidean": 0.7430644035339355, | |
| "eval_pearson_manhattan": 0.7528964281082153, | |
| "eval_runtime": 27.2774, | |
| "eval_samples_per_second": 54.991, | |
| "eval_spearman_cosine": 0.7612361982335023, | |
| "eval_spearman_dot": 0.7370856746295986, | |
| "eval_spearman_euclidean": 0.7449844586260276, | |
| "eval_spearman_manhattan": 0.7551494271561938, | |
| "eval_steps_per_second": 6.892, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5330, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |