| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.222658667991288, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003912727828512476, | |
| "grad_norm": 8.57561206817627, | |
| "learning_rate": 1.55e-06, | |
| "loss": 5.0338, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.007825455657024952, | |
| "grad_norm": 5.36021089553833, | |
| "learning_rate": 3.1500000000000003e-06, | |
| "loss": 4.456, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.011738183485537427, | |
| "grad_norm": 3.3196067810058594, | |
| "learning_rate": 4.75e-06, | |
| "loss": 3.9216, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.015650911314049904, | |
| "grad_norm": 2.2839956283569336, | |
| "learning_rate": 6.35e-06, | |
| "loss": 3.6983, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.01956363914256238, | |
| "grad_norm": 1.4226499795913696, | |
| "learning_rate": 7.95e-06, | |
| "loss": 3.5863, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.023476366971074854, | |
| "grad_norm": 0.9770936369895935, | |
| "learning_rate": 9.55e-06, | |
| "loss": 3.5076, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.02738909479958733, | |
| "grad_norm": 0.6855128407478333, | |
| "learning_rate": 1.115e-05, | |
| "loss": 3.4515, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.03130182262809981, | |
| "grad_norm": 0.5743525624275208, | |
| "learning_rate": 1.2750000000000002e-05, | |
| "loss": 3.4145, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.03521455045661228, | |
| "grad_norm": 0.4765739440917969, | |
| "learning_rate": 1.435e-05, | |
| "loss": 3.3892, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.03912727828512476, | |
| "grad_norm": 0.40247443318367004, | |
| "learning_rate": 1.595e-05, | |
| "loss": 3.3664, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.04304000611363723, | |
| "grad_norm": 0.3582874834537506, | |
| "learning_rate": 1.755e-05, | |
| "loss": 3.3487, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.04695273394214971, | |
| "grad_norm": 0.31657862663269043, | |
| "learning_rate": 1.915e-05, | |
| "loss": 3.3349, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.05086546177066218, | |
| "grad_norm": 0.28206518292427063, | |
| "learning_rate": 2.075e-05, | |
| "loss": 3.3197, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.05477818959917466, | |
| "grad_norm": 0.2577824890613556, | |
| "learning_rate": 2.235e-05, | |
| "loss": 3.3058, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.05869091742768714, | |
| "grad_norm": 0.23786848783493042, | |
| "learning_rate": 2.395e-05, | |
| "loss": 3.2955, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.06260364525619962, | |
| "grad_norm": 0.2239329218864441, | |
| "learning_rate": 2.555e-05, | |
| "loss": 3.2846, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.06651637308471209, | |
| "grad_norm": 0.22519271075725555, | |
| "learning_rate": 2.7150000000000003e-05, | |
| "loss": 3.2731, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.07042910091322456, | |
| "grad_norm": 0.2189016044139862, | |
| "learning_rate": 2.8749999999999997e-05, | |
| "loss": 3.2663, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.07434182874173703, | |
| "grad_norm": 0.20760661363601685, | |
| "learning_rate": 3.035e-05, | |
| "loss": 3.2581, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.07825455657024952, | |
| "grad_norm": 0.205606147646904, | |
| "learning_rate": 3.1950000000000004e-05, | |
| "loss": 3.2451, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.08216728439876199, | |
| "grad_norm": 0.22558899223804474, | |
| "learning_rate": 3.355e-05, | |
| "loss": 3.2412, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.08608001222727446, | |
| "grad_norm": 0.22584667801856995, | |
| "learning_rate": 3.515e-05, | |
| "loss": 3.2358, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.08999274005578695, | |
| "grad_norm": 0.22091105580329895, | |
| "learning_rate": 3.675e-05, | |
| "loss": 3.2302, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.09390546788429942, | |
| "grad_norm": 0.22428959608078003, | |
| "learning_rate": 3.8350000000000004e-05, | |
| "loss": 3.2228, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.09781819571281189, | |
| "grad_norm": 0.22730223834514618, | |
| "learning_rate": 3.995e-05, | |
| "loss": 3.2207, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.10173092354132436, | |
| "grad_norm": 0.28039082884788513, | |
| "learning_rate": 4.155e-05, | |
| "loss": 3.2171, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.10564365136983685, | |
| "grad_norm": 0.32776346802711487, | |
| "learning_rate": 4.315e-05, | |
| "loss": 3.2104, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.10955637919834932, | |
| "grad_norm": 0.2800813615322113, | |
| "learning_rate": 4.4750000000000004e-05, | |
| "loss": 3.2053, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.11346910702686179, | |
| "grad_norm": 0.24571874737739563, | |
| "learning_rate": 4.635e-05, | |
| "loss": 3.2046, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.11738183485537428, | |
| "grad_norm": 0.5581298470497131, | |
| "learning_rate": 4.795e-05, | |
| "loss": 3.2004, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.12129456268388675, | |
| "grad_norm": 0.47118815779685974, | |
| "learning_rate": 4.9550000000000005e-05, | |
| "loss": 3.1967, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.12520729051239923, | |
| "grad_norm": 0.23707512021064758, | |
| "learning_rate": 4.9872222222222225e-05, | |
| "loss": 3.1945, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.1291200183409117, | |
| "grad_norm": 0.41069141030311584, | |
| "learning_rate": 4.969444444444445e-05, | |
| "loss": 3.1928, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.13303274616942418, | |
| "grad_norm": 0.376223623752594, | |
| "learning_rate": 4.9516666666666666e-05, | |
| "loss": 3.1871, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.13694547399793663, | |
| "grad_norm": 0.22380244731903076, | |
| "learning_rate": 4.933888888888889e-05, | |
| "loss": 3.1862, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.14085820182644912, | |
| "grad_norm": 0.2950900197029114, | |
| "learning_rate": 4.9161111111111115e-05, | |
| "loss": 3.1828, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.1447709296549616, | |
| "grad_norm": 0.25872257351875305, | |
| "learning_rate": 4.8983333333333336e-05, | |
| "loss": 3.1828, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.14868365748347406, | |
| "grad_norm": 0.3597142994403839, | |
| "learning_rate": 4.880555555555556e-05, | |
| "loss": 3.1845, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.15259638531198655, | |
| "grad_norm": 0.30377593636512756, | |
| "learning_rate": 4.862777777777778e-05, | |
| "loss": 3.1806, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.15650911314049903, | |
| "grad_norm": 0.3617115318775177, | |
| "learning_rate": 4.845e-05, | |
| "loss": 3.178, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.1604218409690115, | |
| "grad_norm": 0.31589606404304504, | |
| "learning_rate": 4.8272222222222226e-05, | |
| "loss": 3.1787, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.16433456879752398, | |
| "grad_norm": 0.30715763568878174, | |
| "learning_rate": 4.809444444444445e-05, | |
| "loss": 3.1754, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.16824729662603646, | |
| "grad_norm": 0.2574257254600525, | |
| "learning_rate": 4.791666666666667e-05, | |
| "loss": 3.1732, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.17216002445454892, | |
| "grad_norm": 0.3290633261203766, | |
| "learning_rate": 4.773888888888889e-05, | |
| "loss": 3.1723, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.1760727522830614, | |
| "grad_norm": 0.24164608120918274, | |
| "learning_rate": 4.756111111111111e-05, | |
| "loss": 3.1693, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.1799854801115739, | |
| "grad_norm": 0.30125918984413147, | |
| "learning_rate": 4.738333333333334e-05, | |
| "loss": 3.1685, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.18389820794008635, | |
| "grad_norm": 0.3488104045391083, | |
| "learning_rate": 4.720555555555556e-05, | |
| "loss": 3.1678, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.18781093576859884, | |
| "grad_norm": 0.2793637812137604, | |
| "learning_rate": 4.702777777777778e-05, | |
| "loss": 3.1668, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.1917236635971113, | |
| "grad_norm": 0.2682870030403137, | |
| "learning_rate": 4.685000000000001e-05, | |
| "loss": 3.1642, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.19563639142562378, | |
| "grad_norm": 0.36307454109191895, | |
| "learning_rate": 4.667222222222222e-05, | |
| "loss": 3.1654, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.19954911925413626, | |
| "grad_norm": 0.23930683732032776, | |
| "learning_rate": 4.649444444444445e-05, | |
| "loss": 3.1641, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.20346184708264872, | |
| "grad_norm": 0.3049800992012024, | |
| "learning_rate": 4.631666666666667e-05, | |
| "loss": 3.1654, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.2073745749111612, | |
| "grad_norm": 0.27725374698638916, | |
| "learning_rate": 4.613888888888889e-05, | |
| "loss": 3.1642, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.2112873027396737, | |
| "grad_norm": 0.2733665108680725, | |
| "learning_rate": 4.596111111111112e-05, | |
| "loss": 3.1584, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.21520003056818615, | |
| "grad_norm": 0.34570956230163574, | |
| "learning_rate": 4.578333333333333e-05, | |
| "loss": 3.162, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.21911275839669864, | |
| "grad_norm": 0.2521582543849945, | |
| "learning_rate": 4.560555555555556e-05, | |
| "loss": 3.1603, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.22302548622521112, | |
| "grad_norm": 0.29344356060028076, | |
| "learning_rate": 4.542777777777778e-05, | |
| "loss": 3.1587, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.22693821405372358, | |
| "grad_norm": 0.426881343126297, | |
| "learning_rate": 4.525e-05, | |
| "loss": 3.1561, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.23085094188223607, | |
| "grad_norm": 0.27699196338653564, | |
| "learning_rate": 4.507222222222223e-05, | |
| "loss": 3.1581, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.23476366971074855, | |
| "grad_norm": 0.32313504815101624, | |
| "learning_rate": 4.4894444444444444e-05, | |
| "loss": 3.1578, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.238676397539261, | |
| "grad_norm": 0.26697778701782227, | |
| "learning_rate": 4.4716666666666665e-05, | |
| "loss": 3.157, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.2425891253677735, | |
| "grad_norm": 0.2206508368253708, | |
| "learning_rate": 4.453888888888889e-05, | |
| "loss": 3.1551, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.24650185319628595, | |
| "grad_norm": 0.252888947725296, | |
| "learning_rate": 4.4361111111111113e-05, | |
| "loss": 3.1563, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.25041458102479847, | |
| "grad_norm": 0.28254494071006775, | |
| "learning_rate": 4.4183333333333334e-05, | |
| "loss": 3.156, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.2543273088533109, | |
| "grad_norm": 0.28460440039634705, | |
| "learning_rate": 4.4005555555555555e-05, | |
| "loss": 3.156, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.2582400366818234, | |
| "grad_norm": 0.290326863527298, | |
| "learning_rate": 4.3827777777777776e-05, | |
| "loss": 3.1518, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.26215276451033587, | |
| "grad_norm": 0.2769670784473419, | |
| "learning_rate": 4.3650000000000004e-05, | |
| "loss": 3.1515, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.26606549233884835, | |
| "grad_norm": 0.21678052842617035, | |
| "learning_rate": 4.3472222222222225e-05, | |
| "loss": 3.1518, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.26997822016736084, | |
| "grad_norm": 0.3134085536003113, | |
| "learning_rate": 4.3294444444444446e-05, | |
| "loss": 3.1501, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.27389094799587327, | |
| "grad_norm": 0.35099807381629944, | |
| "learning_rate": 4.311666666666667e-05, | |
| "loss": 3.1523, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.27780367582438575, | |
| "grad_norm": 0.27320197224617004, | |
| "learning_rate": 4.293888888888889e-05, | |
| "loss": 3.1507, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.28171640365289824, | |
| "grad_norm": 0.28096139430999756, | |
| "learning_rate": 4.2761111111111115e-05, | |
| "loss": 3.1474, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.2856291314814107, | |
| "grad_norm": 0.30300965905189514, | |
| "learning_rate": 4.2583333333333336e-05, | |
| "loss": 3.15, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.2895418593099232, | |
| "grad_norm": 0.2996535003185272, | |
| "learning_rate": 4.240555555555556e-05, | |
| "loss": 3.1528, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.2934545871384357, | |
| "grad_norm": 0.2503749132156372, | |
| "learning_rate": 4.222777777777778e-05, | |
| "loss": 3.1522, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2973673149669481, | |
| "grad_norm": 0.2272900640964508, | |
| "learning_rate": 4.205e-05, | |
| "loss": 3.1472, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.3012800427954606, | |
| "grad_norm": 0.2367839366197586, | |
| "learning_rate": 4.1872222222222227e-05, | |
| "loss": 3.1479, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.3051927706239731, | |
| "grad_norm": 0.3656509220600128, | |
| "learning_rate": 4.169444444444445e-05, | |
| "loss": 3.1506, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 0.3091054984524856, | |
| "grad_norm": 0.25474536418914795, | |
| "learning_rate": 4.151666666666667e-05, | |
| "loss": 3.1506, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 0.31301822628099807, | |
| "grad_norm": 0.21729741990566254, | |
| "learning_rate": 4.133888888888889e-05, | |
| "loss": 3.1466, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.31693095410951055, | |
| "grad_norm": 0.26999133825302124, | |
| "learning_rate": 4.116111111111111e-05, | |
| "loss": 3.1468, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 0.320843681938023, | |
| "grad_norm": 0.2668827176094055, | |
| "learning_rate": 4.098333333333334e-05, | |
| "loss": 3.144, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 0.32475640976653547, | |
| "grad_norm": 0.24051733314990997, | |
| "learning_rate": 4.080555555555556e-05, | |
| "loss": 3.1465, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 0.32866913759504796, | |
| "grad_norm": 0.24717700481414795, | |
| "learning_rate": 4.062777777777778e-05, | |
| "loss": 3.1465, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.33258186542356044, | |
| "grad_norm": 0.23907746374607086, | |
| "learning_rate": 4.045000000000001e-05, | |
| "loss": 3.1453, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.3364945932520729, | |
| "grad_norm": 0.24447326362133026, | |
| "learning_rate": 4.027222222222222e-05, | |
| "loss": 3.1406, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 0.34040732108058536, | |
| "grad_norm": 0.25871723890304565, | |
| "learning_rate": 4.009444444444444e-05, | |
| "loss": 3.1435, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 0.34432004890909784, | |
| "grad_norm": 0.3173305094242096, | |
| "learning_rate": 3.991666666666667e-05, | |
| "loss": 3.1439, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 0.34823277673761033, | |
| "grad_norm": 0.2715188264846802, | |
| "learning_rate": 3.973888888888889e-05, | |
| "loss": 3.1433, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 0.3521455045661228, | |
| "grad_norm": 0.2764374315738678, | |
| "learning_rate": 3.956111111111112e-05, | |
| "loss": 3.1455, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.3560582323946353, | |
| "grad_norm": 0.3014623522758484, | |
| "learning_rate": 3.938333333333333e-05, | |
| "loss": 3.1399, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 0.3599709602231478, | |
| "grad_norm": 0.22385312616825104, | |
| "learning_rate": 3.9205555555555554e-05, | |
| "loss": 3.1426, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 0.3638836880516602, | |
| "grad_norm": 0.22400549054145813, | |
| "learning_rate": 3.902777777777778e-05, | |
| "loss": 3.1393, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 0.3677964158801727, | |
| "grad_norm": 0.266812801361084, | |
| "learning_rate": 3.885e-05, | |
| "loss": 3.1426, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 0.3717091437086852, | |
| "grad_norm": 0.2830856442451477, | |
| "learning_rate": 3.867222222222222e-05, | |
| "loss": 3.14, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.37562187153719767, | |
| "grad_norm": 0.2724515199661255, | |
| "learning_rate": 3.8494444444444444e-05, | |
| "loss": 3.1419, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.37953459936571016, | |
| "grad_norm": 0.22998973727226257, | |
| "learning_rate": 3.8316666666666665e-05, | |
| "loss": 3.139, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 0.3834473271942226, | |
| "grad_norm": 0.23931734263896942, | |
| "learning_rate": 3.813888888888889e-05, | |
| "loss": 3.1408, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 0.3873600550227351, | |
| "grad_norm": 0.26907482743263245, | |
| "learning_rate": 3.7961111111111114e-05, | |
| "loss": 3.1374, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 0.39127278285124756, | |
| "grad_norm": 0.24700401723384857, | |
| "learning_rate": 3.7783333333333335e-05, | |
| "loss": 3.137, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.39518551067976004, | |
| "grad_norm": 0.2963546812534332, | |
| "learning_rate": 3.7605555555555556e-05, | |
| "loss": 3.1401, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 0.39909823850827253, | |
| "grad_norm": 0.2659439444541931, | |
| "learning_rate": 3.7427777777777777e-05, | |
| "loss": 3.1387, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 0.403010966336785, | |
| "grad_norm": 0.26796412467956543, | |
| "learning_rate": 3.7250000000000004e-05, | |
| "loss": 3.1403, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 0.40692369416529744, | |
| "grad_norm": 0.29361388087272644, | |
| "learning_rate": 3.7072222222222225e-05, | |
| "loss": 3.1389, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 0.41083642199380993, | |
| "grad_norm": 0.24953944981098175, | |
| "learning_rate": 3.6894444444444446e-05, | |
| "loss": 3.1402, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.4147491498223224, | |
| "grad_norm": 0.23955155909061432, | |
| "learning_rate": 3.671666666666667e-05, | |
| "loss": 3.1377, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 0.4186618776508349, | |
| "grad_norm": 0.22984126210212708, | |
| "learning_rate": 3.653888888888889e-05, | |
| "loss": 3.1375, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 0.4225746054793474, | |
| "grad_norm": 0.2523467540740967, | |
| "learning_rate": 3.6361111111111116e-05, | |
| "loss": 3.1364, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 0.4264873333078598, | |
| "grad_norm": 0.23315957188606262, | |
| "learning_rate": 3.6183333333333336e-05, | |
| "loss": 3.1389, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 0.4304000611363723, | |
| "grad_norm": 0.22483432292938232, | |
| "learning_rate": 3.600555555555556e-05, | |
| "loss": 3.1357, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.4343127889648848, | |
| "grad_norm": 0.23685774207115173, | |
| "learning_rate": 3.582777777777778e-05, | |
| "loss": 3.136, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 0.4382255167933973, | |
| "grad_norm": 0.24475786089897156, | |
| "learning_rate": 3.565e-05, | |
| "loss": 3.1364, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 0.44213824462190976, | |
| "grad_norm": 0.21655669808387756, | |
| "learning_rate": 3.547222222222222e-05, | |
| "loss": 3.1363, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 0.44605097245042225, | |
| "grad_norm": 0.24810287356376648, | |
| "learning_rate": 3.529444444444445e-05, | |
| "loss": 3.1364, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 0.4499637002789347, | |
| "grad_norm": 0.23016402125358582, | |
| "learning_rate": 3.511666666666667e-05, | |
| "loss": 3.1345, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.45387642810744716, | |
| "grad_norm": 0.24041368067264557, | |
| "learning_rate": 3.4938888888888896e-05, | |
| "loss": 3.1389, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 0.45778915593595965, | |
| "grad_norm": 0.237365260720253, | |
| "learning_rate": 3.476111111111111e-05, | |
| "loss": 3.1335, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 0.46170188376447213, | |
| "grad_norm": 0.21840572357177734, | |
| "learning_rate": 3.458333333333333e-05, | |
| "loss": 3.1365, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 0.4656146115929846, | |
| "grad_norm": 0.22491848468780518, | |
| "learning_rate": 3.440555555555556e-05, | |
| "loss": 3.1365, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 0.4695273394214971, | |
| "grad_norm": 0.2349662482738495, | |
| "learning_rate": 3.422777777777778e-05, | |
| "loss": 3.1364, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.47344006725000953, | |
| "grad_norm": 0.3244574964046478, | |
| "learning_rate": 3.405e-05, | |
| "loss": 3.1333, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 0.477352795078522, | |
| "grad_norm": 0.20271480083465576, | |
| "learning_rate": 3.387222222222222e-05, | |
| "loss": 3.1337, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 0.4812655229070345, | |
| "grad_norm": 0.22787164151668549, | |
| "learning_rate": 3.369444444444444e-05, | |
| "loss": 3.1359, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 0.485178250735547, | |
| "grad_norm": 0.2814686894416809, | |
| "learning_rate": 3.351666666666667e-05, | |
| "loss": 3.1344, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 0.4890909785640595, | |
| "grad_norm": 0.20366469025611877, | |
| "learning_rate": 3.333888888888889e-05, | |
| "loss": 3.1342, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.4930037063925719, | |
| "grad_norm": 0.2670027017593384, | |
| "learning_rate": 3.316111111111111e-05, | |
| "loss": 3.1319, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 0.4969164342210844, | |
| "grad_norm": 0.2204466164112091, | |
| "learning_rate": 3.298333333333333e-05, | |
| "loss": 3.1328, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 0.5008291620495969, | |
| "grad_norm": 0.2765197157859802, | |
| "learning_rate": 3.2805555555555554e-05, | |
| "loss": 3.132, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 0.5047418898781093, | |
| "grad_norm": 0.2624960243701935, | |
| "learning_rate": 3.262777777777778e-05, | |
| "loss": 3.1348, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 0.5086546177066218, | |
| "grad_norm": 0.2254333347082138, | |
| "learning_rate": 3.245e-05, | |
| "loss": 3.1327, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.5125673455351343, | |
| "grad_norm": 0.25047773122787476, | |
| "learning_rate": 3.2272222222222224e-05, | |
| "loss": 3.1318, | |
| "step": 4192 | |
| }, | |
| { | |
| "epoch": 0.5164800733636468, | |
| "grad_norm": 0.23816271126270294, | |
| "learning_rate": 3.2094444444444445e-05, | |
| "loss": 3.1331, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 0.5203928011921592, | |
| "grad_norm": 0.22233732044696808, | |
| "learning_rate": 3.1916666666666665e-05, | |
| "loss": 3.1315, | |
| "step": 4256 | |
| }, | |
| { | |
| "epoch": 0.5243055290206717, | |
| "grad_norm": 0.25133851170539856, | |
| "learning_rate": 3.173888888888889e-05, | |
| "loss": 3.1333, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 0.5282182568491842, | |
| "grad_norm": 0.21504898369312286, | |
| "learning_rate": 3.1561111111111114e-05, | |
| "loss": 3.1332, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.5321309846776967, | |
| "grad_norm": 0.2872157394886017, | |
| "learning_rate": 3.1383333333333335e-05, | |
| "loss": 3.1303, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 0.5360437125062092, | |
| "grad_norm": 0.244154691696167, | |
| "learning_rate": 3.1205555555555556e-05, | |
| "loss": 3.1323, | |
| "step": 4384 | |
| }, | |
| { | |
| "epoch": 0.5399564403347217, | |
| "grad_norm": 0.24791453778743744, | |
| "learning_rate": 3.102777777777778e-05, | |
| "loss": 3.1312, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 0.5438691681632342, | |
| "grad_norm": 0.2378605306148529, | |
| "learning_rate": 3.0850000000000004e-05, | |
| "loss": 3.1309, | |
| "step": 4448 | |
| }, | |
| { | |
| "epoch": 0.5477818959917465, | |
| "grad_norm": 0.21514585614204407, | |
| "learning_rate": 3.0672222222222225e-05, | |
| "loss": 3.1244, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.551694623820259, | |
| "grad_norm": 0.22684329748153687, | |
| "learning_rate": 3.0494444444444446e-05, | |
| "loss": 3.1297, | |
| "step": 4512 | |
| }, | |
| { | |
| "epoch": 0.5556073516487715, | |
| "grad_norm": 0.21271203458309174, | |
| "learning_rate": 3.0316666666666664e-05, | |
| "loss": 3.1286, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 0.559520079477284, | |
| "grad_norm": 0.22873900830745697, | |
| "learning_rate": 3.0138888888888888e-05, | |
| "loss": 3.1262, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 0.5634328073057965, | |
| "grad_norm": 0.24229228496551514, | |
| "learning_rate": 2.9961111111111112e-05, | |
| "loss": 3.1312, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 0.567345535134309, | |
| "grad_norm": 0.2754037380218506, | |
| "learning_rate": 2.9783333333333337e-05, | |
| "loss": 3.1296, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.5712582629628215, | |
| "grad_norm": 0.20053815841674805, | |
| "learning_rate": 2.9605555555555558e-05, | |
| "loss": 3.128, | |
| "step": 4672 | |
| }, | |
| { | |
| "epoch": 0.5751709907913339, | |
| "grad_norm": 0.24577876925468445, | |
| "learning_rate": 2.9427777777777782e-05, | |
| "loss": 3.1302, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 0.5790837186198464, | |
| "grad_norm": 0.2547786235809326, | |
| "learning_rate": 2.925e-05, | |
| "loss": 3.1263, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 0.5829964464483589, | |
| "grad_norm": 0.18451441824436188, | |
| "learning_rate": 2.9072222222222224e-05, | |
| "loss": 3.1282, | |
| "step": 4768 | |
| }, | |
| { | |
| "epoch": 0.5869091742768714, | |
| "grad_norm": 0.21002881228923798, | |
| "learning_rate": 2.8894444444444445e-05, | |
| "loss": 3.1271, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.5908219021053838, | |
| "grad_norm": 0.21180187165737152, | |
| "learning_rate": 2.871666666666667e-05, | |
| "loss": 3.1272, | |
| "step": 4832 | |
| }, | |
| { | |
| "epoch": 0.5947346299338963, | |
| "grad_norm": 0.2123003453016281, | |
| "learning_rate": 2.8538888888888893e-05, | |
| "loss": 3.1285, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 0.5986473577624087, | |
| "grad_norm": 0.20064932107925415, | |
| "learning_rate": 2.836111111111111e-05, | |
| "loss": 3.1289, | |
| "step": 4896 | |
| }, | |
| { | |
| "epoch": 0.6025600855909212, | |
| "grad_norm": 0.19583889842033386, | |
| "learning_rate": 2.8183333333333335e-05, | |
| "loss": 3.128, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 0.6064728134194337, | |
| "grad_norm": 0.1817025989294052, | |
| "learning_rate": 2.8005555555555556e-05, | |
| "loss": 3.1263, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.6103855412479462, | |
| "grad_norm": 0.18323124945163727, | |
| "learning_rate": 2.782777777777778e-05, | |
| "loss": 3.1276, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 0.6142982690764587, | |
| "grad_norm": 0.21348968148231506, | |
| "learning_rate": 2.7650000000000005e-05, | |
| "loss": 3.1262, | |
| "step": 5024 | |
| }, | |
| { | |
| "epoch": 0.6182109969049712, | |
| "grad_norm": 0.24803143739700317, | |
| "learning_rate": 2.7472222222222222e-05, | |
| "loss": 3.1278, | |
| "step": 5056 | |
| }, | |
| { | |
| "epoch": 0.6221237247334837, | |
| "grad_norm": 0.27887552976608276, | |
| "learning_rate": 2.7294444444444443e-05, | |
| "loss": 3.1261, | |
| "step": 5088 | |
| }, | |
| { | |
| "epoch": 0.6260364525619961, | |
| "grad_norm": 0.20992670953273773, | |
| "learning_rate": 2.7116666666666667e-05, | |
| "loss": 3.1248, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.6299491803905086, | |
| "grad_norm": 0.20632390677928925, | |
| "learning_rate": 2.693888888888889e-05, | |
| "loss": 3.1295, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 0.6338619082190211, | |
| "grad_norm": 0.22720162570476532, | |
| "learning_rate": 2.6761111111111116e-05, | |
| "loss": 3.124, | |
| "step": 5184 | |
| }, | |
| { | |
| "epoch": 0.6377746360475335, | |
| "grad_norm": 0.20604351162910461, | |
| "learning_rate": 2.6583333333333333e-05, | |
| "loss": 3.1246, | |
| "step": 5216 | |
| }, | |
| { | |
| "epoch": 0.641687363876046, | |
| "grad_norm": 0.21567173302173615, | |
| "learning_rate": 2.6405555555555554e-05, | |
| "loss": 3.1266, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 0.6456000917045585, | |
| "grad_norm": 0.22443106770515442, | |
| "learning_rate": 2.622777777777778e-05, | |
| "loss": 3.1265, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.6495128195330709, | |
| "grad_norm": 0.2323237955570221, | |
| "learning_rate": 2.6050000000000003e-05, | |
| "loss": 3.1214, | |
| "step": 5312 | |
| }, | |
| { | |
| "epoch": 0.6534255473615834, | |
| "grad_norm": 0.21166770160198212, | |
| "learning_rate": 2.5872222222222224e-05, | |
| "loss": 3.125, | |
| "step": 5344 | |
| }, | |
| { | |
| "epoch": 0.6573382751900959, | |
| "grad_norm": 0.21922937035560608, | |
| "learning_rate": 2.5694444444444445e-05, | |
| "loss": 3.1236, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 0.6612510030186084, | |
| "grad_norm": 0.19853883981704712, | |
| "learning_rate": 2.5516666666666666e-05, | |
| "loss": 3.1256, | |
| "step": 5408 | |
| }, | |
| { | |
| "epoch": 0.6651637308471209, | |
| "grad_norm": 0.22357633709907532, | |
| "learning_rate": 2.533888888888889e-05, | |
| "loss": 3.1257, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.6690764586756334, | |
| "grad_norm": 0.22123898565769196, | |
| "learning_rate": 2.5161111111111114e-05, | |
| "loss": 3.1265, | |
| "step": 5472 | |
| }, | |
| { | |
| "epoch": 0.6729891865041459, | |
| "grad_norm": 0.20758691430091858, | |
| "learning_rate": 2.4983333333333335e-05, | |
| "loss": 3.1244, | |
| "step": 5504 | |
| }, | |
| { | |
| "epoch": 0.6769019143326583, | |
| "grad_norm": 0.19084863364696503, | |
| "learning_rate": 2.4805555555555556e-05, | |
| "loss": 3.124, | |
| "step": 5536 | |
| }, | |
| { | |
| "epoch": 0.6808146421611707, | |
| "grad_norm": 0.21082304418087006, | |
| "learning_rate": 2.462777777777778e-05, | |
| "loss": 3.1247, | |
| "step": 5568 | |
| }, | |
| { | |
| "epoch": 0.6847273699896832, | |
| "grad_norm": 0.19547946751117706, | |
| "learning_rate": 2.445e-05, | |
| "loss": 3.1254, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.6886400978181957, | |
| "grad_norm": 0.20289190113544464, | |
| "learning_rate": 2.4272222222222222e-05, | |
| "loss": 3.1274, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 0.6925528256467082, | |
| "grad_norm": 0.21069744229316711, | |
| "learning_rate": 2.4094444444444443e-05, | |
| "loss": 3.1235, | |
| "step": 5664 | |
| }, | |
| { | |
| "epoch": 0.6964655534752207, | |
| "grad_norm": 0.20337700843811035, | |
| "learning_rate": 2.3916666666666668e-05, | |
| "loss": 3.1253, | |
| "step": 5696 | |
| }, | |
| { | |
| "epoch": 0.7003782813037331, | |
| "grad_norm": 0.2150067836046219, | |
| "learning_rate": 2.3738888888888892e-05, | |
| "loss": 3.1255, | |
| "step": 5728 | |
| }, | |
| { | |
| "epoch": 0.7042910091322456, | |
| "grad_norm": 0.1990475058555603, | |
| "learning_rate": 2.3561111111111113e-05, | |
| "loss": 3.1247, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.7082037369607581, | |
| "grad_norm": 0.20272456109523773, | |
| "learning_rate": 2.3383333333333334e-05, | |
| "loss": 3.1235, | |
| "step": 5792 | |
| }, | |
| { | |
| "epoch": 0.7121164647892706, | |
| "grad_norm": 0.21050025522708893, | |
| "learning_rate": 2.3205555555555555e-05, | |
| "loss": 3.1226, | |
| "step": 5824 | |
| }, | |
| { | |
| "epoch": 0.7160291926177831, | |
| "grad_norm": 0.2530113160610199, | |
| "learning_rate": 2.302777777777778e-05, | |
| "loss": 3.1242, | |
| "step": 5856 | |
| }, | |
| { | |
| "epoch": 0.7199419204462956, | |
| "grad_norm": 0.2530890703201294, | |
| "learning_rate": 2.2850000000000003e-05, | |
| "loss": 3.1215, | |
| "step": 5888 | |
| }, | |
| { | |
| "epoch": 0.7238546482748079, | |
| "grad_norm": 0.19028717279434204, | |
| "learning_rate": 2.2672222222222224e-05, | |
| "loss": 3.1236, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.7277673761033204, | |
| "grad_norm": 0.20547839999198914, | |
| "learning_rate": 2.2494444444444445e-05, | |
| "loss": 3.1225, | |
| "step": 5952 | |
| }, | |
| { | |
| "epoch": 0.7316801039318329, | |
| "grad_norm": 0.19479484856128693, | |
| "learning_rate": 2.231666666666667e-05, | |
| "loss": 3.1248, | |
| "step": 5984 | |
| }, | |
| { | |
| "epoch": 0.7355928317603454, | |
| "grad_norm": 0.2140408456325531, | |
| "learning_rate": 2.213888888888889e-05, | |
| "loss": 3.1237, | |
| "step": 6016 | |
| }, | |
| { | |
| "epoch": 0.7395055595888579, | |
| "grad_norm": 0.17809583246707916, | |
| "learning_rate": 2.1961111111111114e-05, | |
| "loss": 3.1243, | |
| "step": 6048 | |
| }, | |
| { | |
| "epoch": 0.7434182874173704, | |
| "grad_norm": 0.19468888640403748, | |
| "learning_rate": 2.1783333333333332e-05, | |
| "loss": 3.1246, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.7473310152458829, | |
| "grad_norm": 0.2106105089187622, | |
| "learning_rate": 2.1605555555555556e-05, | |
| "loss": 3.1224, | |
| "step": 6112 | |
| }, | |
| { | |
| "epoch": 0.7512437430743953, | |
| "grad_norm": 0.20489418506622314, | |
| "learning_rate": 2.142777777777778e-05, | |
| "loss": 3.1237, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 0.7551564709029078, | |
| "grad_norm": 0.2453160136938095, | |
| "learning_rate": 2.125e-05, | |
| "loss": 3.1212, | |
| "step": 6176 | |
| }, | |
| { | |
| "epoch": 0.7590691987314203, | |
| "grad_norm": 0.2121828943490982, | |
| "learning_rate": 2.1072222222222222e-05, | |
| "loss": 3.1192, | |
| "step": 6208 | |
| }, | |
| { | |
| "epoch": 0.7629819265599328, | |
| "grad_norm": 0.18198275566101074, | |
| "learning_rate": 2.0894444444444443e-05, | |
| "loss": 3.1213, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.7668946543884452, | |
| "grad_norm": 0.1795693039894104, | |
| "learning_rate": 2.0716666666666668e-05, | |
| "loss": 3.1201, | |
| "step": 6272 | |
| }, | |
| { | |
| "epoch": 0.7708073822169577, | |
| "grad_norm": 0.24014544486999512, | |
| "learning_rate": 2.0538888888888892e-05, | |
| "loss": 3.122, | |
| "step": 6304 | |
| }, | |
| { | |
| "epoch": 0.7747201100454701, | |
| "grad_norm": 0.20040743052959442, | |
| "learning_rate": 2.0361111111111113e-05, | |
| "loss": 3.1207, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 0.7786328378739826, | |
| "grad_norm": 0.2076857089996338, | |
| "learning_rate": 2.0183333333333334e-05, | |
| "loss": 3.1245, | |
| "step": 6368 | |
| }, | |
| { | |
| "epoch": 0.7825455657024951, | |
| "grad_norm": 0.19411978125572205, | |
| "learning_rate": 2.0005555555555555e-05, | |
| "loss": 3.1216, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.7864582935310076, | |
| "grad_norm": 0.17701873183250427, | |
| "learning_rate": 1.982777777777778e-05, | |
| "loss": 3.1228, | |
| "step": 6432 | |
| }, | |
| { | |
| "epoch": 0.7903710213595201, | |
| "grad_norm": 0.19787663221359253, | |
| "learning_rate": 1.9650000000000003e-05, | |
| "loss": 3.122, | |
| "step": 6464 | |
| }, | |
| { | |
| "epoch": 0.7942837491880326, | |
| "grad_norm": 0.18991973996162415, | |
| "learning_rate": 1.947222222222222e-05, | |
| "loss": 3.1211, | |
| "step": 6496 | |
| }, | |
| { | |
| "epoch": 0.7981964770165451, | |
| "grad_norm": 0.18508349359035492, | |
| "learning_rate": 1.9294444444444445e-05, | |
| "loss": 3.1211, | |
| "step": 6528 | |
| }, | |
| { | |
| "epoch": 0.8021092048450575, | |
| "grad_norm": 0.17648939788341522, | |
| "learning_rate": 1.911666666666667e-05, | |
| "loss": 3.1237, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.80602193267357, | |
| "grad_norm": 0.20672652125358582, | |
| "learning_rate": 1.893888888888889e-05, | |
| "loss": 3.1213, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 0.8099346605020824, | |
| "grad_norm": 0.21490968763828278, | |
| "learning_rate": 1.876111111111111e-05, | |
| "loss": 3.1201, | |
| "step": 6624 | |
| }, | |
| { | |
| "epoch": 0.8138473883305949, | |
| "grad_norm": 0.20175087451934814, | |
| "learning_rate": 1.8583333333333332e-05, | |
| "loss": 3.1184, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 0.8177601161591074, | |
| "grad_norm": 0.17700786888599396, | |
| "learning_rate": 1.8405555555555556e-05, | |
| "loss": 3.1194, | |
| "step": 6688 | |
| }, | |
| { | |
| "epoch": 0.8216728439876199, | |
| "grad_norm": 0.19697381556034088, | |
| "learning_rate": 1.822777777777778e-05, | |
| "loss": 3.1208, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.8255855718161323, | |
| "grad_norm": 0.19516746699810028, | |
| "learning_rate": 1.805e-05, | |
| "loss": 3.122, | |
| "step": 6752 | |
| }, | |
| { | |
| "epoch": 0.8294982996446448, | |
| "grad_norm": 0.19233250617980957, | |
| "learning_rate": 1.7872222222222223e-05, | |
| "loss": 3.1237, | |
| "step": 6784 | |
| }, | |
| { | |
| "epoch": 0.8334110274731573, | |
| "grad_norm": 0.20740792155265808, | |
| "learning_rate": 1.7694444444444443e-05, | |
| "loss": 3.1227, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 0.8373237553016698, | |
| "grad_norm": 0.18789739906787872, | |
| "learning_rate": 1.7516666666666668e-05, | |
| "loss": 3.1198, | |
| "step": 6848 | |
| }, | |
| { | |
| "epoch": 0.8412364831301823, | |
| "grad_norm": 0.17981740832328796, | |
| "learning_rate": 1.7338888888888892e-05, | |
| "loss": 3.121, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.8451492109586948, | |
| "grad_norm": 0.2110264003276825, | |
| "learning_rate": 1.716111111111111e-05, | |
| "loss": 3.1186, | |
| "step": 6912 | |
| }, | |
| { | |
| "epoch": 0.8490619387872073, | |
| "grad_norm": 0.19858282804489136, | |
| "learning_rate": 1.6983333333333334e-05, | |
| "loss": 3.1236, | |
| "step": 6944 | |
| }, | |
| { | |
| "epoch": 0.8529746666157196, | |
| "grad_norm": 0.17566311359405518, | |
| "learning_rate": 1.6805555555555558e-05, | |
| "loss": 3.1225, | |
| "step": 6976 | |
| }, | |
| { | |
| "epoch": 0.8568873944442321, | |
| "grad_norm": 0.19274671375751495, | |
| "learning_rate": 1.662777777777778e-05, | |
| "loss": 3.1197, | |
| "step": 7008 | |
| }, | |
| { | |
| "epoch": 0.8608001222727446, | |
| "grad_norm": 0.20043255388736725, | |
| "learning_rate": 1.645e-05, | |
| "loss": 3.1221, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.8647128501012571, | |
| "grad_norm": 0.17369119822978973, | |
| "learning_rate": 1.627222222222222e-05, | |
| "loss": 3.119, | |
| "step": 7072 | |
| }, | |
| { | |
| "epoch": 0.8686255779297696, | |
| "grad_norm": 0.18795572221279144, | |
| "learning_rate": 1.6094444444444445e-05, | |
| "loss": 3.116, | |
| "step": 7104 | |
| }, | |
| { | |
| "epoch": 0.8725383057582821, | |
| "grad_norm": 0.20084317028522491, | |
| "learning_rate": 1.591666666666667e-05, | |
| "loss": 3.1164, | |
| "step": 7136 | |
| }, | |
| { | |
| "epoch": 0.8764510335867945, | |
| "grad_norm": 0.1732749342918396, | |
| "learning_rate": 1.573888888888889e-05, | |
| "loss": 3.1184, | |
| "step": 7168 | |
| }, | |
| { | |
| "epoch": 0.880363761415307, | |
| "grad_norm": 0.18775592744350433, | |
| "learning_rate": 1.556111111111111e-05, | |
| "loss": 3.1186, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.8842764892438195, | |
| "grad_norm": 0.1810338944196701, | |
| "learning_rate": 1.5383333333333332e-05, | |
| "loss": 3.1211, | |
| "step": 7232 | |
| }, | |
| { | |
| "epoch": 0.888189217072332, | |
| "grad_norm": 0.17264607548713684, | |
| "learning_rate": 1.5205555555555557e-05, | |
| "loss": 3.115, | |
| "step": 7264 | |
| }, | |
| { | |
| "epoch": 0.8921019449008445, | |
| "grad_norm": 0.18331947922706604, | |
| "learning_rate": 1.502777777777778e-05, | |
| "loss": 3.1176, | |
| "step": 7296 | |
| }, | |
| { | |
| "epoch": 0.896014672729357, | |
| "grad_norm": 0.1883401870727539, | |
| "learning_rate": 1.485e-05, | |
| "loss": 3.1194, | |
| "step": 7328 | |
| }, | |
| { | |
| "epoch": 0.8999274005578694, | |
| "grad_norm": 0.17407892644405365, | |
| "learning_rate": 1.4672222222222223e-05, | |
| "loss": 3.1188, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.9038401283863818, | |
| "grad_norm": 0.1941099464893341, | |
| "learning_rate": 1.4494444444444444e-05, | |
| "loss": 3.1211, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 0.9077528562148943, | |
| "grad_norm": 0.17381389439105988, | |
| "learning_rate": 1.4316666666666668e-05, | |
| "loss": 3.1194, | |
| "step": 7424 | |
| }, | |
| { | |
| "epoch": 0.9116655840434068, | |
| "grad_norm": 0.18369047343730927, | |
| "learning_rate": 1.413888888888889e-05, | |
| "loss": 3.1165, | |
| "step": 7456 | |
| }, | |
| { | |
| "epoch": 0.9155783118719193, | |
| "grad_norm": 0.17392371594905853, | |
| "learning_rate": 1.3961111111111111e-05, | |
| "loss": 3.1165, | |
| "step": 7488 | |
| }, | |
| { | |
| "epoch": 0.9194910397004318, | |
| "grad_norm": 0.17337463796138763, | |
| "learning_rate": 1.3783333333333334e-05, | |
| "loss": 3.1192, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.9234037675289443, | |
| "grad_norm": 0.1813974380493164, | |
| "learning_rate": 1.3605555555555557e-05, | |
| "loss": 3.1158, | |
| "step": 7552 | |
| }, | |
| { | |
| "epoch": 0.9273164953574567, | |
| "grad_norm": 0.1770683377981186, | |
| "learning_rate": 1.3427777777777778e-05, | |
| "loss": 3.1173, | |
| "step": 7584 | |
| }, | |
| { | |
| "epoch": 0.9312292231859692, | |
| "grad_norm": 0.18390090763568878, | |
| "learning_rate": 1.3250000000000002e-05, | |
| "loss": 3.1211, | |
| "step": 7616 | |
| }, | |
| { | |
| "epoch": 0.9351419510144817, | |
| "grad_norm": 0.17356765270233154, | |
| "learning_rate": 1.3072222222222221e-05, | |
| "loss": 3.1187, | |
| "step": 7648 | |
| }, | |
| { | |
| "epoch": 0.9390546788429942, | |
| "grad_norm": 0.173334538936615, | |
| "learning_rate": 1.2894444444444445e-05, | |
| "loss": 3.1191, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.9429674066715066, | |
| "grad_norm": 0.18598856031894684, | |
| "learning_rate": 1.2716666666666668e-05, | |
| "loss": 3.1192, | |
| "step": 7712 | |
| }, | |
| { | |
| "epoch": 0.9468801345000191, | |
| "grad_norm": 0.1667858213186264, | |
| "learning_rate": 1.2538888888888889e-05, | |
| "loss": 3.1173, | |
| "step": 7744 | |
| }, | |
| { | |
| "epoch": 0.9507928623285316, | |
| "grad_norm": 0.17433424293994904, | |
| "learning_rate": 1.2361111111111112e-05, | |
| "loss": 3.1184, | |
| "step": 7776 | |
| }, | |
| { | |
| "epoch": 0.954705590157044, | |
| "grad_norm": 0.1921132653951645, | |
| "learning_rate": 1.2183333333333334e-05, | |
| "loss": 3.119, | |
| "step": 7808 | |
| }, | |
| { | |
| "epoch": 0.9586183179855565, | |
| "grad_norm": 0.16437648236751556, | |
| "learning_rate": 1.2005555555555557e-05, | |
| "loss": 3.1179, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.962531045814069, | |
| "grad_norm": 0.17323090136051178, | |
| "learning_rate": 1.1827777777777778e-05, | |
| "loss": 3.1192, | |
| "step": 7872 | |
| }, | |
| { | |
| "epoch": 0.9664437736425815, | |
| "grad_norm": 0.16646146774291992, | |
| "learning_rate": 1.1650000000000002e-05, | |
| "loss": 3.1176, | |
| "step": 7904 | |
| }, | |
| { | |
| "epoch": 0.970356501471094, | |
| "grad_norm": 0.18198241293430328, | |
| "learning_rate": 1.1472222222222223e-05, | |
| "loss": 3.1178, | |
| "step": 7936 | |
| }, | |
| { | |
| "epoch": 0.9742692292996065, | |
| "grad_norm": 0.17490531504154205, | |
| "learning_rate": 1.1294444444444445e-05, | |
| "loss": 3.1161, | |
| "step": 7968 | |
| }, | |
| { | |
| "epoch": 0.978181957128119, | |
| "grad_norm": 0.17505322396755219, | |
| "learning_rate": 1.1116666666666666e-05, | |
| "loss": 3.1213, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.9820946849566314, | |
| "grad_norm": 0.17005711793899536, | |
| "learning_rate": 1.0938888888888889e-05, | |
| "loss": 3.1187, | |
| "step": 8032 | |
| }, | |
| { | |
| "epoch": 0.9860074127851438, | |
| "grad_norm": 0.18125712871551514, | |
| "learning_rate": 1.0761111111111112e-05, | |
| "loss": 3.12, | |
| "step": 8064 | |
| }, | |
| { | |
| "epoch": 0.9899201406136563, | |
| "grad_norm": 0.17013822495937347, | |
| "learning_rate": 1.0583333333333334e-05, | |
| "loss": 3.1157, | |
| "step": 8096 | |
| }, | |
| { | |
| "epoch": 0.9938328684421688, | |
| "grad_norm": 0.1698048710823059, | |
| "learning_rate": 1.0405555555555555e-05, | |
| "loss": 3.1172, | |
| "step": 8128 | |
| }, | |
| { | |
| "epoch": 0.9977455962706813, | |
| "grad_norm": 0.17143802344799042, | |
| "learning_rate": 1.0227777777777778e-05, | |
| "loss": 3.1153, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.0015895456803332, | |
| "grad_norm": 0.1739780455827713, | |
| "learning_rate": 1.005e-05, | |
| "loss": 3.1163, | |
| "step": 8192 | |
| }, | |
| { | |
| "epoch": 1.0055022735088457, | |
| "grad_norm": 0.17907440662384033, | |
| "learning_rate": 9.872222222222223e-06, | |
| "loss": 3.1143, | |
| "step": 8224 | |
| }, | |
| { | |
| "epoch": 1.0094150013373582, | |
| "grad_norm": 0.17365169525146484, | |
| "learning_rate": 9.694444444444446e-06, | |
| "loss": 3.1157, | |
| "step": 8256 | |
| }, | |
| { | |
| "epoch": 1.0133277291658707, | |
| "grad_norm": 0.1645737588405609, | |
| "learning_rate": 9.516666666666666e-06, | |
| "loss": 3.1134, | |
| "step": 8288 | |
| }, | |
| { | |
| "epoch": 1.0172404569943831, | |
| "grad_norm": 0.15174245834350586, | |
| "learning_rate": 9.338888888888889e-06, | |
| "loss": 3.1142, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.0211531848228956, | |
| "grad_norm": 0.16984011232852936, | |
| "learning_rate": 9.161111111111112e-06, | |
| "loss": 3.1142, | |
| "step": 8352 | |
| }, | |
| { | |
| "epoch": 1.0250659126514081, | |
| "grad_norm": 0.1772463321685791, | |
| "learning_rate": 8.983333333333334e-06, | |
| "loss": 3.1178, | |
| "step": 8384 | |
| }, | |
| { | |
| "epoch": 1.0289786404799206, | |
| "grad_norm": 0.16304141283035278, | |
| "learning_rate": 8.805555555555555e-06, | |
| "loss": 3.113, | |
| "step": 8416 | |
| }, | |
| { | |
| "epoch": 1.032891368308433, | |
| "grad_norm": 0.15513816475868225, | |
| "learning_rate": 8.627777777777778e-06, | |
| "loss": 3.1145, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 1.0368040961369456, | |
| "grad_norm": 0.1862088292837143, | |
| "learning_rate": 8.45e-06, | |
| "loss": 3.1109, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.0407168239654578, | |
| "grad_norm": 0.17995817959308624, | |
| "learning_rate": 8.272222222222223e-06, | |
| "loss": 3.1128, | |
| "step": 8512 | |
| }, | |
| { | |
| "epoch": 1.0446295517939703, | |
| "grad_norm": 0.1758676916360855, | |
| "learning_rate": 8.094444444444444e-06, | |
| "loss": 3.1128, | |
| "step": 8544 | |
| }, | |
| { | |
| "epoch": 1.0485422796224828, | |
| "grad_norm": 0.16609688103199005, | |
| "learning_rate": 7.916666666666667e-06, | |
| "loss": 3.114, | |
| "step": 8576 | |
| }, | |
| { | |
| "epoch": 1.0524550074509953, | |
| "grad_norm": 0.15258896350860596, | |
| "learning_rate": 7.738888888888889e-06, | |
| "loss": 3.1171, | |
| "step": 8608 | |
| }, | |
| { | |
| "epoch": 1.0563677352795078, | |
| "grad_norm": 0.16240954399108887, | |
| "learning_rate": 7.561111111111112e-06, | |
| "loss": 3.113, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.0602804631080203, | |
| "grad_norm": 0.16423362493515015, | |
| "learning_rate": 7.3833333333333335e-06, | |
| "loss": 3.1154, | |
| "step": 8672 | |
| }, | |
| { | |
| "epoch": 1.0641931909365328, | |
| "grad_norm": 0.17032068967819214, | |
| "learning_rate": 7.205555555555555e-06, | |
| "loss": 3.1146, | |
| "step": 8704 | |
| }, | |
| { | |
| "epoch": 1.0681059187650452, | |
| "grad_norm": 0.1564359813928604, | |
| "learning_rate": 7.027777777777778e-06, | |
| "loss": 3.1162, | |
| "step": 8736 | |
| }, | |
| { | |
| "epoch": 1.0720186465935577, | |
| "grad_norm": 0.15838623046875, | |
| "learning_rate": 6.8500000000000005e-06, | |
| "loss": 3.113, | |
| "step": 8768 | |
| }, | |
| { | |
| "epoch": 1.0759313744220702, | |
| "grad_norm": 0.17325465381145477, | |
| "learning_rate": 6.672222222222223e-06, | |
| "loss": 3.1153, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.0798441022505827, | |
| "grad_norm": 0.16170760989189148, | |
| "learning_rate": 6.494444444444445e-06, | |
| "loss": 3.115, | |
| "step": 8832 | |
| }, | |
| { | |
| "epoch": 1.0837568300790952, | |
| "grad_norm": 0.15591956675052643, | |
| "learning_rate": 6.316666666666667e-06, | |
| "loss": 3.1088, | |
| "step": 8864 | |
| }, | |
| { | |
| "epoch": 1.0876695579076077, | |
| "grad_norm": 0.15115121006965637, | |
| "learning_rate": 6.138888888888889e-06, | |
| "loss": 3.1103, | |
| "step": 8896 | |
| }, | |
| { | |
| "epoch": 1.0915822857361202, | |
| "grad_norm": 0.1577509045600891, | |
| "learning_rate": 5.961111111111111e-06, | |
| "loss": 3.112, | |
| "step": 8928 | |
| }, | |
| { | |
| "epoch": 1.0954950135646326, | |
| "grad_norm": 0.1545899361371994, | |
| "learning_rate": 5.783333333333334e-06, | |
| "loss": 3.1108, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.0994077413931451, | |
| "grad_norm": 0.1597297489643097, | |
| "learning_rate": 5.605555555555555e-06, | |
| "loss": 3.1172, | |
| "step": 8992 | |
| }, | |
| { | |
| "epoch": 1.1033204692216576, | |
| "grad_norm": 0.16016387939453125, | |
| "learning_rate": 5.427777777777778e-06, | |
| "loss": 3.1156, | |
| "step": 9024 | |
| }, | |
| { | |
| "epoch": 1.10723319705017, | |
| "grad_norm": 0.15304987132549286, | |
| "learning_rate": 5.25e-06, | |
| "loss": 3.1126, | |
| "step": 9056 | |
| }, | |
| { | |
| "epoch": 1.1111459248786826, | |
| "grad_norm": 0.1560225784778595, | |
| "learning_rate": 5.072222222222222e-06, | |
| "loss": 3.1152, | |
| "step": 9088 | |
| }, | |
| { | |
| "epoch": 1.115058652707195, | |
| "grad_norm": 0.16613492369651794, | |
| "learning_rate": 4.894444444444445e-06, | |
| "loss": 3.1147, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.1189713805357075, | |
| "grad_norm": 0.15055406093597412, | |
| "learning_rate": 4.7166666666666675e-06, | |
| "loss": 3.1116, | |
| "step": 9152 | |
| }, | |
| { | |
| "epoch": 1.12288410836422, | |
| "grad_norm": 0.16280752420425415, | |
| "learning_rate": 4.538888888888889e-06, | |
| "loss": 3.1148, | |
| "step": 9184 | |
| }, | |
| { | |
| "epoch": 1.1267968361927325, | |
| "grad_norm": 0.1523207277059555, | |
| "learning_rate": 4.361111111111112e-06, | |
| "loss": 3.1133, | |
| "step": 9216 | |
| }, | |
| { | |
| "epoch": 1.1307095640212448, | |
| "grad_norm": 0.1500737965106964, | |
| "learning_rate": 4.183333333333334e-06, | |
| "loss": 3.1177, | |
| "step": 9248 | |
| }, | |
| { | |
| "epoch": 1.1346222918497573, | |
| "grad_norm": 0.16134943068027496, | |
| "learning_rate": 4.005555555555555e-06, | |
| "loss": 3.1143, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.1385350196782698, | |
| "grad_norm": 0.1499546766281128, | |
| "learning_rate": 3.827777777777778e-06, | |
| "loss": 3.1133, | |
| "step": 9312 | |
| }, | |
| { | |
| "epoch": 1.1424477475067822, | |
| "grad_norm": 0.15620845556259155, | |
| "learning_rate": 3.6499999999999998e-06, | |
| "loss": 3.1122, | |
| "step": 9344 | |
| }, | |
| { | |
| "epoch": 1.1463604753352947, | |
| "grad_norm": 0.15544985234737396, | |
| "learning_rate": 3.4722222222222224e-06, | |
| "loss": 3.1146, | |
| "step": 9376 | |
| }, | |
| { | |
| "epoch": 1.1502732031638072, | |
| "grad_norm": 0.15928788483142853, | |
| "learning_rate": 3.2944444444444446e-06, | |
| "loss": 3.1123, | |
| "step": 9408 | |
| }, | |
| { | |
| "epoch": 1.1541859309923197, | |
| "grad_norm": 0.14999979734420776, | |
| "learning_rate": 3.1166666666666668e-06, | |
| "loss": 3.1149, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.1580986588208322, | |
| "grad_norm": 0.15014442801475525, | |
| "learning_rate": 2.938888888888889e-06, | |
| "loss": 3.1113, | |
| "step": 9472 | |
| }, | |
| { | |
| "epoch": 1.1620113866493447, | |
| "grad_norm": 0.14749625325202942, | |
| "learning_rate": 2.761111111111111e-06, | |
| "loss": 3.113, | |
| "step": 9504 | |
| }, | |
| { | |
| "epoch": 1.1659241144778572, | |
| "grad_norm": 0.14931970834732056, | |
| "learning_rate": 2.5833333333333333e-06, | |
| "loss": 3.1144, | |
| "step": 9536 | |
| }, | |
| { | |
| "epoch": 1.1698368423063696, | |
| "grad_norm": 0.14572674036026, | |
| "learning_rate": 2.4055555555555555e-06, | |
| "loss": 3.1093, | |
| "step": 9568 | |
| }, | |
| { | |
| "epoch": 1.1737495701348821, | |
| "grad_norm": 0.15361888706684113, | |
| "learning_rate": 2.227777777777778e-06, | |
| "loss": 3.1138, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.1776622979633946, | |
| "grad_norm": 0.1433536857366562, | |
| "learning_rate": 2.0500000000000003e-06, | |
| "loss": 3.1123, | |
| "step": 9632 | |
| }, | |
| { | |
| "epoch": 1.181575025791907, | |
| "grad_norm": 0.14533208310604095, | |
| "learning_rate": 1.8722222222222225e-06, | |
| "loss": 3.1116, | |
| "step": 9664 | |
| }, | |
| { | |
| "epoch": 1.1854877536204196, | |
| "grad_norm": 0.14816279709339142, | |
| "learning_rate": 1.6944444444444446e-06, | |
| "loss": 3.1128, | |
| "step": 9696 | |
| }, | |
| { | |
| "epoch": 1.189400481448932, | |
| "grad_norm": 0.14798638224601746, | |
| "learning_rate": 1.5166666666666668e-06, | |
| "loss": 3.116, | |
| "step": 9728 | |
| }, | |
| { | |
| "epoch": 1.1933132092774446, | |
| "grad_norm": 0.1386597454547882, | |
| "learning_rate": 1.338888888888889e-06, | |
| "loss": 3.1145, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.197225937105957, | |
| "grad_norm": 0.14148685336112976, | |
| "learning_rate": 1.161111111111111e-06, | |
| "loss": 3.1115, | |
| "step": 9792 | |
| }, | |
| { | |
| "epoch": 1.2011386649344695, | |
| "grad_norm": 0.14324016869068146, | |
| "learning_rate": 9.833333333333334e-07, | |
| "loss": 3.1117, | |
| "step": 9824 | |
| }, | |
| { | |
| "epoch": 1.205051392762982, | |
| "grad_norm": 0.14499281346797943, | |
| "learning_rate": 8.055555555555556e-07, | |
| "loss": 3.1129, | |
| "step": 9856 | |
| }, | |
| { | |
| "epoch": 1.2089641205914945, | |
| "grad_norm": 0.1464635133743286, | |
| "learning_rate": 6.277777777777778e-07, | |
| "loss": 3.1169, | |
| "step": 9888 | |
| }, | |
| { | |
| "epoch": 1.2128768484200068, | |
| "grad_norm": 0.14767299592494965, | |
| "learning_rate": 4.5e-07, | |
| "loss": 3.1131, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.2167895762485195, | |
| "grad_norm": 0.14456725120544434, | |
| "learning_rate": 2.722222222222222e-07, | |
| "loss": 3.116, | |
| "step": 9952 | |
| }, | |
| { | |
| "epoch": 1.2207023040770317, | |
| "grad_norm": 0.1386868953704834, | |
| "learning_rate": 9.444444444444445e-08, | |
| "loss": 3.1151, | |
| "step": 9984 | |
| }, | |
| { | |
| "epoch": 1.222658667991288, | |
| "step": 10000, | |
| "total_flos": 8.246852548747592e+18, | |
| "train_loss": 1.5593041332244872, | |
| "train_runtime": 85792.9956, | |
| "train_samples_per_second": 238.714, | |
| "train_steps_per_second": 0.117 | |
| } | |
| ], | |
| "logging_steps": 32, | |
| "max_steps": 10000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.246852548747592e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |