| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 2490, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.040160642570281124, |
| "grad_norm": 3.447462558746338, |
| "learning_rate": 7.2e-06, |
| "loss": 0.5292, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08032128514056225, |
| "grad_norm": 1.379734754562378, |
| "learning_rate": 1.52e-05, |
| "loss": 0.2833, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12048192771084337, |
| "grad_norm": 1.1571083068847656, |
| "learning_rate": 2.32e-05, |
| "loss": 0.2114, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "grad_norm": 1.3929016590118408, |
| "learning_rate": 3.12e-05, |
| "loss": 0.2085, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.20080321285140562, |
| "grad_norm": 1.096237301826477, |
| "learning_rate": 3.9200000000000004e-05, |
| "loss": 0.1772, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "grad_norm": 0.8818596601486206, |
| "learning_rate": 4.72e-05, |
| "loss": 0.1577, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.28112449799196787, |
| "grad_norm": 0.775509774684906, |
| "learning_rate": 5.520000000000001e-05, |
| "loss": 0.1603, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "grad_norm": 1.0346589088439941, |
| "learning_rate": 6.32e-05, |
| "loss": 0.1372, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3614457831325301, |
| "grad_norm": 0.8337526917457581, |
| "learning_rate": 7.12e-05, |
| "loss": 0.1263, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.40160642570281124, |
| "grad_norm": 0.9970714449882507, |
| "learning_rate": 7.920000000000001e-05, |
| "loss": 0.1232, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44176706827309237, |
| "grad_norm": 0.47735488414764404, |
| "learning_rate": 8.72e-05, |
| "loss": 0.113, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 1.0498414039611816, |
| "learning_rate": 9.52e-05, |
| "loss": 0.105, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5220883534136547, |
| "grad_norm": 0.5586184859275818, |
| "learning_rate": 9.999929417599468e-05, |
| "loss": 0.1085, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5622489959839357, |
| "grad_norm": 0.7290149927139282, |
| "learning_rate": 9.999135388478797e-05, |
| "loss": 0.1037, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6024096385542169, |
| "grad_norm": 0.9249761700630188, |
| "learning_rate": 9.997459242813312e-05, |
| "loss": 0.085, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.642570281124498, |
| "grad_norm": 0.6690048575401306, |
| "learning_rate": 9.994901276365323e-05, |
| "loss": 0.0865, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6827309236947792, |
| "grad_norm": 0.4290846288204193, |
| "learning_rate": 9.991461940497786e-05, |
| "loss": 0.0848, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7228915662650602, |
| "grad_norm": 0.6786843538284302, |
| "learning_rate": 9.987141842094658e-05, |
| "loss": 0.089, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7630522088353414, |
| "grad_norm": 0.9659805297851562, |
| "learning_rate": 9.981941743453815e-05, |
| "loss": 0.0819, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8032128514056225, |
| "grad_norm": 0.7636594772338867, |
| "learning_rate": 9.975862562152532e-05, |
| "loss": 0.0878, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8433734939759037, |
| "grad_norm": 0.6373817324638367, |
| "learning_rate": 9.968905370885586e-05, |
| "loss": 0.0767, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8835341365461847, |
| "grad_norm": 0.7489560842514038, |
| "learning_rate": 9.961071397275963e-05, |
| "loss": 0.0724, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9236947791164659, |
| "grad_norm": 0.588756263256073, |
| "learning_rate": 9.952362023658249e-05, |
| "loss": 0.0685, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.4825253486633301, |
| "learning_rate": 9.9427787868347e-05, |
| "loss": 0.0705, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0040160642570282, |
| "grad_norm": 0.4361709654331207, |
| "learning_rate": 9.93232337780408e-05, |
| "loss": 0.0692, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0441767068273093, |
| "grad_norm": 0.6188638210296631, |
| "learning_rate": 9.920997641463272e-05, |
| "loss": 0.0682, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0843373493975903, |
| "grad_norm": 0.4582713842391968, |
| "learning_rate": 9.908803576281736e-05, |
| "loss": 0.0731, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1244979919678715, |
| "grad_norm": 0.6378023028373718, |
| "learning_rate": 9.895743333948874e-05, |
| "loss": 0.0832, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.1646586345381527, |
| "grad_norm": 0.45142269134521484, |
| "learning_rate": 9.881819218994365e-05, |
| "loss": 0.0707, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2048192771084336, |
| "grad_norm": 0.5397023558616638, |
| "learning_rate": 9.867033688381502e-05, |
| "loss": 0.0747, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2449799196787148, |
| "grad_norm": 0.8213003277778625, |
| "learning_rate": 9.85138935107367e-05, |
| "loss": 0.0691, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.285140562248996, |
| "grad_norm": 0.7952361106872559, |
| "learning_rate": 9.834888967573976e-05, |
| "loss": 0.0689, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.3253012048192772, |
| "grad_norm": 0.8249073624610901, |
| "learning_rate": 9.817535449438148e-05, |
| "loss": 0.075, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3654618473895583, |
| "grad_norm": 0.7501718997955322, |
| "learning_rate": 9.799331858760786e-05, |
| "loss": 0.0657, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4056224899598393, |
| "grad_norm": 0.5428710579872131, |
| "learning_rate": 9.78028140763503e-05, |
| "loss": 0.0657, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.4457831325301205, |
| "grad_norm": 0.6559877395629883, |
| "learning_rate": 9.76038745758579e-05, |
| "loss": 0.066, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.4859437751004017, |
| "grad_norm": 0.566528856754303, |
| "learning_rate": 9.739653518976581e-05, |
| "loss": 0.0614, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.5261044176706826, |
| "grad_norm": 0.4224630892276764, |
| "learning_rate": 9.718083250390113e-05, |
| "loss": 0.0662, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.5662650602409638, |
| "grad_norm": 0.5740475058555603, |
| "learning_rate": 9.695680457982713e-05, |
| "loss": 0.065, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.606425702811245, |
| "grad_norm": 0.4563257098197937, |
| "learning_rate": 9.67244909481272e-05, |
| "loss": 0.0672, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.6465863453815262, |
| "grad_norm": 0.5204518437385559, |
| "learning_rate": 9.648393260142948e-05, |
| "loss": 0.0567, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.6867469879518073, |
| "grad_norm": 0.47403523325920105, |
| "learning_rate": 9.623517198717362e-05, |
| "loss": 0.0572, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.7269076305220885, |
| "grad_norm": 0.6863958835601807, |
| "learning_rate": 9.597825300012073e-05, |
| "loss": 0.0616, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.7670682730923695, |
| "grad_norm": 0.4068869352340698, |
| "learning_rate": 9.571322097460793e-05, |
| "loss": 0.0689, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.8072289156626506, |
| "grad_norm": 0.43284496665000916, |
| "learning_rate": 9.544012267654901e-05, |
| "loss": 0.0633, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.8473895582329316, |
| "grad_norm": 0.9921565651893616, |
| "learning_rate": 9.51590062951824e-05, |
| "loss": 0.0653, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.8875502008032128, |
| "grad_norm": 0.6336984634399414, |
| "learning_rate": 9.486992143456792e-05, |
| "loss": 0.0622, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.927710843373494, |
| "grad_norm": 0.48028799891471863, |
| "learning_rate": 9.457291910483409e-05, |
| "loss": 0.0578, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.9678714859437751, |
| "grad_norm": 0.6273623108863831, |
| "learning_rate": 9.426805171317701e-05, |
| "loss": 0.0546, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0080321285140563, |
| "grad_norm": 0.7483130097389221, |
| "learning_rate": 9.395537305461311e-05, |
| "loss": 0.0505, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0481927710843375, |
| "grad_norm": 0.5147470235824585, |
| "learning_rate": 9.363493830248666e-05, |
| "loss": 0.0557, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.0883534136546187, |
| "grad_norm": 0.433223694562912, |
| "learning_rate": 9.33068039987343e-05, |
| "loss": 0.051, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.1285140562248994, |
| "grad_norm": 0.440390408039093, |
| "learning_rate": 9.297102804390798e-05, |
| "loss": 0.0571, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.1686746987951806, |
| "grad_norm": 0.6565499305725098, |
| "learning_rate": 9.26276696869582e-05, |
| "loss": 0.0523, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.208835341365462, |
| "grad_norm": 0.4944412112236023, |
| "learning_rate": 9.227678951477925e-05, |
| "loss": 0.0532, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.248995983935743, |
| "grad_norm": 0.48403704166412354, |
| "learning_rate": 9.19184494415185e-05, |
| "loss": 0.0575, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.289156626506024, |
| "grad_norm": 0.6252164244651794, |
| "learning_rate": 9.15527126976514e-05, |
| "loss": 0.0541, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.3293172690763053, |
| "grad_norm": 0.48273801803588867, |
| "learning_rate": 9.117964381882413e-05, |
| "loss": 0.0518, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.3694779116465865, |
| "grad_norm": 0.7384123206138611, |
| "learning_rate": 9.079930863446612e-05, |
| "loss": 0.0589, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.4096385542168672, |
| "grad_norm": 0.48452362418174744, |
| "learning_rate": 9.041177425617427e-05, |
| "loss": 0.0567, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.4497991967871484, |
| "grad_norm": 0.46295881271362305, |
| "learning_rate": 9.001710906587064e-05, |
| "loss": 0.0561, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.4899598393574296, |
| "grad_norm": 0.49404817819595337, |
| "learning_rate": 8.961538270373639e-05, |
| "loss": 0.0517, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.5301204819277108, |
| "grad_norm": 0.4810844361782074, |
| "learning_rate": 8.920666605592341e-05, |
| "loss": 0.0554, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.570281124497992, |
| "grad_norm": 0.3409781754016876, |
| "learning_rate": 8.879103124204626e-05, |
| "loss": 0.0509, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.610441767068273, |
| "grad_norm": 0.661351203918457, |
| "learning_rate": 8.836855160245629e-05, |
| "loss": 0.0575, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.6506024096385543, |
| "grad_norm": 0.4867478013038635, |
| "learning_rate": 8.79393016853005e-05, |
| "loss": 0.055, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.6907630522088355, |
| "grad_norm": 0.8102213144302368, |
| "learning_rate": 8.750335723336728e-05, |
| "loss": 0.0554, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.7309236947791167, |
| "grad_norm": 0.5746546387672424, |
| "learning_rate": 8.706079517072127e-05, |
| "loss": 0.0529, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.7710843373493974, |
| "grad_norm": 0.6538225412368774, |
| "learning_rate": 8.661169358912978e-05, |
| "loss": 0.0473, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.8112449799196786, |
| "grad_norm": 0.5305560231208801, |
| "learning_rate": 8.615613173428321e-05, |
| "loss": 0.0613, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.8514056224899598, |
| "grad_norm": 0.6382079124450684, |
| "learning_rate": 8.569418999181194e-05, |
| "loss": 0.0477, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.891566265060241, |
| "grad_norm": 0.5018407106399536, |
| "learning_rate": 8.522594987310184e-05, |
| "loss": 0.0454, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.931726907630522, |
| "grad_norm": 0.3260187804698944, |
| "learning_rate": 8.475149400091137e-05, |
| "loss": 0.0499, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.9718875502008033, |
| "grad_norm": 0.39027342200279236, |
| "learning_rate": 8.427090609479245e-05, |
| "loss": 0.0528, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.0120481927710845, |
| "grad_norm": 0.5888795256614685, |
| "learning_rate": 8.378427095631776e-05, |
| "loss": 0.0521, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.0522088353413657, |
| "grad_norm": 0.6883623003959656, |
| "learning_rate": 8.329167445411732e-05, |
| "loss": 0.0449, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.0923694779116464, |
| "grad_norm": 0.6276723742485046, |
| "learning_rate": 8.279320350872655e-05, |
| "loss": 0.0485, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.1325301204819276, |
| "grad_norm": 0.48254498839378357, |
| "learning_rate": 8.228894607724878e-05, |
| "loss": 0.0473, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.1726907630522088, |
| "grad_norm": 0.34066540002822876, |
| "learning_rate": 8.177899113783492e-05, |
| "loss": 0.0542, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.21285140562249, |
| "grad_norm": 0.4851544499397278, |
| "learning_rate": 8.126342867398301e-05, |
| "loss": 0.0491, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.253012048192771, |
| "grad_norm": 0.5018098950386047, |
| "learning_rate": 8.074234965866012e-05, |
| "loss": 0.0467, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.2931726907630523, |
| "grad_norm": 0.47141626477241516, |
| "learning_rate": 8.021584603824996e-05, |
| "loss": 0.0497, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.5625479221343994, |
| "learning_rate": 7.968401071632855e-05, |
| "loss": 0.0515, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.3734939759036147, |
| "grad_norm": 0.38696932792663574, |
| "learning_rate": 7.914693753727091e-05, |
| "loss": 0.05, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.4136546184738954, |
| "grad_norm": 0.4147994816303253, |
| "learning_rate": 7.860472126969213e-05, |
| "loss": 0.0514, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.4538152610441766, |
| "grad_norm": 0.38693082332611084, |
| "learning_rate": 7.805745758972481e-05, |
| "loss": 0.0504, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.4939759036144578, |
| "grad_norm": 0.3496777415275574, |
| "learning_rate": 7.75052430641368e-05, |
| "loss": 0.0458, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.534136546184739, |
| "grad_norm": 0.3338903784751892, |
| "learning_rate": 7.694817513329159e-05, |
| "loss": 0.0413, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.57429718875502, |
| "grad_norm": 0.4792289137840271, |
| "learning_rate": 7.638635209395453e-05, |
| "loss": 0.0461, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.6144578313253013, |
| "grad_norm": 0.3006054162979126, |
| "learning_rate": 7.58198730819481e-05, |
| "loss": 0.0402, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.6546184738955825, |
| "grad_norm": 0.2644276022911072, |
| "learning_rate": 7.524883805465888e-05, |
| "loss": 0.0386, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.694779116465863, |
| "grad_norm": 0.4371125400066376, |
| "learning_rate": 7.467334777339985e-05, |
| "loss": 0.0411, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.734939759036145, |
| "grad_norm": 0.5912031531333923, |
| "learning_rate": 7.409350378563046e-05, |
| "loss": 0.0474, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.7751004016064256, |
| "grad_norm": 0.29996731877326965, |
| "learning_rate": 7.350940840703842e-05, |
| "loss": 0.0522, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.8152610441767068, |
| "grad_norm": 0.49531587958335876, |
| "learning_rate": 7.292116470348554e-05, |
| "loss": 0.0514, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.855421686746988, |
| "grad_norm": 0.44657862186431885, |
| "learning_rate": 7.232887647282147e-05, |
| "loss": 0.0432, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.895582329317269, |
| "grad_norm": 0.36731448769569397, |
| "learning_rate": 7.173264822656806e-05, |
| "loss": 0.0468, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.9357429718875503, |
| "grad_norm": 0.6286973357200623, |
| "learning_rate": 7.113258517147801e-05, |
| "loss": 0.048, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.9759036144578315, |
| "grad_norm": 0.41374683380126953, |
| "learning_rate": 7.052879319097072e-05, |
| "loss": 0.0478, |
| "step": 990 |
| }, |
| { |
| "epoch": 4.016064257028113, |
| "grad_norm": 0.4046255052089691, |
| "learning_rate": 6.992137882644868e-05, |
| "loss": 0.044, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.056224899598393, |
| "grad_norm": 0.47352728247642517, |
| "learning_rate": 6.931044925849789e-05, |
| "loss": 0.0487, |
| "step": 1010 |
| }, |
| { |
| "epoch": 4.096385542168675, |
| "grad_norm": 0.445796936750412, |
| "learning_rate": 6.869611228797546e-05, |
| "loss": 0.0432, |
| "step": 1020 |
| }, |
| { |
| "epoch": 4.136546184738956, |
| "grad_norm": 0.5685417652130127, |
| "learning_rate": 6.807847631698769e-05, |
| "loss": 0.0423, |
| "step": 1030 |
| }, |
| { |
| "epoch": 4.176706827309237, |
| "grad_norm": 0.31597232818603516, |
| "learning_rate": 6.745765032976214e-05, |
| "loss": 0.0415, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.216867469879518, |
| "grad_norm": 0.3107571005821228, |
| "learning_rate": 6.683374387341687e-05, |
| "loss": 0.0428, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.257028112449799, |
| "grad_norm": 0.5054974555969238, |
| "learning_rate": 6.620686703863054e-05, |
| "loss": 0.04, |
| "step": 1060 |
| }, |
| { |
| "epoch": 4.2971887550200805, |
| "grad_norm": 0.3022196590900421, |
| "learning_rate": 6.557713044021642e-05, |
| "loss": 0.0408, |
| "step": 1070 |
| }, |
| { |
| "epoch": 4.337349397590361, |
| "grad_norm": 0.39395079016685486, |
| "learning_rate": 6.494464519760401e-05, |
| "loss": 0.0455, |
| "step": 1080 |
| }, |
| { |
| "epoch": 4.377510040160643, |
| "grad_norm": 0.3358634114265442, |
| "learning_rate": 6.430952291523158e-05, |
| "loss": 0.0362, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.417670682730924, |
| "grad_norm": 0.39022529125213623, |
| "learning_rate": 6.367187566285314e-05, |
| "loss": 0.0443, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.457831325301205, |
| "grad_norm": 0.3976607620716095, |
| "learning_rate": 6.303181595576328e-05, |
| "loss": 0.0421, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.497991967871486, |
| "grad_norm": 0.47239330410957336, |
| "learning_rate": 6.238945673494354e-05, |
| "loss": 0.044, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.538152610441767, |
| "grad_norm": 0.23172323405742645, |
| "learning_rate": 6.174491134713332e-05, |
| "loss": 0.044, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.578313253012048, |
| "grad_norm": 0.5892140865325928, |
| "learning_rate": 6.109829352482964e-05, |
| "loss": 0.0428, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.618473895582329, |
| "grad_norm": 0.9103882908821106, |
| "learning_rate": 6.044971736621842e-05, |
| "loss": 0.0433, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.658634538152611, |
| "grad_norm": 0.3588651716709137, |
| "learning_rate": 5.979929731504158e-05, |
| "loss": 0.0423, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.698795180722891, |
| "grad_norm": 0.5172322988510132, |
| "learning_rate": 5.91471481404029e-05, |
| "loss": 0.042, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.738955823293173, |
| "grad_norm": 0.38006845116615295, |
| "learning_rate": 5.849338491651661e-05, |
| "loss": 0.0394, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.779116465863454, |
| "grad_norm": 0.3825148046016693, |
| "learning_rate": 5.783812300240209e-05, |
| "loss": 0.0355, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.8192771084337345, |
| "grad_norm": 0.6012735366821289, |
| "learning_rate": 5.718147802152833e-05, |
| "loss": 0.0431, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.859437751004016, |
| "grad_norm": 0.41699841618537903, |
| "learning_rate": 5.652356584141177e-05, |
| "loss": 0.0364, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.899598393574297, |
| "grad_norm": 0.3592054843902588, |
| "learning_rate": 5.586450255317097e-05, |
| "loss": 0.036, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.9397590361445785, |
| "grad_norm": 0.5711321234703064, |
| "learning_rate": 5.5204404451041894e-05, |
| "loss": 0.0366, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.979919678714859, |
| "grad_norm": 0.44127699732780457, |
| "learning_rate": 5.4543388011857456e-05, |
| "loss": 0.0316, |
| "step": 1240 |
| }, |
| { |
| "epoch": 5.020080321285141, |
| "grad_norm": 0.4283978044986725, |
| "learning_rate": 5.388156987449454e-05, |
| "loss": 0.0467, |
| "step": 1250 |
| }, |
| { |
| "epoch": 5.0602409638554215, |
| "grad_norm": 0.3291616439819336, |
| "learning_rate": 5.321906681929284e-05, |
| "loss": 0.0411, |
| "step": 1260 |
| }, |
| { |
| "epoch": 5.100401606425703, |
| "grad_norm": 0.2523420751094818, |
| "learning_rate": 5.2555995747448364e-05, |
| "loss": 0.0415, |
| "step": 1270 |
| }, |
| { |
| "epoch": 5.140562248995984, |
| "grad_norm": 0.5164968967437744, |
| "learning_rate": 5.189247366038583e-05, |
| "loss": 0.0381, |
| "step": 1280 |
| }, |
| { |
| "epoch": 5.180722891566265, |
| "grad_norm": 0.2963825464248657, |
| "learning_rate": 5.1228617639113355e-05, |
| "loss": 0.0391, |
| "step": 1290 |
| }, |
| { |
| "epoch": 5.220883534136546, |
| "grad_norm": 0.39251625537872314, |
| "learning_rate": 5.0564544823562945e-05, |
| "loss": 0.0397, |
| "step": 1300 |
| }, |
| { |
| "epoch": 5.261044176706827, |
| "grad_norm": 1.2152100801467896, |
| "learning_rate": 4.9900372391920875e-05, |
| "loss": 0.0366, |
| "step": 1310 |
| }, |
| { |
| "epoch": 5.301204819277109, |
| "grad_norm": 0.35082605481147766, |
| "learning_rate": 4.923621753995099e-05, |
| "loss": 0.0312, |
| "step": 1320 |
| }, |
| { |
| "epoch": 5.341365461847389, |
| "grad_norm": 0.505351722240448, |
| "learning_rate": 4.85721974603152e-05, |
| "loss": 0.036, |
| "step": 1330 |
| }, |
| { |
| "epoch": 5.381526104417671, |
| "grad_norm": 0.2755846679210663, |
| "learning_rate": 4.790842932189424e-05, |
| "loss": 0.0352, |
| "step": 1340 |
| }, |
| { |
| "epoch": 5.421686746987952, |
| "grad_norm": 0.4208662807941437, |
| "learning_rate": 4.724503024911292e-05, |
| "loss": 0.0351, |
| "step": 1350 |
| }, |
| { |
| "epoch": 5.461847389558233, |
| "grad_norm": 0.5660949945449829, |
| "learning_rate": 4.6582117301273006e-05, |
| "loss": 0.0347, |
| "step": 1360 |
| }, |
| { |
| "epoch": 5.502008032128514, |
| "grad_norm": 0.3927326202392578, |
| "learning_rate": 4.591980745189762e-05, |
| "loss": 0.0326, |
| "step": 1370 |
| }, |
| { |
| "epoch": 5.542168674698795, |
| "grad_norm": 0.39894360303878784, |
| "learning_rate": 4.5258217568090876e-05, |
| "loss": 0.0355, |
| "step": 1380 |
| }, |
| { |
| "epoch": 5.582329317269076, |
| "grad_norm": 0.3474249541759491, |
| "learning_rate": 4.4597464389916204e-05, |
| "loss": 0.0361, |
| "step": 1390 |
| }, |
| { |
| "epoch": 5.622489959839357, |
| "grad_norm": 0.4053770899772644, |
| "learning_rate": 4.3937664509797173e-05, |
| "loss": 0.0371, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.662650602409639, |
| "grad_norm": 0.30829307436943054, |
| "learning_rate": 4.3278934351944185e-05, |
| "loss": 0.0402, |
| "step": 1410 |
| }, |
| { |
| "epoch": 5.7028112449799195, |
| "grad_norm": 0.396782249212265, |
| "learning_rate": 4.262139015181111e-05, |
| "loss": 0.0388, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.742971887550201, |
| "grad_norm": 0.5452107191085815, |
| "learning_rate": 4.196514793558508e-05, |
| "loss": 0.0358, |
| "step": 1430 |
| }, |
| { |
| "epoch": 5.783132530120482, |
| "grad_norm": 0.35190486907958984, |
| "learning_rate": 4.13103234997131e-05, |
| "loss": 0.038, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.823293172690763, |
| "grad_norm": 0.43659260869026184, |
| "learning_rate": 4.065703239046951e-05, |
| "loss": 0.035, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.863453815261044, |
| "grad_norm": 0.27444201707839966, |
| "learning_rate": 4.000538988356723e-05, |
| "loss": 0.0349, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.903614457831325, |
| "grad_norm": 0.40698736906051636, |
| "learning_rate": 3.9355510963817046e-05, |
| "loss": 0.0354, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.943775100401607, |
| "grad_norm": 0.40334540605545044, |
| "learning_rate": 3.8707510304838e-05, |
| "loss": 0.0379, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.983935742971887, |
| "grad_norm": 0.4215904474258423, |
| "learning_rate": 3.80615022488229e-05, |
| "loss": 0.0344, |
| "step": 1490 |
| }, |
| { |
| "epoch": 6.024096385542169, |
| "grad_norm": 0.30104267597198486, |
| "learning_rate": 3.741760078636213e-05, |
| "loss": 0.0338, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.06425702811245, |
| "grad_norm": 0.6103472709655762, |
| "learning_rate": 3.677591953632955e-05, |
| "loss": 0.0363, |
| "step": 1510 |
| }, |
| { |
| "epoch": 6.104417670682731, |
| "grad_norm": 0.40117689967155457, |
| "learning_rate": 3.6136571725834116e-05, |
| "loss": 0.0327, |
| "step": 1520 |
| }, |
| { |
| "epoch": 6.144578313253012, |
| "grad_norm": 0.2717328667640686, |
| "learning_rate": 3.5499670170240395e-05, |
| "loss": 0.0385, |
| "step": 1530 |
| }, |
| { |
| "epoch": 6.184738955823293, |
| "grad_norm": 0.28105250000953674, |
| "learning_rate": 3.486532725326199e-05, |
| "loss": 0.0343, |
| "step": 1540 |
| }, |
| { |
| "epoch": 6.224899598393574, |
| "grad_norm": 0.3413033187389374, |
| "learning_rate": 3.4233654907130875e-05, |
| "loss": 0.034, |
| "step": 1550 |
| }, |
| { |
| "epoch": 6.265060240963855, |
| "grad_norm": 0.2755061388015747, |
| "learning_rate": 3.3604764592846636e-05, |
| "loss": 0.0326, |
| "step": 1560 |
| }, |
| { |
| "epoch": 6.305220883534137, |
| "grad_norm": 0.7181720733642578, |
| "learning_rate": 3.2978767280508736e-05, |
| "loss": 0.0347, |
| "step": 1570 |
| }, |
| { |
| "epoch": 6.3453815261044175, |
| "grad_norm": 0.2261345237493515, |
| "learning_rate": 3.2355773429735314e-05, |
| "loss": 0.03, |
| "step": 1580 |
| }, |
| { |
| "epoch": 6.385542168674699, |
| "grad_norm": 0.5460085868835449, |
| "learning_rate": 3.17358929701723e-05, |
| "loss": 0.0304, |
| "step": 1590 |
| }, |
| { |
| "epoch": 6.42570281124498, |
| "grad_norm": 0.955663800239563, |
| "learning_rate": 3.111923528209577e-05, |
| "loss": 0.0338, |
| "step": 1600 |
| }, |
| { |
| "epoch": 6.4658634538152615, |
| "grad_norm": 0.26345881819725037, |
| "learning_rate": 3.0505909177111574e-05, |
| "loss": 0.032, |
| "step": 1610 |
| }, |
| { |
| "epoch": 6.506024096385542, |
| "grad_norm": 0.8473414778709412, |
| "learning_rate": 2.9896022878954878e-05, |
| "loss": 0.0314, |
| "step": 1620 |
| }, |
| { |
| "epoch": 6.546184738955823, |
| "grad_norm": 0.2904120087623596, |
| "learning_rate": 2.9289684004393836e-05, |
| "loss": 0.0317, |
| "step": 1630 |
| }, |
| { |
| "epoch": 6.586345381526105, |
| "grad_norm": 0.4706421196460724, |
| "learning_rate": 2.8686999544240172e-05, |
| "loss": 0.0337, |
| "step": 1640 |
| }, |
| { |
| "epoch": 6.626506024096385, |
| "grad_norm": 0.4121145009994507, |
| "learning_rate": 2.808807584447018e-05, |
| "loss": 0.0279, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.3063754737377167, |
| "learning_rate": 2.7493018587459628e-05, |
| "loss": 0.033, |
| "step": 1660 |
| }, |
| { |
| "epoch": 6.706827309236948, |
| "grad_norm": 0.5062171220779419, |
| "learning_rate": 2.6901932773335692e-05, |
| "loss": 0.0419, |
| "step": 1670 |
| }, |
| { |
| "epoch": 6.746987951807229, |
| "grad_norm": 0.41636672616004944, |
| "learning_rate": 2.6314922701449286e-05, |
| "loss": 0.0283, |
| "step": 1680 |
| }, |
| { |
| "epoch": 6.78714859437751, |
| "grad_norm": 0.6910147666931152, |
| "learning_rate": 2.5732091951970937e-05, |
| "loss": 0.0319, |
| "step": 1690 |
| }, |
| { |
| "epoch": 6.827309236947791, |
| "grad_norm": 0.6493785381317139, |
| "learning_rate": 2.515354336761391e-05, |
| "loss": 0.0335, |
| "step": 1700 |
| }, |
| { |
| "epoch": 6.867469879518072, |
| "grad_norm": 0.3420906960964203, |
| "learning_rate": 2.457937903548695e-05, |
| "loss": 0.0306, |
| "step": 1710 |
| }, |
| { |
| "epoch": 6.907630522088353, |
| "grad_norm": 0.33870965242385864, |
| "learning_rate": 2.4009700269080793e-05, |
| "loss": 0.0298, |
| "step": 1720 |
| }, |
| { |
| "epoch": 6.947791164658635, |
| "grad_norm": 0.3856060802936554, |
| "learning_rate": 2.344460759039097e-05, |
| "loss": 0.0278, |
| "step": 1730 |
| }, |
| { |
| "epoch": 6.9879518072289155, |
| "grad_norm": 0.3366270363330841, |
| "learning_rate": 2.2884200712180227e-05, |
| "loss": 0.0351, |
| "step": 1740 |
| }, |
| { |
| "epoch": 7.028112449799197, |
| "grad_norm": 0.4241548180580139, |
| "learning_rate": 2.2328578520384037e-05, |
| "loss": 0.0359, |
| "step": 1750 |
| }, |
| { |
| "epoch": 7.068273092369478, |
| "grad_norm": 0.38243892788887024, |
| "learning_rate": 2.1777839056661554e-05, |
| "loss": 0.0315, |
| "step": 1760 |
| }, |
| { |
| "epoch": 7.108433734939759, |
| "grad_norm": 0.5330711603164673, |
| "learning_rate": 2.123207950109596e-05, |
| "loss": 0.0307, |
| "step": 1770 |
| }, |
| { |
| "epoch": 7.14859437751004, |
| "grad_norm": 0.29397761821746826, |
| "learning_rate": 2.0691396155046595e-05, |
| "loss": 0.0279, |
| "step": 1780 |
| }, |
| { |
| "epoch": 7.188755020080321, |
| "grad_norm": 0.3870997130870819, |
| "learning_rate": 2.0155884424156242e-05, |
| "loss": 0.0332, |
| "step": 1790 |
| }, |
| { |
| "epoch": 7.228915662650603, |
| "grad_norm": 0.9195613861083984, |
| "learning_rate": 1.9625638801516407e-05, |
| "loss": 0.0309, |
| "step": 1800 |
| }, |
| { |
| "epoch": 7.269076305220883, |
| "grad_norm": 0.3219093680381775, |
| "learning_rate": 1.9100752850993687e-05, |
| "loss": 0.0281, |
| "step": 1810 |
| }, |
| { |
| "epoch": 7.309236947791165, |
| "grad_norm": 0.2664417028427124, |
| "learning_rate": 1.8581319190720035e-05, |
| "loss": 0.0289, |
| "step": 1820 |
| }, |
| { |
| "epoch": 7.349397590361446, |
| "grad_norm": 0.25901272892951965, |
| "learning_rate": 1.806742947674997e-05, |
| "loss": 0.0324, |
| "step": 1830 |
| }, |
| { |
| "epoch": 7.389558232931727, |
| "grad_norm": 0.3953966200351715, |
| "learning_rate": 1.7559174386887477e-05, |
| "loss": 0.0296, |
| "step": 1840 |
| }, |
| { |
| "epoch": 7.429718875502008, |
| "grad_norm": 1.268621802330017, |
| "learning_rate": 1.7056643604685596e-05, |
| "loss": 0.0255, |
| "step": 1850 |
| }, |
| { |
| "epoch": 7.469879518072289, |
| "grad_norm": 0.2772993743419647, |
| "learning_rate": 1.65599258036214e-05, |
| "loss": 0.0318, |
| "step": 1860 |
| }, |
| { |
| "epoch": 7.51004016064257, |
| "grad_norm": 0.22611959278583527, |
| "learning_rate": 1.6069108631449225e-05, |
| "loss": 0.0293, |
| "step": 1870 |
| }, |
| { |
| "epoch": 7.550200803212851, |
| "grad_norm": 0.27701687812805176, |
| "learning_rate": 1.5584278694734888e-05, |
| "loss": 0.0243, |
| "step": 1880 |
| }, |
| { |
| "epoch": 7.590361445783133, |
| "grad_norm": 0.31095728278160095, |
| "learning_rate": 1.5105521543573647e-05, |
| "loss": 0.0304, |
| "step": 1890 |
| }, |
| { |
| "epoch": 7.6305220883534135, |
| "grad_norm": 0.44475099444389343, |
| "learning_rate": 1.4632921656494469e-05, |
| "loss": 0.0313, |
| "step": 1900 |
| }, |
| { |
| "epoch": 7.670682730923695, |
| "grad_norm": 0.31613418459892273, |
| "learning_rate": 1.416656242555366e-05, |
| "loss": 0.0331, |
| "step": 1910 |
| }, |
| { |
| "epoch": 7.710843373493976, |
| "grad_norm": 0.31190618872642517, |
| "learning_rate": 1.3706526141619792e-05, |
| "loss": 0.0266, |
| "step": 1920 |
| }, |
| { |
| "epoch": 7.7510040160642575, |
| "grad_norm": 0.3129827082157135, |
| "learning_rate": 1.3252893979853304e-05, |
| "loss": 0.03, |
| "step": 1930 |
| }, |
| { |
| "epoch": 7.791164658634538, |
| "grad_norm": 0.2716180980205536, |
| "learning_rate": 1.2805745985382867e-05, |
| "loss": 0.0277, |
| "step": 1940 |
| }, |
| { |
| "epoch": 7.831325301204819, |
| "grad_norm": 0.2133369892835617, |
| "learning_rate": 1.2365161059180942e-05, |
| "loss": 0.0285, |
| "step": 1950 |
| }, |
| { |
| "epoch": 7.871485943775101, |
| "grad_norm": 0.18476147949695587, |
| "learning_rate": 1.1931216944141621e-05, |
| "loss": 0.0256, |
| "step": 1960 |
| }, |
| { |
| "epoch": 7.911646586345381, |
| "grad_norm": 0.39151495695114136, |
| "learning_rate": 1.1503990211362403e-05, |
| "loss": 0.0286, |
| "step": 1970 |
| }, |
| { |
| "epoch": 7.951807228915663, |
| "grad_norm": 0.3535381853580475, |
| "learning_rate": 1.1083556246633048e-05, |
| "loss": 0.0296, |
| "step": 1980 |
| }, |
| { |
| "epoch": 7.991967871485944, |
| "grad_norm": 0.9443646669387817, |
| "learning_rate": 1.0669989237133437e-05, |
| "loss": 0.027, |
| "step": 1990 |
| }, |
| { |
| "epoch": 8.032128514056225, |
| "grad_norm": 0.44257789850234985, |
| "learning_rate": 1.0263362158342948e-05, |
| "loss": 0.0306, |
| "step": 2000 |
| }, |
| { |
| "epoch": 8.072289156626505, |
| "grad_norm": 0.3381843864917755, |
| "learning_rate": 9.863746761163679e-06, |
| "loss": 0.0278, |
| "step": 2010 |
| }, |
| { |
| "epoch": 8.112449799196787, |
| "grad_norm": 0.22615911066532135, |
| "learning_rate": 9.471213559259684e-06, |
| "loss": 0.0321, |
| "step": 2020 |
| }, |
| { |
| "epoch": 8.152610441767068, |
| "grad_norm": 0.334036648273468, |
| "learning_rate": 9.08583181661461e-06, |
| "loss": 0.0264, |
| "step": 2030 |
| }, |
| { |
| "epoch": 8.19277108433735, |
| "grad_norm": 0.2973599135875702, |
| "learning_rate": 8.707669535309793e-06, |
| "loss": 0.0287, |
| "step": 2040 |
| }, |
| { |
| "epoch": 8.23293172690763, |
| "grad_norm": 0.5360568761825562, |
| "learning_rate": 8.33679344352501e-06, |
| "loss": 0.0289, |
| "step": 2050 |
| }, |
| { |
| "epoch": 8.273092369477911, |
| "grad_norm": 0.5042163729667664, |
| "learning_rate": 7.97326898376406e-06, |
| "loss": 0.0254, |
| "step": 2060 |
| }, |
| { |
| "epoch": 8.313253012048193, |
| "grad_norm": 0.191526398062706, |
| "learning_rate": 7.617160301307169e-06, |
| "loss": 0.0282, |
| "step": 2070 |
| }, |
| { |
| "epoch": 8.353413654618475, |
| "grad_norm": 0.3822993040084839, |
| "learning_rate": 7.268530232892317e-06, |
| "loss": 0.0292, |
| "step": 2080 |
| }, |
| { |
| "epoch": 8.393574297188755, |
| "grad_norm": 0.5774808526039124, |
| "learning_rate": 6.9274402956274686e-06, |
| "loss": 0.0263, |
| "step": 2090 |
| }, |
| { |
| "epoch": 8.433734939759036, |
| "grad_norm": 0.3628866970539093, |
| "learning_rate": 6.593950676135624e-06, |
| "loss": 0.0268, |
| "step": 2100 |
| }, |
| { |
| "epoch": 8.473895582329318, |
| "grad_norm": 0.4431964159011841, |
| "learning_rate": 6.268120219934631e-06, |
| "loss": 0.028, |
| "step": 2110 |
| }, |
| { |
| "epoch": 8.514056224899598, |
| "grad_norm": 0.5086953043937683, |
| "learning_rate": 5.950006421053772e-06, |
| "loss": 0.0257, |
| "step": 2120 |
| }, |
| { |
| "epoch": 8.55421686746988, |
| "grad_norm": 0.5995669364929199, |
| "learning_rate": 5.639665411888584e-06, |
| "loss": 0.0298, |
| "step": 2130 |
| }, |
| { |
| "epoch": 8.594377510040161, |
| "grad_norm": 1.7701818943023682, |
| "learning_rate": 5.337151953296188e-06, |
| "loss": 0.0269, |
| "step": 2140 |
| }, |
| { |
| "epoch": 8.634538152610443, |
| "grad_norm": 0.30067703127861023, |
| "learning_rate": 5.042519424932513e-06, |
| "loss": 0.0261, |
| "step": 2150 |
| }, |
| { |
| "epoch": 8.674698795180722, |
| "grad_norm": 0.6958829760551453, |
| "learning_rate": 4.755819815833174e-06, |
| "loss": 0.0242, |
| "step": 2160 |
| }, |
| { |
| "epoch": 8.714859437751004, |
| "grad_norm": 0.4430091679096222, |
| "learning_rate": 4.477103715239922e-06, |
| "loss": 0.0238, |
| "step": 2170 |
| }, |
| { |
| "epoch": 8.755020080321286, |
| "grad_norm": 0.359072208404541, |
| "learning_rate": 4.2064203036738746e-06, |
| "loss": 0.0245, |
| "step": 2180 |
| }, |
| { |
| "epoch": 8.795180722891565, |
| "grad_norm": 0.23414134979248047, |
| "learning_rate": 3.9438173442575e-06, |
| "loss": 0.024, |
| "step": 2190 |
| }, |
| { |
| "epoch": 8.835341365461847, |
| "grad_norm": 0.24696533381938934, |
| "learning_rate": 3.6893411742865814e-06, |
| "loss": 0.0258, |
| "step": 2200 |
| }, |
| { |
| "epoch": 8.875502008032129, |
| "grad_norm": 0.2517929673194885, |
| "learning_rate": 3.443036697053875e-06, |
| "loss": 0.0333, |
| "step": 2210 |
| }, |
| { |
| "epoch": 8.91566265060241, |
| "grad_norm": 0.7909056544303894, |
| "learning_rate": 3.204947373925693e-06, |
| "loss": 0.0272, |
| "step": 2220 |
| }, |
| { |
| "epoch": 8.95582329317269, |
| "grad_norm": 0.2984614670276642, |
| "learning_rate": 2.97511521667303e-06, |
| "loss": 0.0281, |
| "step": 2230 |
| }, |
| { |
| "epoch": 8.995983935742972, |
| "grad_norm": 0.23609600961208344, |
| "learning_rate": 2.7535807800583957e-06, |
| "loss": 0.025, |
| "step": 2240 |
| }, |
| { |
| "epoch": 9.036144578313253, |
| "grad_norm": 0.3227684795856476, |
| "learning_rate": 2.5403831546797875e-06, |
| "loss": 0.021, |
| "step": 2250 |
| }, |
| { |
| "epoch": 9.076305220883533, |
| "grad_norm": 0.25792595744132996, |
| "learning_rate": 2.3355599600729915e-06, |
| "loss": 0.0278, |
| "step": 2260 |
| }, |
| { |
| "epoch": 9.116465863453815, |
| "grad_norm": 0.4950139820575714, |
| "learning_rate": 2.139147338073466e-06, |
| "loss": 0.0266, |
| "step": 2270 |
| }, |
| { |
| "epoch": 9.156626506024097, |
| "grad_norm": 0.27984389662742615, |
| "learning_rate": 1.9511799464390247e-06, |
| "loss": 0.0252, |
| "step": 2280 |
| }, |
| { |
| "epoch": 9.196787148594378, |
| "grad_norm": 0.36016976833343506, |
| "learning_rate": 1.7716909527342839e-06, |
| "loss": 0.0313, |
| "step": 2290 |
| }, |
| { |
| "epoch": 9.236947791164658, |
| "grad_norm": 0.19290144741535187, |
| "learning_rate": 1.6007120284781518e-06, |
| "loss": 0.0237, |
| "step": 2300 |
| }, |
| { |
| "epoch": 9.27710843373494, |
| "grad_norm": 0.3681156039237976, |
| "learning_rate": 1.4382733435552464e-06, |
| "loss": 0.0265, |
| "step": 2310 |
| }, |
| { |
| "epoch": 9.317269076305221, |
| "grad_norm": 0.31840649247169495, |
| "learning_rate": 1.2844035608923222e-06, |
| "loss": 0.0277, |
| "step": 2320 |
| }, |
| { |
| "epoch": 9.357429718875501, |
| "grad_norm": 0.21902896463871002, |
| "learning_rate": 1.1391298314006037e-06, |
| "loss": 0.029, |
| "step": 2330 |
| }, |
| { |
| "epoch": 9.397590361445783, |
| "grad_norm": 0.299748957157135, |
| "learning_rate": 1.0024777891848359e-06, |
| "loss": 0.0271, |
| "step": 2340 |
| }, |
| { |
| "epoch": 9.437751004016064, |
| "grad_norm": 0.2108592987060547, |
| "learning_rate": 8.744715470201336e-07, |
| "loss": 0.0265, |
| "step": 2350 |
| }, |
| { |
| "epoch": 9.477911646586346, |
| "grad_norm": 0.2198696732521057, |
| "learning_rate": 7.551336920971374e-07, |
| "loss": 0.0236, |
| "step": 2360 |
| }, |
| { |
| "epoch": 9.518072289156626, |
| "grad_norm": 0.2825567424297333, |
| "learning_rate": 6.444852820364222e-07, |
| "loss": 0.0224, |
| "step": 2370 |
| }, |
| { |
| "epoch": 9.558232931726907, |
| "grad_norm": 0.279225617647171, |
| "learning_rate": 5.425458411728202e-07, |
| "loss": 0.0265, |
| "step": 2380 |
| }, |
| { |
| "epoch": 9.598393574297189, |
| "grad_norm": 1.2663246393203735, |
| "learning_rate": 4.4933335711025983e-07, |
| "loss": 0.0237, |
| "step": 2390 |
| }, |
| { |
| "epoch": 9.638554216867469, |
| "grad_norm": 0.1881272941827774, |
| "learning_rate": 3.648642775477884e-07, |
| "loss": 0.0242, |
| "step": 2400 |
| }, |
| { |
| "epoch": 9.67871485943775, |
| "grad_norm": 0.602509617805481, |
| "learning_rate": 2.891535073773155e-07, |
| "loss": 0.032, |
| "step": 2410 |
| }, |
| { |
| "epoch": 9.718875502008032, |
| "grad_norm": 0.3743922710418701, |
| "learning_rate": 2.2221440605359466e-07, |
| "loss": 0.0259, |
| "step": 2420 |
| }, |
| { |
| "epoch": 9.759036144578314, |
| "grad_norm": 0.23908697068691254, |
| "learning_rate": 1.6405878523686468e-07, |
| "loss": 0.0294, |
| "step": 2430 |
| }, |
| { |
| "epoch": 9.799196787148594, |
| "grad_norm": 0.7279542088508606, |
| "learning_rate": 1.1469690670868894e-07, |
| "loss": 0.0268, |
| "step": 2440 |
| }, |
| { |
| "epoch": 9.839357429718875, |
| "grad_norm": 0.3277071714401245, |
| "learning_rate": 7.413748056117609e-08, |
| "loss": 0.0276, |
| "step": 2450 |
| }, |
| { |
| "epoch": 9.879518072289157, |
| "grad_norm": 0.4907410740852356, |
| "learning_rate": 4.2387663660081735e-08, |
| "loss": 0.0227, |
| "step": 2460 |
| }, |
| { |
| "epoch": 9.919678714859439, |
| "grad_norm": 0.47046899795532227, |
| "learning_rate": 1.9453058381940782e-08, |
| "loss": 0.0317, |
| "step": 2470 |
| }, |
| { |
| "epoch": 9.959839357429718, |
| "grad_norm": 0.5400535464286804, |
| "learning_rate": 5.337711625497121e-09, |
| "loss": 0.0261, |
| "step": 2480 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.32108399271965027, |
| "learning_rate": 4.4114097635938875e-11, |
| "loss": 0.0245, |
| "step": 2490 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 2490, |
| "total_flos": 0.0, |
| "train_loss": 0.049736060142277716, |
| "train_runtime": 2579.4556, |
| "train_samples_per_second": 47.161, |
| "train_steps_per_second": 0.965 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2490, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 49, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|