| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 8862, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0022568269013766643, |
| "grad_norm": 342.0, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 6.6491, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0045136538027533285, |
| "grad_norm": 60.75, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.1305, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006770480704129994, |
| "grad_norm": 13.9375, |
| "learning_rate": 6e-06, |
| "loss": 0.2525, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.009027307605506657, |
| "grad_norm": 100.0, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.2105, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011284134506883321, |
| "grad_norm": 84.5, |
| "learning_rate": 1e-05, |
| "loss": 0.0882, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.013540961408259987, |
| "grad_norm": 0.06591796875, |
| "learning_rate": 9.992448840897079e-06, |
| "loss": 0.2672, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01579778830963665, |
| "grad_norm": 0.00982666015625, |
| "learning_rate": 9.984897681794156e-06, |
| "loss": 0.0005, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.018054615211013314, |
| "grad_norm": 0.00927734375, |
| "learning_rate": 9.977346522691234e-06, |
| "loss": 0.0073, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.020311442112389978, |
| "grad_norm": 0.0020751953125, |
| "learning_rate": 9.969795363588312e-06, |
| "loss": 0.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.022568269013766643, |
| "grad_norm": 0.0023345947265625, |
| "learning_rate": 9.96224420448539e-06, |
| "loss": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02482509591514331, |
| "grad_norm": 0.0027618408203125, |
| "learning_rate": 9.954693045382467e-06, |
| "loss": 0.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.027081922816519974, |
| "grad_norm": 0.0198974609375, |
| "learning_rate": 9.947141886279545e-06, |
| "loss": 0.0001, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.02933874971789664, |
| "grad_norm": 0.005035400390625, |
| "learning_rate": 9.939590727176623e-06, |
| "loss": 0.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0315955766192733, |
| "grad_norm": 0.0927734375, |
| "learning_rate": 9.9320395680737e-06, |
| "loss": 0.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.033852403520649964, |
| "grad_norm": 0.0025177001953125, |
| "learning_rate": 9.924488408970778e-06, |
| "loss": 0.6798, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03610923042202663, |
| "grad_norm": 0.01239013671875, |
| "learning_rate": 9.916937249867854e-06, |
| "loss": 0.216, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03836605732340329, |
| "grad_norm": 0.00543212890625, |
| "learning_rate": 9.909386090764932e-06, |
| "loss": 0.0005, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.040622884224779957, |
| "grad_norm": 0.006072998046875, |
| "learning_rate": 9.901834931662012e-06, |
| "loss": 0.4586, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04287971112615662, |
| "grad_norm": 0.00101470947265625, |
| "learning_rate": 9.89428377255909e-06, |
| "loss": 0.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.045136538027533285, |
| "grad_norm": 0.000591278076171875, |
| "learning_rate": 9.886732613456166e-06, |
| "loss": 0.0001, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04739336492890995, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 9.879181454353243e-06, |
| "loss": 0.4003, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04965019183028662, |
| "grad_norm": 0.00171661376953125, |
| "learning_rate": 9.871630295250321e-06, |
| "loss": 0.0001, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.051907018731663285, |
| "grad_norm": 0.0009918212890625, |
| "learning_rate": 9.8640791361474e-06, |
| "loss": 0.38, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05416384563303995, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 9.856527977044477e-06, |
| "loss": 0.6111, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05642067253441661, |
| "grad_norm": 0.0033721923828125, |
| "learning_rate": 9.848976817941554e-06, |
| "loss": 0.3474, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05867749943579328, |
| "grad_norm": 2.5, |
| "learning_rate": 9.841425658838632e-06, |
| "loss": 0.0004, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06093432633716994, |
| "grad_norm": 0.000698089599609375, |
| "learning_rate": 9.83387449973571e-06, |
| "loss": 0.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0631911532385466, |
| "grad_norm": 0.0159912109375, |
| "learning_rate": 9.82632334063279e-06, |
| "loss": 0.0001, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06544798013992327, |
| "grad_norm": 0.01904296875, |
| "learning_rate": 9.818772181529865e-06, |
| "loss": 0.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06770480704129993, |
| "grad_norm": 0.00119781494140625, |
| "learning_rate": 9.811221022426943e-06, |
| "loss": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0699616339426766, |
| "grad_norm": 0.00106048583984375, |
| "learning_rate": 9.803669863324021e-06, |
| "loss": 0.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.07221846084405326, |
| "grad_norm": 0.00186920166015625, |
| "learning_rate": 9.796118704221099e-06, |
| "loss": 0.2718, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07447528774542993, |
| "grad_norm": 17.25, |
| "learning_rate": 9.788567545118176e-06, |
| "loss": 0.0021, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07673211464680658, |
| "grad_norm": 0.16796875, |
| "learning_rate": 9.781016386015254e-06, |
| "loss": 0.3681, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07898894154818326, |
| "grad_norm": 0.041015625, |
| "learning_rate": 9.773465226912332e-06, |
| "loss": 0.2174, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08124576844955991, |
| "grad_norm": 0.005401611328125, |
| "learning_rate": 9.76591406780941e-06, |
| "loss": 0.0003, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08350259535093658, |
| "grad_norm": 0.0069580078125, |
| "learning_rate": 9.758362908706487e-06, |
| "loss": 0.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08575942225231324, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 9.750811749603565e-06, |
| "loss": 0.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08801624915368991, |
| "grad_norm": 0.00099945068359375, |
| "learning_rate": 9.743260590500643e-06, |
| "loss": 0.0001, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.09027307605506657, |
| "grad_norm": 69.0, |
| "learning_rate": 9.73570943139772e-06, |
| "loss": 0.3307, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09252990295644324, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 9.728158272294797e-06, |
| "loss": 0.0001, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0947867298578199, |
| "grad_norm": 0.002288818359375, |
| "learning_rate": 9.720607113191876e-06, |
| "loss": 0.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09704355675919657, |
| "grad_norm": 0.00250244140625, |
| "learning_rate": 9.713055954088954e-06, |
| "loss": 0.0001, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09930038366057324, |
| "grad_norm": 0.00469970703125, |
| "learning_rate": 9.705504794986032e-06, |
| "loss": 0.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1015572105619499, |
| "grad_norm": 0.000396728515625, |
| "learning_rate": 9.697953635883108e-06, |
| "loss": 0.0001, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.10381403746332657, |
| "grad_norm": 57.5, |
| "learning_rate": 9.690402476780186e-06, |
| "loss": 0.3762, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10607086436470323, |
| "grad_norm": 0.00113677978515625, |
| "learning_rate": 9.682851317677265e-06, |
| "loss": 0.0001, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1083276912660799, |
| "grad_norm": 0.000865936279296875, |
| "learning_rate": 9.675300158574343e-06, |
| "loss": 0.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.11058451816745656, |
| "grad_norm": 0.0208740234375, |
| "learning_rate": 9.66774899947142e-06, |
| "loss": 0.3327, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.11284134506883323, |
| "grad_norm": 0.00079345703125, |
| "learning_rate": 9.660197840368497e-06, |
| "loss": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11509817197020988, |
| "grad_norm": 0.002532958984375, |
| "learning_rate": 9.652646681265574e-06, |
| "loss": 0.8702, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.11735499887158655, |
| "grad_norm": 0.029541015625, |
| "learning_rate": 9.645095522162652e-06, |
| "loss": 0.0, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11961182577296321, |
| "grad_norm": 0.0025177001953125, |
| "learning_rate": 9.637544363059732e-06, |
| "loss": 0.1008, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.12186865267433988, |
| "grad_norm": 0.00628662109375, |
| "learning_rate": 9.629993203956808e-06, |
| "loss": 0.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.12412547957571654, |
| "grad_norm": 0.0291748046875, |
| "learning_rate": 9.622442044853885e-06, |
| "loss": 0.0002, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1263823064770932, |
| "grad_norm": 0.26171875, |
| "learning_rate": 9.614890885750963e-06, |
| "loss": 0.0001, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12863913337846988, |
| "grad_norm": 0.0028839111328125, |
| "learning_rate": 9.607339726648041e-06, |
| "loss": 0.0, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.13089596027984654, |
| "grad_norm": 0.0004730224609375, |
| "learning_rate": 9.599788567545119e-06, |
| "loss": 0.0, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1331527871812232, |
| "grad_norm": 0.0004100799560546875, |
| "learning_rate": 9.592237408442197e-06, |
| "loss": 0.0, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.13540961408259986, |
| "grad_norm": 0.0712890625, |
| "learning_rate": 9.584686249339274e-06, |
| "loss": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13766644098397654, |
| "grad_norm": 0.0244140625, |
| "learning_rate": 9.577135090236352e-06, |
| "loss": 0.0198, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.1399232678853532, |
| "grad_norm": 16.625, |
| "learning_rate": 9.56958393113343e-06, |
| "loss": 0.0008, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.14218009478672985, |
| "grad_norm": 1.6796875, |
| "learning_rate": 9.562032772030508e-06, |
| "loss": 0.0001, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.1444369216881065, |
| "grad_norm": 0.00055694580078125, |
| "learning_rate": 9.554481612927585e-06, |
| "loss": 0.0, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1466937485894832, |
| "grad_norm": 0.000457763671875, |
| "learning_rate": 9.546930453824663e-06, |
| "loss": 0.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14895057549085985, |
| "grad_norm": 0.001251220703125, |
| "learning_rate": 9.53937929472174e-06, |
| "loss": 0.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.1512074023922365, |
| "grad_norm": 0.00077056884765625, |
| "learning_rate": 9.531828135618819e-06, |
| "loss": 0.3169, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.15346422929361317, |
| "grad_norm": 0.00122833251953125, |
| "learning_rate": 9.524276976515896e-06, |
| "loss": 0.7091, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.15572105619498985, |
| "grad_norm": 0.00067901611328125, |
| "learning_rate": 9.516725817412974e-06, |
| "loss": 0.0, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1579778830963665, |
| "grad_norm": 0.00445556640625, |
| "learning_rate": 9.50917465831005e-06, |
| "loss": 0.4241, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.16023470999774317, |
| "grad_norm": 0.000370025634765625, |
| "learning_rate": 9.501623499207128e-06, |
| "loss": 0.0071, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.16249153689911983, |
| "grad_norm": 0.0016326904296875, |
| "learning_rate": 9.494072340104207e-06, |
| "loss": 0.2712, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1647483638004965, |
| "grad_norm": 0.07470703125, |
| "learning_rate": 9.486521181001285e-06, |
| "loss": 0.0, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16700519070187317, |
| "grad_norm": 0.10302734375, |
| "learning_rate": 9.478970021898363e-06, |
| "loss": 0.3055, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.16926201760324983, |
| "grad_norm": 0.032470703125, |
| "learning_rate": 9.471418862795439e-06, |
| "loss": 0.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.17151884450462648, |
| "grad_norm": 0.0003681182861328125, |
| "learning_rate": 9.463867703692517e-06, |
| "loss": 0.0, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.17377567140600317, |
| "grad_norm": 0.0004787445068359375, |
| "learning_rate": 9.456316544589596e-06, |
| "loss": 0.0, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.17603249830737983, |
| "grad_norm": 0.0021514892578125, |
| "learning_rate": 9.448765385486674e-06, |
| "loss": 0.0003, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.17828932520875648, |
| "grad_norm": 0.00040435791015625, |
| "learning_rate": 9.44121422638375e-06, |
| "loss": 0.0, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.18054615211013314, |
| "grad_norm": 0.00135040283203125, |
| "learning_rate": 9.433663067280828e-06, |
| "loss": 0.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.18280297901150983, |
| "grad_norm": 0.000331878662109375, |
| "learning_rate": 9.426111908177906e-06, |
| "loss": 0.0, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.18505980591288648, |
| "grad_norm": 0.00049591064453125, |
| "learning_rate": 9.418560749074985e-06, |
| "loss": 0.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.18731663281426314, |
| "grad_norm": 0.0003795623779296875, |
| "learning_rate": 9.411009589972061e-06, |
| "loss": 0.0, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.1895734597156398, |
| "grad_norm": 0.000629425048828125, |
| "learning_rate": 9.403458430869139e-06, |
| "loss": 0.3432, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.19183028661701648, |
| "grad_norm": 0.00048828125, |
| "learning_rate": 9.395907271766217e-06, |
| "loss": 0.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.19408711351839314, |
| "grad_norm": 0.0004253387451171875, |
| "learning_rate": 9.388356112663294e-06, |
| "loss": 0.0, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.1963439404197698, |
| "grad_norm": 0.000579833984375, |
| "learning_rate": 9.380804953560372e-06, |
| "loss": 0.0, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.19860076732114648, |
| "grad_norm": 0.007232666015625, |
| "learning_rate": 9.37325379445745e-06, |
| "loss": 0.0, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.20085759422252314, |
| "grad_norm": 0.003631591796875, |
| "learning_rate": 9.365702635354528e-06, |
| "loss": 0.1363, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2031144211238998, |
| "grad_norm": 0.003875732421875, |
| "learning_rate": 9.358151476251605e-06, |
| "loss": 0.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.20537124802527645, |
| "grad_norm": 0.00830078125, |
| "learning_rate": 9.350600317148683e-06, |
| "loss": 0.0, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.20762807492665314, |
| "grad_norm": 0.00213623046875, |
| "learning_rate": 9.343049158045761e-06, |
| "loss": 0.0, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2098849018280298, |
| "grad_norm": 0.0003643035888671875, |
| "learning_rate": 9.335497998942839e-06, |
| "loss": 0.0929, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.21214172872940645, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.327946839839916e-06, |
| "loss": 0.6867, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.2143985556307831, |
| "grad_norm": 0.0030975341796875, |
| "learning_rate": 9.320395680736994e-06, |
| "loss": 0.0005, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2166553825321598, |
| "grad_norm": 0.0011138916015625, |
| "learning_rate": 9.312844521634072e-06, |
| "loss": 0.3669, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.21891220943353645, |
| "grad_norm": 71.5, |
| "learning_rate": 9.30529336253115e-06, |
| "loss": 0.2307, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.2211690363349131, |
| "grad_norm": 0.0164794921875, |
| "learning_rate": 9.297742203428227e-06, |
| "loss": 0.0213, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.22342586323628977, |
| "grad_norm": 0.003570556640625, |
| "learning_rate": 9.290191044325305e-06, |
| "loss": 0.0001, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.22568269013766645, |
| "grad_norm": 0.000316619873046875, |
| "learning_rate": 9.282639885222381e-06, |
| "loss": 0.0002, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2279395170390431, |
| "grad_norm": 0.0004711151123046875, |
| "learning_rate": 9.27508872611946e-06, |
| "loss": 0.0, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.23019634394041977, |
| "grad_norm": 0.00096893310546875, |
| "learning_rate": 9.267537567016539e-06, |
| "loss": 0.0007, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.23245317084179642, |
| "grad_norm": 0.005950927734375, |
| "learning_rate": 9.259986407913616e-06, |
| "loss": 0.4785, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2347099977431731, |
| "grad_norm": 0.0087890625, |
| "learning_rate": 9.252435248810692e-06, |
| "loss": 0.2708, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.23696682464454977, |
| "grad_norm": 0.00506591796875, |
| "learning_rate": 9.24488408970777e-06, |
| "loss": 0.2259, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.23922365154592642, |
| "grad_norm": 0.001434326171875, |
| "learning_rate": 9.237332930604848e-06, |
| "loss": 0.0045, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.24148047844730308, |
| "grad_norm": 0.003143310546875, |
| "learning_rate": 9.229781771501927e-06, |
| "loss": 0.0, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.24373730534867977, |
| "grad_norm": 0.0025787353515625, |
| "learning_rate": 9.222230612399003e-06, |
| "loss": 0.0, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.24599413225005642, |
| "grad_norm": 0.0003833770751953125, |
| "learning_rate": 9.214679453296081e-06, |
| "loss": 0.0, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.24825095915143308, |
| "grad_norm": 0.00494384765625, |
| "learning_rate": 9.207128294193159e-06, |
| "loss": 0.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.25050778605280977, |
| "grad_norm": 0.0024566650390625, |
| "learning_rate": 9.199577135090237e-06, |
| "loss": 0.1667, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.2527646129541864, |
| "grad_norm": 0.00183868408203125, |
| "learning_rate": 9.192025975987314e-06, |
| "loss": 0.0001, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.2550214398555631, |
| "grad_norm": 0.00396728515625, |
| "learning_rate": 9.184474816884392e-06, |
| "loss": 0.0, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.25727826675693977, |
| "grad_norm": 0.0147705078125, |
| "learning_rate": 9.17692365778147e-06, |
| "loss": 0.0, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.2595350936583164, |
| "grad_norm": 0.00179290771484375, |
| "learning_rate": 9.169372498678548e-06, |
| "loss": 0.0, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2617919205596931, |
| "grad_norm": 0.0025634765625, |
| "learning_rate": 9.161821339575625e-06, |
| "loss": 0.0, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.2640487474610697, |
| "grad_norm": 0.004486083984375, |
| "learning_rate": 9.154270180472703e-06, |
| "loss": 0.0, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2663055743624464, |
| "grad_norm": 0.0001277923583984375, |
| "learning_rate": 9.146719021369781e-06, |
| "loss": 0.0, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2685624012638231, |
| "grad_norm": 0.0001659393310546875, |
| "learning_rate": 9.139167862266859e-06, |
| "loss": 0.0, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.2708192281651997, |
| "grad_norm": 0.0002899169921875, |
| "learning_rate": 9.131616703163937e-06, |
| "loss": 0.0412, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2730760550665764, |
| "grad_norm": 0.006134033203125, |
| "learning_rate": 9.124065544061014e-06, |
| "loss": 0.0, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2753328819679531, |
| "grad_norm": 0.03759765625, |
| "learning_rate": 9.116514384958092e-06, |
| "loss": 0.2408, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.2775897088693297, |
| "grad_norm": 0.00067138671875, |
| "learning_rate": 9.10896322585517e-06, |
| "loss": 0.0, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2798465357707064, |
| "grad_norm": 0.0007171630859375, |
| "learning_rate": 9.101412066752248e-06, |
| "loss": 0.0007, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.282103362672083, |
| "grad_norm": 0.004669189453125, |
| "learning_rate": 9.093860907649325e-06, |
| "loss": 0.3364, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.2843601895734597, |
| "grad_norm": 0.000713348388671875, |
| "learning_rate": 9.086309748546403e-06, |
| "loss": 0.0, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2866170164748364, |
| "grad_norm": 0.0010223388671875, |
| "learning_rate": 9.078758589443481e-06, |
| "loss": 0.0, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.288873843376213, |
| "grad_norm": 0.002960205078125, |
| "learning_rate": 9.071207430340559e-06, |
| "loss": 0.0, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.2911306702775897, |
| "grad_norm": 0.00034332275390625, |
| "learning_rate": 9.063656271237635e-06, |
| "loss": 0.0, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.2933874971789664, |
| "grad_norm": 0.002349853515625, |
| "learning_rate": 9.056105112134712e-06, |
| "loss": 0.0, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.295644324080343, |
| "grad_norm": 0.00058746337890625, |
| "learning_rate": 9.048553953031792e-06, |
| "loss": 0.2488, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2979011509817197, |
| "grad_norm": 0.00115966796875, |
| "learning_rate": 9.04100279392887e-06, |
| "loss": 0.0, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.3001579778830964, |
| "grad_norm": 0.004425048828125, |
| "learning_rate": 9.033451634825946e-06, |
| "loss": 0.2421, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.302414804784473, |
| "grad_norm": 0.000965118408203125, |
| "learning_rate": 9.025900475723023e-06, |
| "loss": 0.0244, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3046716316858497, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 9.018349316620101e-06, |
| "loss": 0.0, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.30692845858722634, |
| "grad_norm": 0.00732421875, |
| "learning_rate": 9.01079815751718e-06, |
| "loss": 0.0, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.309185285488603, |
| "grad_norm": 0.0010223388671875, |
| "learning_rate": 9.003246998414258e-06, |
| "loss": 0.0, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3114421123899797, |
| "grad_norm": 0.00130462646484375, |
| "learning_rate": 8.995695839311335e-06, |
| "loss": 0.0, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.31369893929135634, |
| "grad_norm": 0.0004634857177734375, |
| "learning_rate": 8.988144680208412e-06, |
| "loss": 0.0, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.315955766192733, |
| "grad_norm": 0.0206298828125, |
| "learning_rate": 8.98059352110549e-06, |
| "loss": 0.2664, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3182125930941097, |
| "grad_norm": 0.005218505859375, |
| "learning_rate": 8.973042362002568e-06, |
| "loss": 0.0, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.32046941999548634, |
| "grad_norm": 0.00154876708984375, |
| "learning_rate": 8.965491202899646e-06, |
| "loss": 0.1572, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.322726246896863, |
| "grad_norm": 0.000640869140625, |
| "learning_rate": 8.957940043796723e-06, |
| "loss": 0.0003, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.32498307379823965, |
| "grad_norm": 0.00119781494140625, |
| "learning_rate": 8.950388884693801e-06, |
| "loss": 0.0, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.32723990069961634, |
| "grad_norm": 0.0245361328125, |
| "learning_rate": 8.942837725590879e-06, |
| "loss": 0.0, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.329496727600993, |
| "grad_norm": 0.0004062652587890625, |
| "learning_rate": 8.935286566487957e-06, |
| "loss": 0.0, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.33175355450236965, |
| "grad_norm": 0.000385284423828125, |
| "learning_rate": 8.927735407385034e-06, |
| "loss": 0.0, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.33401038140374634, |
| "grad_norm": 0.00616455078125, |
| "learning_rate": 8.920184248282112e-06, |
| "loss": 0.0, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.336267208305123, |
| "grad_norm": 0.002532958984375, |
| "learning_rate": 8.91263308917919e-06, |
| "loss": 0.0, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.33852403520649965, |
| "grad_norm": 0.0011444091796875, |
| "learning_rate": 8.905081930076268e-06, |
| "loss": 0.0, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.34078086210787634, |
| "grad_norm": 0.0004138946533203125, |
| "learning_rate": 8.897530770973345e-06, |
| "loss": 0.0, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.34303768900925297, |
| "grad_norm": 0.000579833984375, |
| "learning_rate": 8.889979611870423e-06, |
| "loss": 0.3398, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.34529451591062965, |
| "grad_norm": 0.0003948211669921875, |
| "learning_rate": 8.882428452767501e-06, |
| "loss": 0.0, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.34755134281200634, |
| "grad_norm": 0.000247955322265625, |
| "learning_rate": 8.874877293664577e-06, |
| "loss": 0.0, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.34980816971338297, |
| "grad_norm": 0.000255584716796875, |
| "learning_rate": 8.867326134561656e-06, |
| "loss": 0.2765, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.35206499661475965, |
| "grad_norm": 0.0001964569091796875, |
| "learning_rate": 8.859774975458734e-06, |
| "loss": 0.0, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.35432182351613634, |
| "grad_norm": 0.00037384033203125, |
| "learning_rate": 8.852223816355812e-06, |
| "loss": 0.0, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.35657865041751297, |
| "grad_norm": 0.00052642822265625, |
| "learning_rate": 8.84467265725289e-06, |
| "loss": 0.0, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.35883547731888965, |
| "grad_norm": 0.00023555755615234375, |
| "learning_rate": 8.837121498149966e-06, |
| "loss": 0.4111, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3610923042202663, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 8.829570339047044e-06, |
| "loss": 0.0007, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.36334913112164297, |
| "grad_norm": 0.00982666015625, |
| "learning_rate": 8.822019179944123e-06, |
| "loss": 0.0, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.36560595802301965, |
| "grad_norm": 0.0007476806640625, |
| "learning_rate": 8.8144680208412e-06, |
| "loss": 0.0, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3678627849243963, |
| "grad_norm": 0.0030059814453125, |
| "learning_rate": 8.806916861738277e-06, |
| "loss": 0.0, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.37011961182577297, |
| "grad_norm": 0.005889892578125, |
| "learning_rate": 8.799365702635355e-06, |
| "loss": 0.0, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.37237643872714965, |
| "grad_norm": 0.006927490234375, |
| "learning_rate": 8.791814543532432e-06, |
| "loss": 0.0001, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3746332656285263, |
| "grad_norm": 0.00494384765625, |
| "learning_rate": 8.784263384429512e-06, |
| "loss": 0.0, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.37689009252990296, |
| "grad_norm": 0.00799560546875, |
| "learning_rate": 8.776712225326588e-06, |
| "loss": 0.0, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.3791469194312796, |
| "grad_norm": 0.0003604888916015625, |
| "learning_rate": 8.769161066223666e-06, |
| "loss": 0.0, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.3814037463326563, |
| "grad_norm": 66.5, |
| "learning_rate": 8.761609907120743e-06, |
| "loss": 0.651, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.38366057323403296, |
| "grad_norm": 0.005645751953125, |
| "learning_rate": 8.754058748017821e-06, |
| "loss": 0.0, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3859174001354096, |
| "grad_norm": 0.002105712890625, |
| "learning_rate": 8.746507588914899e-06, |
| "loss": 0.0, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.3881742270367863, |
| "grad_norm": 0.0003948211669921875, |
| "learning_rate": 8.738956429811977e-06, |
| "loss": 0.0, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.39043105393816296, |
| "grad_norm": 0.0014801025390625, |
| "learning_rate": 8.731405270709054e-06, |
| "loss": 0.0, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.3926878808395396, |
| "grad_norm": 0.000560760498046875, |
| "learning_rate": 8.723854111606132e-06, |
| "loss": 0.0, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.3949447077409163, |
| "grad_norm": 0.000659942626953125, |
| "learning_rate": 8.71630295250321e-06, |
| "loss": 0.0, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.39720153464229296, |
| "grad_norm": 0.162109375, |
| "learning_rate": 8.708751793400288e-06, |
| "loss": 0.0, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.3994583615436696, |
| "grad_norm": 0.000843048095703125, |
| "learning_rate": 8.701200634297366e-06, |
| "loss": 0.1511, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.4017151884450463, |
| "grad_norm": 0.003631591796875, |
| "learning_rate": 8.693649475194443e-06, |
| "loss": 0.8574, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.4039720153464229, |
| "grad_norm": 0.004425048828125, |
| "learning_rate": 8.686098316091521e-06, |
| "loss": 0.0013, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.4062288422477996, |
| "grad_norm": 0.004486083984375, |
| "learning_rate": 8.678547156988599e-06, |
| "loss": 0.0002, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.4084856691491763, |
| "grad_norm": 0.0019683837890625, |
| "learning_rate": 8.670995997885677e-06, |
| "loss": 0.4738, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4107424960505529, |
| "grad_norm": 0.01092529296875, |
| "learning_rate": 8.663444838782754e-06, |
| "loss": 0.0, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.4129993229519296, |
| "grad_norm": 0.0013885498046875, |
| "learning_rate": 8.655893679679832e-06, |
| "loss": 0.0, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.4152561498533063, |
| "grad_norm": 0.09326171875, |
| "learning_rate": 8.648342520576908e-06, |
| "loss": 0.0, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4175129767546829, |
| "grad_norm": 0.01409912109375, |
| "learning_rate": 8.640791361473988e-06, |
| "loss": 0.0001, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4197698036560596, |
| "grad_norm": 0.004119873046875, |
| "learning_rate": 8.633240202371065e-06, |
| "loss": 0.4574, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.4220266305574362, |
| "grad_norm": 0.00121307373046875, |
| "learning_rate": 8.625689043268143e-06, |
| "loss": 0.0, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.4242834574588129, |
| "grad_norm": 0.006103515625, |
| "learning_rate": 8.61813788416522e-06, |
| "loss": 0.0, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4265402843601896, |
| "grad_norm": 0.005157470703125, |
| "learning_rate": 8.610586725062297e-06, |
| "loss": 0.0, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.4287971112615662, |
| "grad_norm": 0.00360107421875, |
| "learning_rate": 8.603035565959376e-06, |
| "loss": 0.715, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4310539381629429, |
| "grad_norm": 0.0028076171875, |
| "learning_rate": 8.595484406856454e-06, |
| "loss": 0.0, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.4333107650643196, |
| "grad_norm": 0.00799560546875, |
| "learning_rate": 8.58793324775353e-06, |
| "loss": 0.0001, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.4355675919656962, |
| "grad_norm": 0.003265380859375, |
| "learning_rate": 8.580382088650608e-06, |
| "loss": 0.2658, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.4378244188670729, |
| "grad_norm": 0.00970458984375, |
| "learning_rate": 8.572830929547686e-06, |
| "loss": 0.0, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.44008124576844954, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 8.565279770444764e-06, |
| "loss": 0.0, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.4423380726698262, |
| "grad_norm": 0.007568359375, |
| "learning_rate": 8.557728611341841e-06, |
| "loss": 0.0, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.4445948995712029, |
| "grad_norm": 0.000518798828125, |
| "learning_rate": 8.550177452238919e-06, |
| "loss": 0.0, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.44685172647257954, |
| "grad_norm": 0.0037841796875, |
| "learning_rate": 8.542626293135997e-06, |
| "loss": 0.0, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.4491085533739562, |
| "grad_norm": 0.000659942626953125, |
| "learning_rate": 8.535075134033075e-06, |
| "loss": 0.0, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4513653802753329, |
| "grad_norm": 0.000308990478515625, |
| "learning_rate": 8.527523974930152e-06, |
| "loss": 0.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.45362220717670954, |
| "grad_norm": 0.00167083740234375, |
| "learning_rate": 8.51997281582723e-06, |
| "loss": 0.187, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.4558790340780862, |
| "grad_norm": 0.001251220703125, |
| "learning_rate": 8.512421656724308e-06, |
| "loss": 0.7379, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.45813586097946285, |
| "grad_norm": 0.003753662109375, |
| "learning_rate": 8.504870497621386e-06, |
| "loss": 0.0, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.46039268788083954, |
| "grad_norm": 0.00179290771484375, |
| "learning_rate": 8.497319338518463e-06, |
| "loss": 0.0, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.4626495147822162, |
| "grad_norm": 0.00034332275390625, |
| "learning_rate": 8.489768179415541e-06, |
| "loss": 0.0, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.46490634168359285, |
| "grad_norm": 0.0003948211669921875, |
| "learning_rate": 8.482217020312619e-06, |
| "loss": 0.0, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.46716316858496953, |
| "grad_norm": 0.0012359619140625, |
| "learning_rate": 8.474665861209697e-06, |
| "loss": 0.0, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.4694199954863462, |
| "grad_norm": 0.000896453857421875, |
| "learning_rate": 8.467114702106774e-06, |
| "loss": 0.0, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.47167682238772285, |
| "grad_norm": 4.8125, |
| "learning_rate": 8.459563543003852e-06, |
| "loss": 0.0007, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.47393364928909953, |
| "grad_norm": 0.00017452239990234375, |
| "learning_rate": 8.45201238390093e-06, |
| "loss": 0.0, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.00037384033203125, |
| "learning_rate": 8.444461224798008e-06, |
| "loss": 0.0, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.47844730309185285, |
| "grad_norm": 0.0001983642578125, |
| "learning_rate": 8.436910065695085e-06, |
| "loss": 0.0, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.48070412999322953, |
| "grad_norm": 0.00189971923828125, |
| "learning_rate": 8.429358906592162e-06, |
| "loss": 0.0, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.48296095689460616, |
| "grad_norm": 0.0002422332763671875, |
| "learning_rate": 8.421807747489241e-06, |
| "loss": 0.0, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.48521778379598285, |
| "grad_norm": 0.000522613525390625, |
| "learning_rate": 8.414256588386319e-06, |
| "loss": 0.0, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.48747461069735953, |
| "grad_norm": 0.00021266937255859375, |
| "learning_rate": 8.406705429283396e-06, |
| "loss": 0.0, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.48973143759873616, |
| "grad_norm": 0.000537872314453125, |
| "learning_rate": 8.399154270180473e-06, |
| "loss": 0.0, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.49198826450011285, |
| "grad_norm": 0.001617431640625, |
| "learning_rate": 8.39160311107755e-06, |
| "loss": 0.6385, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4942450914014895, |
| "grad_norm": 0.0020904541015625, |
| "learning_rate": 8.384051951974628e-06, |
| "loss": 0.0643, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.49650191830286616, |
| "grad_norm": 0.0020904541015625, |
| "learning_rate": 8.376500792871708e-06, |
| "loss": 0.0, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.49875874520424285, |
| "grad_norm": 0.25390625, |
| "learning_rate": 8.368949633768784e-06, |
| "loss": 0.0001, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.5010155721056195, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 8.361398474665861e-06, |
| "loss": 0.3428, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.5032723990069962, |
| "grad_norm": 0.006622314453125, |
| "learning_rate": 8.353847315562939e-06, |
| "loss": 0.307, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.5055292259083728, |
| "grad_norm": 0.03515625, |
| "learning_rate": 8.346296156460017e-06, |
| "loss": 0.0007, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.5077860528097495, |
| "grad_norm": 0.0308837890625, |
| "learning_rate": 8.338744997357096e-06, |
| "loss": 0.0308, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.5100428797111262, |
| "grad_norm": 0.006591796875, |
| "learning_rate": 8.331193838254172e-06, |
| "loss": 0.2879, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5122997066125028, |
| "grad_norm": 77.0, |
| "learning_rate": 8.32364267915125e-06, |
| "loss": 0.4258, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5145565335138795, |
| "grad_norm": 0.031005859375, |
| "learning_rate": 8.316091520048328e-06, |
| "loss": 0.0502, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5168133604152562, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.308540360945406e-06, |
| "loss": 0.0001, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.5190701873166328, |
| "grad_norm": 0.01007080078125, |
| "learning_rate": 8.300989201842483e-06, |
| "loss": 0.0, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5213270142180095, |
| "grad_norm": 0.01483154296875, |
| "learning_rate": 8.293438042739561e-06, |
| "loss": 0.3391, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5235838411193862, |
| "grad_norm": 0.150390625, |
| "learning_rate": 8.285886883636639e-06, |
| "loss": 0.0, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5258406680207628, |
| "grad_norm": 0.002288818359375, |
| "learning_rate": 8.278335724533717e-06, |
| "loss": 0.3255, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5280974949221394, |
| "grad_norm": 0.00347900390625, |
| "learning_rate": 8.270784565430794e-06, |
| "loss": 0.2631, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5303543218235162, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 8.263233406327872e-06, |
| "loss": 0.0, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5326111487248928, |
| "grad_norm": 0.01055908203125, |
| "learning_rate": 8.25568224722495e-06, |
| "loss": 0.0001, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5348679756262694, |
| "grad_norm": 0.0040283203125, |
| "learning_rate": 8.248131088122028e-06, |
| "loss": 0.0, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5371248025276462, |
| "grad_norm": 0.00035858154296875, |
| "learning_rate": 8.240579929019104e-06, |
| "loss": 0.0, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5393816294290228, |
| "grad_norm": 0.001983642578125, |
| "learning_rate": 8.233028769916183e-06, |
| "loss": 0.0003, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5416384563303994, |
| "grad_norm": 0.0002651214599609375, |
| "learning_rate": 8.225477610813261e-06, |
| "loss": 0.0, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5438952832317762, |
| "grad_norm": 0.0009918212890625, |
| "learning_rate": 8.217926451710339e-06, |
| "loss": 0.0, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5461521101331528, |
| "grad_norm": 0.0009613037109375, |
| "learning_rate": 8.210375292607415e-06, |
| "loss": 0.0, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5484089370345294, |
| "grad_norm": 0.00107574462890625, |
| "learning_rate": 8.202824133504493e-06, |
| "loss": 0.0, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5506657639359062, |
| "grad_norm": 0.0032958984375, |
| "learning_rate": 8.195272974401572e-06, |
| "loss": 0.248, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5529225908372828, |
| "grad_norm": 0.0021209716796875, |
| "learning_rate": 8.18772181529865e-06, |
| "loss": 0.0, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5551794177386594, |
| "grad_norm": 51.75, |
| "learning_rate": 8.180170656195728e-06, |
| "loss": 0.4205, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5574362446400362, |
| "grad_norm": 0.005767822265625, |
| "learning_rate": 8.172619497092804e-06, |
| "loss": 0.0, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5596930715414128, |
| "grad_norm": 0.0185546875, |
| "learning_rate": 8.165068337989881e-06, |
| "loss": 0.0, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5619498984427894, |
| "grad_norm": 0.0118408203125, |
| "learning_rate": 8.15751717888696e-06, |
| "loss": 0.13, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.564206725344166, |
| "grad_norm": 1.375, |
| "learning_rate": 8.149966019784039e-06, |
| "loss": 0.0002, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5664635522455428, |
| "grad_norm": 0.00433349609375, |
| "learning_rate": 8.142414860681115e-06, |
| "loss": 0.0, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5687203791469194, |
| "grad_norm": 0.0026702880859375, |
| "learning_rate": 8.134863701578192e-06, |
| "loss": 0.0, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.570977206048296, |
| "grad_norm": 548.0, |
| "learning_rate": 8.12731254247527e-06, |
| "loss": 0.6147, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5732340329496728, |
| "grad_norm": 0.0020904541015625, |
| "learning_rate": 8.119761383372348e-06, |
| "loss": 0.0, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.5754908598510494, |
| "grad_norm": 0.003387451171875, |
| "learning_rate": 8.112210224269426e-06, |
| "loss": 0.0, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.577747686752426, |
| "grad_norm": 0.0002536773681640625, |
| "learning_rate": 8.104659065166504e-06, |
| "loss": 0.0, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.5800045136538028, |
| "grad_norm": 0.0035552978515625, |
| "learning_rate": 8.097107906063581e-06, |
| "loss": 0.3707, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.5822613405551794, |
| "grad_norm": 0.0018157958984375, |
| "learning_rate": 8.089556746960659e-06, |
| "loss": 0.3046, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.584518167456556, |
| "grad_norm": 0.0078125, |
| "learning_rate": 8.082005587857737e-06, |
| "loss": 0.0, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5867749943579328, |
| "grad_norm": 0.0037078857421875, |
| "learning_rate": 8.074454428754815e-06, |
| "loss": 0.3055, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5890318212593094, |
| "grad_norm": 0.0150146484375, |
| "learning_rate": 8.066903269651892e-06, |
| "loss": 0.0, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.591288648160686, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 8.05935211054897e-06, |
| "loss": 0.0001, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5935454750620628, |
| "grad_norm": 0.020751953125, |
| "learning_rate": 8.051800951446048e-06, |
| "loss": 0.0001, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5958023019634394, |
| "grad_norm": 0.00372314453125, |
| "learning_rate": 8.044249792343126e-06, |
| "loss": 0.2485, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.598059128864816, |
| "grad_norm": 0.001800537109375, |
| "learning_rate": 8.036698633240203e-06, |
| "loss": 0.0, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.6003159557661928, |
| "grad_norm": 83.0, |
| "learning_rate": 8.029147474137281e-06, |
| "loss": 0.3156, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.6025727826675694, |
| "grad_norm": 0.007720947265625, |
| "learning_rate": 8.021596315034359e-06, |
| "loss": 0.0, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.604829609568946, |
| "grad_norm": 0.001373291015625, |
| "learning_rate": 8.014045155931437e-06, |
| "loss": 0.0002, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.6070864364703227, |
| "grad_norm": 0.0008087158203125, |
| "learning_rate": 8.006493996828514e-06, |
| "loss": 0.0, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.6093432633716994, |
| "grad_norm": 0.0030059814453125, |
| "learning_rate": 7.998942837725592e-06, |
| "loss": 0.0, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.611600090273076, |
| "grad_norm": 0.0016021728515625, |
| "learning_rate": 7.99139167862267e-06, |
| "loss": 0.0, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.6138569171744527, |
| "grad_norm": 0.0036468505859375, |
| "learning_rate": 7.983840519519746e-06, |
| "loss": 0.0014, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.6161137440758294, |
| "grad_norm": 0.0003185272216796875, |
| "learning_rate": 7.976289360416824e-06, |
| "loss": 0.0, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.618370570977206, |
| "grad_norm": 0.008056640625, |
| "learning_rate": 7.968738201313903e-06, |
| "loss": 0.0, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6206273978785827, |
| "grad_norm": 0.00103759765625, |
| "learning_rate": 7.961187042210981e-06, |
| "loss": 0.0, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6228842247799594, |
| "grad_norm": 132.0, |
| "learning_rate": 7.953635883108057e-06, |
| "loss": 0.3156, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.625141051681336, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 7.946084724005135e-06, |
| "loss": 0.0, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6273978785827127, |
| "grad_norm": 0.000629425048828125, |
| "learning_rate": 7.938533564902213e-06, |
| "loss": 0.0, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6296547054840894, |
| "grad_norm": 0.00897216796875, |
| "learning_rate": 7.930982405799292e-06, |
| "loss": 0.0, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.631911532385466, |
| "grad_norm": 0.007354736328125, |
| "learning_rate": 7.923431246696368e-06, |
| "loss": 0.0, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6341683592868427, |
| "grad_norm": 0.0037841796875, |
| "learning_rate": 7.915880087593446e-06, |
| "loss": 0.0001, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6364251861882194, |
| "grad_norm": 0.0036163330078125, |
| "learning_rate": 7.908328928490524e-06, |
| "loss": 0.0, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.638682013089596, |
| "grad_norm": 0.00250244140625, |
| "learning_rate": 7.900777769387601e-06, |
| "loss": 0.0, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6409388399909727, |
| "grad_norm": 0.00173187255859375, |
| "learning_rate": 7.893226610284679e-06, |
| "loss": 0.0, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6431956668923493, |
| "grad_norm": 0.0004520416259765625, |
| "learning_rate": 7.885675451181757e-06, |
| "loss": 0.0, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.645452493793726, |
| "grad_norm": 0.00021457672119140625, |
| "learning_rate": 7.878124292078835e-06, |
| "loss": 0.0, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6477093206951027, |
| "grad_norm": 0.0004405975341796875, |
| "learning_rate": 7.870573132975912e-06, |
| "loss": 0.0, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6499661475964793, |
| "grad_norm": 0.00045013427734375, |
| "learning_rate": 7.86302197387299e-06, |
| "loss": 0.0, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.652222974497856, |
| "grad_norm": 0.000522613525390625, |
| "learning_rate": 7.855470814770068e-06, |
| "loss": 0.0, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6544798013992327, |
| "grad_norm": 0.00029754638671875, |
| "learning_rate": 7.847919655667146e-06, |
| "loss": 0.3409, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6567366283006093, |
| "grad_norm": 0.00372314453125, |
| "learning_rate": 7.840368496564223e-06, |
| "loss": 0.1273, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.658993455201986, |
| "grad_norm": 0.005401611328125, |
| "learning_rate": 7.832817337461301e-06, |
| "loss": 0.2191, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6612502821033627, |
| "grad_norm": 47.25, |
| "learning_rate": 7.825266178358379e-06, |
| "loss": 0.2522, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6635071090047393, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 7.817715019255457e-06, |
| "loss": 0.0982, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.665763935906116, |
| "grad_norm": 0.001220703125, |
| "learning_rate": 7.810163860152535e-06, |
| "loss": 0.0001, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6680207628074927, |
| "grad_norm": 0.001495361328125, |
| "learning_rate": 7.802612701049612e-06, |
| "loss": 0.1024, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6702775897088693, |
| "grad_norm": 0.005401611328125, |
| "learning_rate": 7.795061541946688e-06, |
| "loss": 0.0001, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.672534416610246, |
| "grad_norm": 0.0035858154296875, |
| "learning_rate": 7.787510382843768e-06, |
| "loss": 0.0, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6747912435116227, |
| "grad_norm": 0.0654296875, |
| "learning_rate": 7.779959223740846e-06, |
| "loss": 0.0, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6770480704129993, |
| "grad_norm": 0.002838134765625, |
| "learning_rate": 7.772408064637923e-06, |
| "loss": 0.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6793048973143759, |
| "grad_norm": 0.001068115234375, |
| "learning_rate": 7.764856905535e-06, |
| "loss": 0.1202, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.6815617242157527, |
| "grad_norm": 0.0078125, |
| "learning_rate": 7.757305746432077e-06, |
| "loss": 0.2662, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.6838185511171293, |
| "grad_norm": 0.002227783203125, |
| "learning_rate": 7.749754587329157e-06, |
| "loss": 0.3129, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6860753780185059, |
| "grad_norm": 0.00093841552734375, |
| "learning_rate": 7.742203428226234e-06, |
| "loss": 0.0, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.6883322049198827, |
| "grad_norm": 0.00162506103515625, |
| "learning_rate": 7.73465226912331e-06, |
| "loss": 0.0, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.6905890318212593, |
| "grad_norm": 0.0113525390625, |
| "learning_rate": 7.727101110020388e-06, |
| "loss": 0.0, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6928458587226359, |
| "grad_norm": 0.0042724609375, |
| "learning_rate": 7.719549950917466e-06, |
| "loss": 0.0, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.6951026856240127, |
| "grad_norm": 0.00174713134765625, |
| "learning_rate": 7.711998791814544e-06, |
| "loss": 0.0001, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.6973595125253893, |
| "grad_norm": 0.0021820068359375, |
| "learning_rate": 7.704447632711621e-06, |
| "loss": 0.0, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.6996163394267659, |
| "grad_norm": 0.00016307830810546875, |
| "learning_rate": 7.6968964736087e-06, |
| "loss": 0.0, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.7018731663281427, |
| "grad_norm": 0.0008544921875, |
| "learning_rate": 7.689345314505777e-06, |
| "loss": 0.0868, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.7041299932295193, |
| "grad_norm": 0.00162506103515625, |
| "learning_rate": 7.681794155402855e-06, |
| "loss": 0.0, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.7063868201308959, |
| "grad_norm": 0.83984375, |
| "learning_rate": 7.674242996299933e-06, |
| "loss": 0.0001, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.7086436470322727, |
| "grad_norm": 0.00021266937255859375, |
| "learning_rate": 7.66669183719701e-06, |
| "loss": 0.0, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.7109004739336493, |
| "grad_norm": 0.000926971435546875, |
| "learning_rate": 7.659140678094088e-06, |
| "loss": 0.0, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.7131573008350259, |
| "grad_norm": 2.515625, |
| "learning_rate": 7.651589518991166e-06, |
| "loss": 0.0004, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.7154141277364027, |
| "grad_norm": 0.0029144287109375, |
| "learning_rate": 7.644038359888244e-06, |
| "loss": 0.0, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.7176709546377793, |
| "grad_norm": 62.5, |
| "learning_rate": 7.636487200785321e-06, |
| "loss": 0.3018, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.7199277815391559, |
| "grad_norm": 0.0005950927734375, |
| "learning_rate": 7.628936041682399e-06, |
| "loss": 0.0, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.7221846084405326, |
| "grad_norm": 0.00054168701171875, |
| "learning_rate": 7.621384882579477e-06, |
| "loss": 0.0, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.7244414353419093, |
| "grad_norm": 0.00701904296875, |
| "learning_rate": 7.613833723476555e-06, |
| "loss": 0.0, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.7266982622432859, |
| "grad_norm": 41.75, |
| "learning_rate": 7.6062825643736315e-06, |
| "loss": 0.2809, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.7289550891446626, |
| "grad_norm": 0.0018463134765625, |
| "learning_rate": 7.598731405270709e-06, |
| "loss": 0.0, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.7312119160460393, |
| "grad_norm": 0.0024261474609375, |
| "learning_rate": 7.591180246167788e-06, |
| "loss": 0.0, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.7334687429474159, |
| "grad_norm": 0.0032501220703125, |
| "learning_rate": 7.583629087064866e-06, |
| "loss": 0.0, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.7357255698487926, |
| "grad_norm": 0.0016326904296875, |
| "learning_rate": 7.5760779279619426e-06, |
| "loss": 0.0, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.7379823967501693, |
| "grad_norm": 0.000629425048828125, |
| "learning_rate": 7.56852676885902e-06, |
| "loss": 0.0, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.7402392236515459, |
| "grad_norm": 0.0022735595703125, |
| "learning_rate": 7.560975609756098e-06, |
| "loss": 0.0, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.7424960505529226, |
| "grad_norm": 0.00193023681640625, |
| "learning_rate": 7.553424450653176e-06, |
| "loss": 0.0, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.7447528774542993, |
| "grad_norm": 0.00144195556640625, |
| "learning_rate": 7.545873291550253e-06, |
| "loss": 0.4039, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.7470097043556759, |
| "grad_norm": 0.006134033203125, |
| "learning_rate": 7.538322132447331e-06, |
| "loss": 0.0, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.7492665312570526, |
| "grad_norm": 0.0037384033203125, |
| "learning_rate": 7.530770973344409e-06, |
| "loss": 0.0, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.7515233581584293, |
| "grad_norm": 0.0028228759765625, |
| "learning_rate": 7.523219814241487e-06, |
| "loss": 0.0, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.7537801850598059, |
| "grad_norm": 0.0038604736328125, |
| "learning_rate": 7.515668655138565e-06, |
| "loss": 0.0, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.7560370119611826, |
| "grad_norm": 0.00151824951171875, |
| "learning_rate": 7.5081174960356416e-06, |
| "loss": 0.0, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7582938388625592, |
| "grad_norm": 0.0002689361572265625, |
| "learning_rate": 7.500566336932719e-06, |
| "loss": 0.0, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7605506657639359, |
| "grad_norm": 0.000316619873046875, |
| "learning_rate": 7.493015177829798e-06, |
| "loss": 0.0, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.7628074926653126, |
| "grad_norm": 0.00106048583984375, |
| "learning_rate": 7.485464018726876e-06, |
| "loss": 0.0, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.7650643195666892, |
| "grad_norm": 0.00186920166015625, |
| "learning_rate": 7.477912859623953e-06, |
| "loss": 0.0, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.7673211464680659, |
| "grad_norm": 0.00110626220703125, |
| "learning_rate": 7.47036170052103e-06, |
| "loss": 0.0, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7695779733694426, |
| "grad_norm": 0.0007781982421875, |
| "learning_rate": 7.462810541418108e-06, |
| "loss": 0.0, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.7718348002708192, |
| "grad_norm": 0.000652313232421875, |
| "learning_rate": 7.455259382315187e-06, |
| "loss": 0.4836, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.7740916271721959, |
| "grad_norm": 0.002777099609375, |
| "learning_rate": 7.447708223212264e-06, |
| "loss": 0.0, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.7763484540735726, |
| "grad_norm": 0.002410888671875, |
| "learning_rate": 7.440157064109341e-06, |
| "loss": 0.0, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.7786052809749492, |
| "grad_norm": 0.0023345947265625, |
| "learning_rate": 7.432605905006419e-06, |
| "loss": 0.0, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7808621078763259, |
| "grad_norm": 0.000885009765625, |
| "learning_rate": 7.425054745903497e-06, |
| "loss": 0.0, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7831189347777026, |
| "grad_norm": 0.001190185546875, |
| "learning_rate": 7.417503586800574e-06, |
| "loss": 0.0, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7853757616790792, |
| "grad_norm": 0.00164794921875, |
| "learning_rate": 7.409952427697652e-06, |
| "loss": 0.3779, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7876325885804559, |
| "grad_norm": 0.0024871826171875, |
| "learning_rate": 7.40240126859473e-06, |
| "loss": 0.3797, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7898894154818326, |
| "grad_norm": 0.0009002685546875, |
| "learning_rate": 7.394850109491808e-06, |
| "loss": 0.0, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7921462423832092, |
| "grad_norm": 0.00145721435546875, |
| "learning_rate": 7.387298950388885e-06, |
| "loss": 0.0, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.7944030692845859, |
| "grad_norm": 0.00035858154296875, |
| "learning_rate": 7.379747791285963e-06, |
| "loss": 0.3298, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.7966598961859626, |
| "grad_norm": 0.000583648681640625, |
| "learning_rate": 7.37219663218304e-06, |
| "loss": 0.0, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.7989167230873392, |
| "grad_norm": 0.0028228759765625, |
| "learning_rate": 7.364645473080119e-06, |
| "loss": 0.187, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.8011735499887158, |
| "grad_norm": 0.01263427734375, |
| "learning_rate": 7.357094313977197e-06, |
| "loss": 0.2544, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.8034303768900926, |
| "grad_norm": 0.01226806640625, |
| "learning_rate": 7.349543154874274e-06, |
| "loss": 0.0001, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.8056872037914692, |
| "grad_norm": 0.00079345703125, |
| "learning_rate": 7.3419919957713514e-06, |
| "loss": 0.0, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.8079440306928458, |
| "grad_norm": 0.291015625, |
| "learning_rate": 7.334440836668429e-06, |
| "loss": 0.0001, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.8102008575942226, |
| "grad_norm": 0.0021514892578125, |
| "learning_rate": 7.326889677565508e-06, |
| "loss": 0.0, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.8124576844955992, |
| "grad_norm": 0.0004634857177734375, |
| "learning_rate": 7.319338518462584e-06, |
| "loss": 0.0, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.8147145113969758, |
| "grad_norm": 0.001800537109375, |
| "learning_rate": 7.3117873593596625e-06, |
| "loss": 0.0, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.8169713382983526, |
| "grad_norm": 0.0024871826171875, |
| "learning_rate": 7.30423620025674e-06, |
| "loss": 0.0, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.8192281651997292, |
| "grad_norm": 0.00119781494140625, |
| "learning_rate": 7.296685041153818e-06, |
| "loss": 0.0, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.8214849921011058, |
| "grad_norm": 0.000629425048828125, |
| "learning_rate": 7.289133882050895e-06, |
| "loss": 0.0, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.8237418190024826, |
| "grad_norm": 0.000797271728515625, |
| "learning_rate": 7.281582722947973e-06, |
| "loss": 0.3255, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.8259986459038592, |
| "grad_norm": 0.0008392333984375, |
| "learning_rate": 7.274031563845051e-06, |
| "loss": 0.0036, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.8282554728052358, |
| "grad_norm": 0.00164794921875, |
| "learning_rate": 7.266480404742129e-06, |
| "loss": 0.0, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.8305122997066126, |
| "grad_norm": 0.00012874603271484375, |
| "learning_rate": 7.258929245639206e-06, |
| "loss": 0.0, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.8327691266079892, |
| "grad_norm": 0.000461578369140625, |
| "learning_rate": 7.251378086536284e-06, |
| "loss": 0.3065, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.8350259535093658, |
| "grad_norm": 57.75, |
| "learning_rate": 7.2438269274333615e-06, |
| "loss": 0.3243, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8372827804107424, |
| "grad_norm": 0.0020599365234375, |
| "learning_rate": 7.236275768330439e-06, |
| "loss": 0.0, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.8395396073121192, |
| "grad_norm": 0.0155029296875, |
| "learning_rate": 7.228724609227516e-06, |
| "loss": 0.0, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.8417964342134958, |
| "grad_norm": 0.005401611328125, |
| "learning_rate": 7.221173450124595e-06, |
| "loss": 0.0, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.8440532611148724, |
| "grad_norm": 0.0025634765625, |
| "learning_rate": 7.2136222910216725e-06, |
| "loss": 0.0, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.8463100880162492, |
| "grad_norm": 0.003143310546875, |
| "learning_rate": 7.20607113191875e-06, |
| "loss": 0.0, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8485669149176258, |
| "grad_norm": 0.00994873046875, |
| "learning_rate": 7.198519972815828e-06, |
| "loss": 0.0, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.8508237418190024, |
| "grad_norm": 0.0191650390625, |
| "learning_rate": 7.190968813712905e-06, |
| "loss": 0.0, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.8530805687203792, |
| "grad_norm": 0.00024318695068359375, |
| "learning_rate": 7.1834176546099836e-06, |
| "loss": 0.0001, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.8553373956217558, |
| "grad_norm": 0.0035247802734375, |
| "learning_rate": 7.175866495507061e-06, |
| "loss": 0.208, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.8575942225231324, |
| "grad_norm": 0.0002269744873046875, |
| "learning_rate": 7.168315336404139e-06, |
| "loss": 0.0, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8598510494245092, |
| "grad_norm": 0.00250244140625, |
| "learning_rate": 7.160764177301216e-06, |
| "loss": 0.3297, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8621078763258858, |
| "grad_norm": 0.00150299072265625, |
| "learning_rate": 7.153213018198294e-06, |
| "loss": 0.0, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.8643647032272624, |
| "grad_norm": 0.00274658203125, |
| "learning_rate": 7.1456618590953715e-06, |
| "loss": 0.0, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.8666215301286392, |
| "grad_norm": 0.00262451171875, |
| "learning_rate": 7.13811069999245e-06, |
| "loss": 0.0033, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.8688783570300158, |
| "grad_norm": 0.002197265625, |
| "learning_rate": 7.130559540889527e-06, |
| "loss": 0.0441, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.8711351839313924, |
| "grad_norm": 0.031982421875, |
| "learning_rate": 7.123008381786605e-06, |
| "loss": 0.0, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.8733920108327691, |
| "grad_norm": 0.00469970703125, |
| "learning_rate": 7.1154572226836826e-06, |
| "loss": 0.0, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.8756488377341458, |
| "grad_norm": 0.00164794921875, |
| "learning_rate": 7.10790606358076e-06, |
| "loss": 0.0, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.8779056646355224, |
| "grad_norm": 0.00125885009765625, |
| "learning_rate": 7.100354904477837e-06, |
| "loss": 0.0, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.8801624915368991, |
| "grad_norm": 0.003814697265625, |
| "learning_rate": 7.092803745374916e-06, |
| "loss": 0.0, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8824193184382758, |
| "grad_norm": 0.01007080078125, |
| "learning_rate": 7.085252586271994e-06, |
| "loss": 0.0, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.8846761453396524, |
| "grad_norm": 0.000579833984375, |
| "learning_rate": 7.077701427169071e-06, |
| "loss": 0.0, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.8869329722410291, |
| "grad_norm": 0.00848388671875, |
| "learning_rate": 7.070150268066148e-06, |
| "loss": 0.0, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.8891897991424058, |
| "grad_norm": 0.00054931640625, |
| "learning_rate": 7.062599108963226e-06, |
| "loss": 0.0, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8914466260437824, |
| "grad_norm": 0.000835418701171875, |
| "learning_rate": 7.055047949860304e-06, |
| "loss": 0.0, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8937034529451591, |
| "grad_norm": 0.00048828125, |
| "learning_rate": 7.047496790757382e-06, |
| "loss": 0.0, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8959602798465358, |
| "grad_norm": 0.00023365020751953125, |
| "learning_rate": 7.039945631654459e-06, |
| "loss": 0.0, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8982171067479124, |
| "grad_norm": 0.00335693359375, |
| "learning_rate": 7.032394472551537e-06, |
| "loss": 0.0, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.9004739336492891, |
| "grad_norm": 0.00014400482177734375, |
| "learning_rate": 7.024843313448615e-06, |
| "loss": 0.0, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.9027307605506658, |
| "grad_norm": 0.0002613067626953125, |
| "learning_rate": 7.017292154345693e-06, |
| "loss": 0.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9049875874520424, |
| "grad_norm": 0.00043487548828125, |
| "learning_rate": 7.009740995242771e-06, |
| "loss": 0.0, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.9072444143534191, |
| "grad_norm": 0.318359375, |
| "learning_rate": 7.002189836139847e-06, |
| "loss": 0.0, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.9095012412547958, |
| "grad_norm": 0.00089263916015625, |
| "learning_rate": 6.994638677036926e-06, |
| "loss": 0.0, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.9117580681561724, |
| "grad_norm": 0.000247955322265625, |
| "learning_rate": 6.987087517934004e-06, |
| "loss": 0.0, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.9140148950575491, |
| "grad_norm": 0.00011873245239257812, |
| "learning_rate": 6.979536358831081e-06, |
| "loss": 0.0, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.9162717219589257, |
| "grad_norm": 0.00018978118896484375, |
| "learning_rate": 6.971985199728158e-06, |
| "loss": 0.0, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.9185285488603024, |
| "grad_norm": 0.0001049041748046875, |
| "learning_rate": 6.964434040625236e-06, |
| "loss": 0.3925, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.9207853757616791, |
| "grad_norm": 0.0002918243408203125, |
| "learning_rate": 6.956882881522315e-06, |
| "loss": 0.0, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.9230422026630557, |
| "grad_norm": 0.000255584716796875, |
| "learning_rate": 6.9493317224193925e-06, |
| "loss": 0.0, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.9252990295644324, |
| "grad_norm": 0.0001773834228515625, |
| "learning_rate": 6.941780563316469e-06, |
| "loss": 0.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9275558564658091, |
| "grad_norm": 0.0087890625, |
| "learning_rate": 6.934229404213547e-06, |
| "loss": 0.3115, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.9298126833671857, |
| "grad_norm": 0.00093841552734375, |
| "learning_rate": 6.926678245110625e-06, |
| "loss": 0.086, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.9320695102685624, |
| "grad_norm": 0.000865936279296875, |
| "learning_rate": 6.9191270860077035e-06, |
| "loss": 0.0488, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.9343263371699391, |
| "grad_norm": 0.00144195556640625, |
| "learning_rate": 6.9115759269047796e-06, |
| "loss": 0.0, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.9365831640713157, |
| "grad_norm": 0.00823974609375, |
| "learning_rate": 6.904024767801858e-06, |
| "loss": 0.0, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.9388399909726924, |
| "grad_norm": 0.001922607421875, |
| "learning_rate": 6.896473608698936e-06, |
| "loss": 0.0, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.9410968178740691, |
| "grad_norm": 0.00102996826171875, |
| "learning_rate": 6.888922449596014e-06, |
| "loss": 0.0, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.9433536447754457, |
| "grad_norm": 0.000972747802734375, |
| "learning_rate": 6.881371290493091e-06, |
| "loss": 0.0, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.9456104716768224, |
| "grad_norm": 53.5, |
| "learning_rate": 6.873820131390168e-06, |
| "loss": 0.1112, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.9478672985781991, |
| "grad_norm": 0.00390625, |
| "learning_rate": 6.866268972287247e-06, |
| "loss": 0.4043, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9501241254795757, |
| "grad_norm": 0.0021514892578125, |
| "learning_rate": 6.858717813184325e-06, |
| "loss": 0.0001, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.00074005126953125, |
| "learning_rate": 6.8511666540814025e-06, |
| "loss": 0.3612, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.9546377792823291, |
| "grad_norm": 0.002044677734375, |
| "learning_rate": 6.843615494978479e-06, |
| "loss": 0.0, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.9568946061837057, |
| "grad_norm": 0.0120849609375, |
| "learning_rate": 6.836064335875557e-06, |
| "loss": 0.2291, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.9591514330850823, |
| "grad_norm": 0.004669189453125, |
| "learning_rate": 6.828513176772635e-06, |
| "loss": 0.0, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.9614082599864591, |
| "grad_norm": 0.00494384765625, |
| "learning_rate": 6.8209620176697136e-06, |
| "loss": 0.0, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.9636650868878357, |
| "grad_norm": 0.00072479248046875, |
| "learning_rate": 6.8134108585667905e-06, |
| "loss": 0.0, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.9659219137892123, |
| "grad_norm": 0.005584716796875, |
| "learning_rate": 6.805859699463868e-06, |
| "loss": 0.0, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.9681787406905891, |
| "grad_norm": 0.0017242431640625, |
| "learning_rate": 6.798308540360946e-06, |
| "loss": 0.0, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.9704355675919657, |
| "grad_norm": 0.005035400390625, |
| "learning_rate": 6.790757381258024e-06, |
| "loss": 0.0, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9726923944933423, |
| "grad_norm": 0.001739501953125, |
| "learning_rate": 6.783206222155101e-06, |
| "loss": 0.2294, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.9749492213947191, |
| "grad_norm": 0.000492095947265625, |
| "learning_rate": 6.775655063052179e-06, |
| "loss": 0.0, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9772060482960957, |
| "grad_norm": 0.004486083984375, |
| "learning_rate": 6.768103903949257e-06, |
| "loss": 0.0, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.9794628751974723, |
| "grad_norm": 0.000896453857421875, |
| "learning_rate": 6.760552744846335e-06, |
| "loss": 0.0, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.9817197020988491, |
| "grad_norm": 0.01055908203125, |
| "learning_rate": 6.753001585743412e-06, |
| "loss": 0.0, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9839765290002257, |
| "grad_norm": 0.00104522705078125, |
| "learning_rate": 6.7454504266404895e-06, |
| "loss": 0.1874, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.9862333559016023, |
| "grad_norm": 0.00069427490234375, |
| "learning_rate": 6.737899267537567e-06, |
| "loss": 0.0005, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.988490182802979, |
| "grad_norm": 0.030517578125, |
| "learning_rate": 6.730348108434646e-06, |
| "loss": 0.0, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.9907470097043557, |
| "grad_norm": 0.028076171875, |
| "learning_rate": 6.722796949331723e-06, |
| "loss": 0.0001, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.9930038366057323, |
| "grad_norm": 0.007171630859375, |
| "learning_rate": 6.7152457902288005e-06, |
| "loss": 0.0, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.995260663507109, |
| "grad_norm": 0.00023746490478515625, |
| "learning_rate": 6.707694631125878e-06, |
| "loss": 0.0, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.9975174904084857, |
| "grad_norm": 0.00061798095703125, |
| "learning_rate": 6.700143472022956e-06, |
| "loss": 0.0, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.9997743173098623, |
| "grad_norm": 0.01312255859375, |
| "learning_rate": 6.692592312920035e-06, |
| "loss": 0.0, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.002031144211239, |
| "grad_norm": 0.00026702880859375, |
| "learning_rate": 6.6850411538171116e-06, |
| "loss": 0.0, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.0042879711126156, |
| "grad_norm": 0.0025634765625, |
| "learning_rate": 6.677489994714189e-06, |
| "loss": 0.0, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.0065447980139923, |
| "grad_norm": 0.0003719329833984375, |
| "learning_rate": 6.669938835611267e-06, |
| "loss": 0.02, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.008801624915369, |
| "grad_norm": 0.003082275390625, |
| "learning_rate": 6.662387676508345e-06, |
| "loss": 0.0, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.0110584518167456, |
| "grad_norm": 0.00017070770263671875, |
| "learning_rate": 6.654836517405422e-06, |
| "loss": 0.0, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.0133152787181223, |
| "grad_norm": 0.00022220611572265625, |
| "learning_rate": 6.6472853583024995e-06, |
| "loss": 0.0, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.015572105619499, |
| "grad_norm": 0.0004119873046875, |
| "learning_rate": 6.639734199199578e-06, |
| "loss": 0.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0178289325208756, |
| "grad_norm": 0.000156402587890625, |
| "learning_rate": 6.632183040096656e-06, |
| "loss": 0.0, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.0200857594222523, |
| "grad_norm": 152.0, |
| "learning_rate": 6.624631880993733e-06, |
| "loss": 0.1668, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.022342586323629, |
| "grad_norm": 0.0002727508544921875, |
| "learning_rate": 6.6170807218908106e-06, |
| "loss": 0.0, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.0245994132250056, |
| "grad_norm": 0.000118255615234375, |
| "learning_rate": 6.609529562787888e-06, |
| "loss": 0.0, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.0268562401263823, |
| "grad_norm": 0.000286102294921875, |
| "learning_rate": 6.601978403684967e-06, |
| "loss": 0.0, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.029113067027759, |
| "grad_norm": 0.0001544952392578125, |
| "learning_rate": 6.594427244582044e-06, |
| "loss": 0.0, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.0313698939291356, |
| "grad_norm": 0.000286102294921875, |
| "learning_rate": 6.586876085479122e-06, |
| "loss": 0.0, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.0336267208305123, |
| "grad_norm": 0.00054168701171875, |
| "learning_rate": 6.579324926376199e-06, |
| "loss": 0.0, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.035883547731889, |
| "grad_norm": 0.003662109375, |
| "learning_rate": 6.571773767273277e-06, |
| "loss": 0.0, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.0381403746332656, |
| "grad_norm": 0.000576019287109375, |
| "learning_rate": 6.564222608170354e-06, |
| "loss": 0.0, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.0403972015346423, |
| "grad_norm": 0.00018310546875, |
| "learning_rate": 6.556671449067432e-06, |
| "loss": 0.0, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.042654028436019, |
| "grad_norm": 0.00083160400390625, |
| "learning_rate": 6.54912028996451e-06, |
| "loss": 0.0, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.0449108553373956, |
| "grad_norm": 0.0004596710205078125, |
| "learning_rate": 6.541569130861588e-06, |
| "loss": 0.0, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.0471676822387723, |
| "grad_norm": 0.0021820068359375, |
| "learning_rate": 6.534017971758666e-06, |
| "loss": 0.0, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.0494245091401488, |
| "grad_norm": 0.00077056884765625, |
| "learning_rate": 6.526466812655743e-06, |
| "loss": 0.0, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.0516813360415256, |
| "grad_norm": 0.0002460479736328125, |
| "learning_rate": 6.518915653552821e-06, |
| "loss": 0.0, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.0539381629429023, |
| "grad_norm": 48.25, |
| "learning_rate": 6.511364494449899e-06, |
| "loss": 0.268, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.0561949898442788, |
| "grad_norm": 0.00084686279296875, |
| "learning_rate": 6.503813335346977e-06, |
| "loss": 0.2907, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.0584518167456556, |
| "grad_norm": 0.00946044921875, |
| "learning_rate": 6.496262176244054e-06, |
| "loss": 0.5606, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.0607086436470323, |
| "grad_norm": 0.001953125, |
| "learning_rate": 6.488711017141132e-06, |
| "loss": 0.0, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.0629654705484088, |
| "grad_norm": 0.0115966796875, |
| "learning_rate": 6.481159858038209e-06, |
| "loss": 0.0, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.0652222974497856, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 6.473608698935287e-06, |
| "loss": 0.0771, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.0674791243511623, |
| "grad_norm": 0.002838134765625, |
| "learning_rate": 6.466057539832364e-06, |
| "loss": 0.0, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.0697359512525388, |
| "grad_norm": 0.002593994140625, |
| "learning_rate": 6.458506380729443e-06, |
| "loss": 0.0, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.0719927781539156, |
| "grad_norm": 0.0021820068359375, |
| "learning_rate": 6.4509552216265204e-06, |
| "loss": 0.0, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.0742496050552923, |
| "grad_norm": 0.0012664794921875, |
| "learning_rate": 6.443404062523598e-06, |
| "loss": 0.0, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.0765064319566688, |
| "grad_norm": 0.024169921875, |
| "learning_rate": 6.435852903420675e-06, |
| "loss": 0.336, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.0787632588580456, |
| "grad_norm": 0.0072021484375, |
| "learning_rate": 6.428301744317753e-06, |
| "loss": 0.1798, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.0810200857594223, |
| "grad_norm": 0.00128936767578125, |
| "learning_rate": 6.4207505852148315e-06, |
| "loss": 0.0, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.0832769126607988, |
| "grad_norm": 0.0050048828125, |
| "learning_rate": 6.413199426111909e-06, |
| "loss": 0.0, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.0855337395621756, |
| "grad_norm": 0.006744384765625, |
| "learning_rate": 6.405648267008986e-06, |
| "loss": 0.0, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.0877905664635523, |
| "grad_norm": 0.00164031982421875, |
| "learning_rate": 6.398097107906064e-06, |
| "loss": 0.0, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.0900473933649288, |
| "grad_norm": 0.0177001953125, |
| "learning_rate": 6.390545948803142e-06, |
| "loss": 0.0, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.0923042202663056, |
| "grad_norm": 0.00015544891357421875, |
| "learning_rate": 6.3829947897002194e-06, |
| "loss": 0.0, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.0945610471676823, |
| "grad_norm": 0.000179290771484375, |
| "learning_rate": 6.375443630597298e-06, |
| "loss": 0.0, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.0968178740690588, |
| "grad_norm": 0.005828857421875, |
| "learning_rate": 6.367892471494375e-06, |
| "loss": 0.0, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.0990747009704356, |
| "grad_norm": 0.00131988525390625, |
| "learning_rate": 6.360341312391453e-06, |
| "loss": 0.0, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.1013315278718123, |
| "grad_norm": 0.00098419189453125, |
| "learning_rate": 6.3527901532885305e-06, |
| "loss": 0.0, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.1035883547731888, |
| "grad_norm": 0.000156402587890625, |
| "learning_rate": 6.345238994185608e-06, |
| "loss": 0.0, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.1058451816745656, |
| "grad_norm": 0.00141143798828125, |
| "learning_rate": 6.337687835082685e-06, |
| "loss": 0.0, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.1081020085759423, |
| "grad_norm": 0.00164031982421875, |
| "learning_rate": 6.330136675979763e-06, |
| "loss": 0.2685, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.1103588354773188, |
| "grad_norm": 0.0014190673828125, |
| "learning_rate": 6.3225855168768415e-06, |
| "loss": 0.0, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.1126156623786956, |
| "grad_norm": 0.000972747802734375, |
| "learning_rate": 6.315034357773919e-06, |
| "loss": 0.1327, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.1148724892800723, |
| "grad_norm": 0.26171875, |
| "learning_rate": 6.307483198670996e-06, |
| "loss": 0.0, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.1171293161814488, |
| "grad_norm": 0.002227783203125, |
| "learning_rate": 6.299932039568074e-06, |
| "loss": 0.0, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.1193861430828256, |
| "grad_norm": 0.0145263671875, |
| "learning_rate": 6.292380880465152e-06, |
| "loss": 0.0, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.1216429699842023, |
| "grad_norm": 0.00238037109375, |
| "learning_rate": 6.28482972136223e-06, |
| "loss": 0.0, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.1238997968855788, |
| "grad_norm": 0.00031280517578125, |
| "learning_rate": 6.277278562259307e-06, |
| "loss": 0.0, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.1261566237869556, |
| "grad_norm": 0.0038604736328125, |
| "learning_rate": 6.269727403156385e-06, |
| "loss": 0.0, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.128413450688332, |
| "grad_norm": 0.002532958984375, |
| "learning_rate": 6.262176244053463e-06, |
| "loss": 0.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1306702775897088, |
| "grad_norm": 0.000244140625, |
| "learning_rate": 6.2546250849505405e-06, |
| "loss": 0.0, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.1329271044910856, |
| "grad_norm": 0.004119873046875, |
| "learning_rate": 6.2470739258476174e-06, |
| "loss": 0.3443, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.1351839313924623, |
| "grad_norm": 0.002685546875, |
| "learning_rate": 6.239522766744695e-06, |
| "loss": 0.0, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.1374407582938388, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 6.231971607641774e-06, |
| "loss": 0.0, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.1396975851952156, |
| "grad_norm": 0.01239013671875, |
| "learning_rate": 6.2244204485388516e-06, |
| "loss": 0.0, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.141954412096592, |
| "grad_norm": 0.0234375, |
| "learning_rate": 6.2168692894359285e-06, |
| "loss": 0.3027, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.1442112389979688, |
| "grad_norm": 0.00051116943359375, |
| "learning_rate": 6.209318130333006e-06, |
| "loss": 0.0, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.1464680658993456, |
| "grad_norm": 0.000598907470703125, |
| "learning_rate": 6.201766971230084e-06, |
| "loss": 0.0, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.148724892800722, |
| "grad_norm": 0.0147705078125, |
| "learning_rate": 6.194215812127163e-06, |
| "loss": 0.0, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.1509817197020988, |
| "grad_norm": 0.005218505859375, |
| "learning_rate": 6.18666465302424e-06, |
| "loss": 0.0, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.1532385466034756, |
| "grad_norm": 0.00250244140625, |
| "learning_rate": 6.179113493921317e-06, |
| "loss": 0.0, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.155495373504852, |
| "grad_norm": 0.0002765655517578125, |
| "learning_rate": 6.171562334818395e-06, |
| "loss": 0.0, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.1577522004062288, |
| "grad_norm": 0.000728607177734375, |
| "learning_rate": 6.164011175715473e-06, |
| "loss": 0.0, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.1600090273076056, |
| "grad_norm": 0.00830078125, |
| "learning_rate": 6.1564600166125506e-06, |
| "loss": 0.1798, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.162265854208982, |
| "grad_norm": 0.000766754150390625, |
| "learning_rate": 6.1489088575096275e-06, |
| "loss": 0.0, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.1645226811103588, |
| "grad_norm": 0.0004215240478515625, |
| "learning_rate": 6.141357698406706e-06, |
| "loss": 0.0, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.1667795080117356, |
| "grad_norm": 0.0034637451171875, |
| "learning_rate": 6.133806539303784e-06, |
| "loss": 0.0, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.169036334913112, |
| "grad_norm": 8.0108642578125e-05, |
| "learning_rate": 6.126255380200862e-06, |
| "loss": 0.0, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.1712931618144888, |
| "grad_norm": 0.00191497802734375, |
| "learning_rate": 6.1187042210979385e-06, |
| "loss": 0.0, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.1735499887158656, |
| "grad_norm": 0.0003147125244140625, |
| "learning_rate": 6.111153061995016e-06, |
| "loss": 0.0, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.175806815617242, |
| "grad_norm": 0.005126953125, |
| "learning_rate": 6.103601902892095e-06, |
| "loss": 0.0, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.1780636425186188, |
| "grad_norm": 0.00104522705078125, |
| "learning_rate": 6.096050743789173e-06, |
| "loss": 0.0, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.1803204694199956, |
| "grad_norm": 0.00109100341796875, |
| "learning_rate": 6.0884995846862496e-06, |
| "loss": 0.098, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.182577296321372, |
| "grad_norm": 0.0028228759765625, |
| "learning_rate": 6.080948425583327e-06, |
| "loss": 0.0, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.1848341232227488, |
| "grad_norm": 0.0003376007080078125, |
| "learning_rate": 6.073397266480405e-06, |
| "loss": 0.0, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.1870909501241256, |
| "grad_norm": 0.0022430419921875, |
| "learning_rate": 6.065846107377483e-06, |
| "loss": 0.0, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.189347777025502, |
| "grad_norm": 0.003082275390625, |
| "learning_rate": 6.05829494827456e-06, |
| "loss": 0.2679, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.1916046039268788, |
| "grad_norm": 0.0012359619140625, |
| "learning_rate": 6.050743789171638e-06, |
| "loss": 0.122, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.1938614308282554, |
| "grad_norm": 0.000385284423828125, |
| "learning_rate": 6.043192630068716e-06, |
| "loss": 0.0, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.196118257729632, |
| "grad_norm": 0.00018596649169921875, |
| "learning_rate": 6.035641470965794e-06, |
| "loss": 0.0, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.1983750846310088, |
| "grad_norm": 0.002349853515625, |
| "learning_rate": 6.028090311862872e-06, |
| "loss": 0.0, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.2006319115323856, |
| "grad_norm": 0.000621795654296875, |
| "learning_rate": 6.0205391527599486e-06, |
| "loss": 0.0, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.202888738433762, |
| "grad_norm": 0.001739501953125, |
| "learning_rate": 6.012987993657027e-06, |
| "loss": 0.0, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.2051455653351388, |
| "grad_norm": 0.00019931793212890625, |
| "learning_rate": 6.005436834554105e-06, |
| "loss": 0.0, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.2074023922365154, |
| "grad_norm": 0.00186920166015625, |
| "learning_rate": 5.997885675451183e-06, |
| "loss": 0.1819, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.209659219137892, |
| "grad_norm": 0.00061798095703125, |
| "learning_rate": 5.99033451634826e-06, |
| "loss": 0.0, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.2119160460392688, |
| "grad_norm": 0.0018157958984375, |
| "learning_rate": 5.982783357245337e-06, |
| "loss": 0.0, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.2141728729406456, |
| "grad_norm": 0.01904296875, |
| "learning_rate": 5.975232198142415e-06, |
| "loss": 0.0, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.216429699842022, |
| "grad_norm": 0.0014190673828125, |
| "learning_rate": 5.967681039039494e-06, |
| "loss": 0.0, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.2186865267433988, |
| "grad_norm": 0.000553131103515625, |
| "learning_rate": 5.960129879936571e-06, |
| "loss": 0.0, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.2209433536447754, |
| "grad_norm": 0.000858306884765625, |
| "learning_rate": 5.952578720833648e-06, |
| "loss": 0.0, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.223200180546152, |
| "grad_norm": 0.001678466796875, |
| "learning_rate": 5.945027561730726e-06, |
| "loss": 0.6368, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.2254570074475288, |
| "grad_norm": 0.0004119873046875, |
| "learning_rate": 5.937476402627804e-06, |
| "loss": 0.0676, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.2277138343489054, |
| "grad_norm": 0.005126953125, |
| "learning_rate": 5.929925243524881e-06, |
| "loss": 0.1247, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.229970661250282, |
| "grad_norm": 0.000949859619140625, |
| "learning_rate": 5.9223740844219595e-06, |
| "loss": 0.0, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.2322274881516588, |
| "grad_norm": 0.00148773193359375, |
| "learning_rate": 5.914822925319037e-06, |
| "loss": 0.0, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.2344843150530354, |
| "grad_norm": 0.0020904541015625, |
| "learning_rate": 5.907271766216115e-06, |
| "loss": 0.0, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.236741141954412, |
| "grad_norm": 0.000865936279296875, |
| "learning_rate": 5.899720607113192e-06, |
| "loss": 0.0, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.2389979688557888, |
| "grad_norm": 0.0021820068359375, |
| "learning_rate": 5.89216944801027e-06, |
| "loss": 0.0, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.2412547957571654, |
| "grad_norm": 0.003570556640625, |
| "learning_rate": 5.884618288907347e-06, |
| "loss": 0.0, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.243511622658542, |
| "grad_norm": 0.000560760498046875, |
| "learning_rate": 5.877067129804426e-06, |
| "loss": 0.2991, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.2457684495599188, |
| "grad_norm": 0.0027008056640625, |
| "learning_rate": 5.869515970701504e-06, |
| "loss": 0.0, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.2480252764612954, |
| "grad_norm": 0.003997802734375, |
| "learning_rate": 5.861964811598581e-06, |
| "loss": 0.2871, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.250282103362672, |
| "grad_norm": 0.0020751953125, |
| "learning_rate": 5.8544136524956585e-06, |
| "loss": 0.0, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.2525389302640488, |
| "grad_norm": 0.000579833984375, |
| "learning_rate": 5.846862493392736e-06, |
| "loss": 0.0, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.2547957571654254, |
| "grad_norm": 0.004119873046875, |
| "learning_rate": 5.839311334289815e-06, |
| "loss": 0.0, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.257052584066802, |
| "grad_norm": 0.00054931640625, |
| "learning_rate": 5.831760175186891e-06, |
| "loss": 0.0001, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.2593094109681786, |
| "grad_norm": 0.00170135498046875, |
| "learning_rate": 5.8242090160839695e-06, |
| "loss": 0.1847, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.2615662378695554, |
| "grad_norm": 0.000247955322265625, |
| "learning_rate": 5.816657856981047e-06, |
| "loss": 0.0, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.263823064770932, |
| "grad_norm": 0.0002956390380859375, |
| "learning_rate": 5.809106697878125e-06, |
| "loss": 0.0, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.2660798916723088, |
| "grad_norm": 0.001983642578125, |
| "learning_rate": 5.801555538775202e-06, |
| "loss": 0.0, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.2683367185736854, |
| "grad_norm": 0.00022792816162109375, |
| "learning_rate": 5.79400437967228e-06, |
| "loss": 0.0008, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.270593545475062, |
| "grad_norm": 0.000247955322265625, |
| "learning_rate": 5.786453220569358e-06, |
| "loss": 0.0, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.2728503723764386, |
| "grad_norm": 0.00080108642578125, |
| "learning_rate": 5.778902061466436e-06, |
| "loss": 0.0, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.2751071992778154, |
| "grad_norm": 0.0022430419921875, |
| "learning_rate": 5.771350902363513e-06, |
| "loss": 0.2461, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.277364026179192, |
| "grad_norm": 0.000720977783203125, |
| "learning_rate": 5.763799743260591e-06, |
| "loss": 0.0, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.2796208530805688, |
| "grad_norm": 0.0001697540283203125, |
| "learning_rate": 5.7562485841576685e-06, |
| "loss": 0.0, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.2818776799819454, |
| "grad_norm": 0.0003223419189453125, |
| "learning_rate": 5.748697425054747e-06, |
| "loss": 0.0, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.284134506883322, |
| "grad_norm": 0.00067138671875, |
| "learning_rate": 5.741146265951823e-06, |
| "loss": 0.0, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.2863913337846986, |
| "grad_norm": 0.000133514404296875, |
| "learning_rate": 5.733595106848902e-06, |
| "loss": 0.0, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.2886481606860754, |
| "grad_norm": 0.0006103515625, |
| "learning_rate": 5.7260439477459796e-06, |
| "loss": 0.0, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.290904987587452, |
| "grad_norm": 0.01220703125, |
| "learning_rate": 5.718492788643057e-06, |
| "loss": 0.0, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.2931618144888288, |
| "grad_norm": 0.0005645751953125, |
| "learning_rate": 5.710941629540135e-06, |
| "loss": 0.0, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.2954186413902053, |
| "grad_norm": 0.00152587890625, |
| "learning_rate": 5.703390470437212e-06, |
| "loss": 0.0, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.297675468291582, |
| "grad_norm": 0.00170135498046875, |
| "learning_rate": 5.695839311334291e-06, |
| "loss": 0.0, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.2999322951929586, |
| "grad_norm": 0.0004100799560546875, |
| "learning_rate": 5.688288152231368e-06, |
| "loss": 0.3257, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.3021891220943353, |
| "grad_norm": 0.000667572021484375, |
| "learning_rate": 5.680736993128446e-06, |
| "loss": 0.0, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.304445948995712, |
| "grad_norm": 0.0002956390380859375, |
| "learning_rate": 5.673185834025523e-06, |
| "loss": 0.0, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.3067027758970888, |
| "grad_norm": 9.918212890625e-05, |
| "learning_rate": 5.665634674922601e-06, |
| "loss": 0.0, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.3089596027984653, |
| "grad_norm": 0.000293731689453125, |
| "learning_rate": 5.6580835158196786e-06, |
| "loss": 0.0, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.311216429699842, |
| "grad_norm": 70.5, |
| "learning_rate": 5.650532356716757e-06, |
| "loss": 0.1552, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.3134732566012186, |
| "grad_norm": 0.00022029876708984375, |
| "learning_rate": 5.642981197613834e-06, |
| "loss": 0.2409, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.3157300835025953, |
| "grad_norm": 0.0005340576171875, |
| "learning_rate": 5.635430038510912e-06, |
| "loss": 0.0, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.317986910403972, |
| "grad_norm": 0.00165557861328125, |
| "learning_rate": 5.62787887940799e-06, |
| "loss": 0.4549, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.3202437373053486, |
| "grad_norm": 0.000545501708984375, |
| "learning_rate": 5.620327720305067e-06, |
| "loss": 0.0, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.3225005642067253, |
| "grad_norm": 0.000202178955078125, |
| "learning_rate": 5.612776561202144e-06, |
| "loss": 0.1561, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.324757391108102, |
| "grad_norm": 0.0022125244140625, |
| "learning_rate": 5.605225402099223e-06, |
| "loss": 0.0, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.3270142180094786, |
| "grad_norm": 0.001617431640625, |
| "learning_rate": 5.597674242996301e-06, |
| "loss": 0.0, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.3292710449108553, |
| "grad_norm": 0.0001316070556640625, |
| "learning_rate": 5.590123083893378e-06, |
| "loss": 0.0, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.331527871812232, |
| "grad_norm": 0.00150299072265625, |
| "learning_rate": 5.582571924790455e-06, |
| "loss": 0.0, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.3337846987136086, |
| "grad_norm": 0.0024261474609375, |
| "learning_rate": 5.575020765687533e-06, |
| "loss": 0.0, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.3360415256149853, |
| "grad_norm": 0.000370025634765625, |
| "learning_rate": 5.567469606584611e-06, |
| "loss": 0.1382, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.3382983525163619, |
| "grad_norm": 0.002410888671875, |
| "learning_rate": 5.5599184474816894e-06, |
| "loss": 0.0, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.3405551794177386, |
| "grad_norm": 0.00010395050048828125, |
| "learning_rate": 5.552367288378766e-06, |
| "loss": 0.0, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.3428120063191153, |
| "grad_norm": 0.00049591064453125, |
| "learning_rate": 5.544816129275844e-06, |
| "loss": 0.0, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.345068833220492, |
| "grad_norm": 0.0003814697265625, |
| "learning_rate": 5.537264970172922e-06, |
| "loss": 0.0, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.3473256601218686, |
| "grad_norm": 0.0228271484375, |
| "learning_rate": 5.52971381107e-06, |
| "loss": 0.0036, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.3495824870232453, |
| "grad_norm": 0.000568389892578125, |
| "learning_rate": 5.522162651967078e-06, |
| "loss": 0.3318, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.3518393139246219, |
| "grad_norm": 0.000804901123046875, |
| "learning_rate": 5.514611492864155e-06, |
| "loss": 0.1936, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.3540961408259986, |
| "grad_norm": 0.0015869140625, |
| "learning_rate": 5.507060333761233e-06, |
| "loss": 0.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.3563529677273753, |
| "grad_norm": 0.00189971923828125, |
| "learning_rate": 5.499509174658311e-06, |
| "loss": 0.0, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.358609794628752, |
| "grad_norm": 0.000518798828125, |
| "learning_rate": 5.4919580155553884e-06, |
| "loss": 0.2387, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.3608666215301286, |
| "grad_norm": 0.00323486328125, |
| "learning_rate": 5.484406856452465e-06, |
| "loss": 0.0003, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.3631234484315053, |
| "grad_norm": 0.0181884765625, |
| "learning_rate": 5.476855697349543e-06, |
| "loss": 0.0, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.3653802753328819, |
| "grad_norm": 0.000217437744140625, |
| "learning_rate": 5.469304538246622e-06, |
| "loss": 0.0, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.3676371022342586, |
| "grad_norm": 0.0008087158203125, |
| "learning_rate": 5.4617533791436995e-06, |
| "loss": 0.0, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.3698939291356353, |
| "grad_norm": 0.001922607421875, |
| "learning_rate": 5.454202220040776e-06, |
| "loss": 0.0, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.372150756037012, |
| "grad_norm": 0.00183868408203125, |
| "learning_rate": 5.446651060937854e-06, |
| "loss": 0.0, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.3744075829383886, |
| "grad_norm": 0.00193023681640625, |
| "learning_rate": 5.439099901834932e-06, |
| "loss": 0.0, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.3766644098397653, |
| "grad_norm": 0.007476806640625, |
| "learning_rate": 5.4315487427320105e-06, |
| "loss": 0.0, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.3789212367411419, |
| "grad_norm": 0.000171661376953125, |
| "learning_rate": 5.423997583629087e-06, |
| "loss": 0.0082, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.3811780636425186, |
| "grad_norm": 0.00201416015625, |
| "learning_rate": 5.416446424526165e-06, |
| "loss": 0.0, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.3834348905438953, |
| "grad_norm": 0.0010223388671875, |
| "learning_rate": 5.408895265423243e-06, |
| "loss": 0.0, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.3856917174452719, |
| "grad_norm": 0.0030517578125, |
| "learning_rate": 5.401344106320321e-06, |
| "loss": 0.0, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.3879485443466486, |
| "grad_norm": 0.00070953369140625, |
| "learning_rate": 5.393792947217398e-06, |
| "loss": 0.0, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.3902053712480253, |
| "grad_norm": 0.0004482269287109375, |
| "learning_rate": 5.386241788114475e-06, |
| "loss": 0.0, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.3924621981494019, |
| "grad_norm": 0.00118255615234375, |
| "learning_rate": 5.378690629011554e-06, |
| "loss": 0.3508, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.3947190250507786, |
| "grad_norm": 0.0012664794921875, |
| "learning_rate": 5.371139469908632e-06, |
| "loss": 0.2107, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.3969758519521553, |
| "grad_norm": 0.00125885009765625, |
| "learning_rate": 5.3635883108057095e-06, |
| "loss": 0.0, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.3992326788535319, |
| "grad_norm": 0.000713348388671875, |
| "learning_rate": 5.3560371517027864e-06, |
| "loss": 0.2159, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.4014895057549086, |
| "grad_norm": 0.00090789794921875, |
| "learning_rate": 5.348485992599864e-06, |
| "loss": 0.0, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.4037463326562853, |
| "grad_norm": 0.00982666015625, |
| "learning_rate": 5.340934833496943e-06, |
| "loss": 0.2689, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.4060031595576619, |
| "grad_norm": 0.01019287109375, |
| "learning_rate": 5.3333836743940206e-06, |
| "loss": 0.0, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.4082599864590386, |
| "grad_norm": 0.02392578125, |
| "learning_rate": 5.3258325152910975e-06, |
| "loss": 0.0001, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.4105168133604153, |
| "grad_norm": 0.0023345947265625, |
| "learning_rate": 5.318281356188175e-06, |
| "loss": 0.0001, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.4127736402617919, |
| "grad_norm": 0.000885009765625, |
| "learning_rate": 5.310730197085253e-06, |
| "loss": 0.1802, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.4150304671631686, |
| "grad_norm": 0.0024566650390625, |
| "learning_rate": 5.303179037982331e-06, |
| "loss": 0.0, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.4172872940645451, |
| "grad_norm": 0.00579833984375, |
| "learning_rate": 5.295627878879408e-06, |
| "loss": 0.0, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.4195441209659219, |
| "grad_norm": 0.0103759765625, |
| "learning_rate": 5.288076719776486e-06, |
| "loss": 0.0, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.4218009478672986, |
| "grad_norm": 0.000530242919921875, |
| "learning_rate": 5.280525560673564e-06, |
| "loss": 0.0, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.4240577747686753, |
| "grad_norm": 0.000827789306640625, |
| "learning_rate": 5.272974401570642e-06, |
| "loss": 0.0, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.4263146016700519, |
| "grad_norm": 0.00836181640625, |
| "learning_rate": 5.265423242467719e-06, |
| "loss": 0.0, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.0013427734375, |
| "learning_rate": 5.2578720833647965e-06, |
| "loss": 0.0, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.4308282554728051, |
| "grad_norm": 0.001434326171875, |
| "learning_rate": 5.250320924261875e-06, |
| "loss": 0.0, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.4330850823741819, |
| "grad_norm": 0.00025177001953125, |
| "learning_rate": 5.242769765158953e-06, |
| "loss": 0.0, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.4353419092755586, |
| "grad_norm": 78.0, |
| "learning_rate": 5.23521860605603e-06, |
| "loss": 0.4103, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.4375987361769353, |
| "grad_norm": 0.0024871826171875, |
| "learning_rate": 5.2276674469531075e-06, |
| "loss": 0.0, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.4398555630783119, |
| "grad_norm": 0.000316619873046875, |
| "learning_rate": 5.220116287850185e-06, |
| "loss": 0.0, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.4421123899796886, |
| "grad_norm": 0.002777099609375, |
| "learning_rate": 5.212565128747263e-06, |
| "loss": 0.0, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.4443692168810651, |
| "grad_norm": 0.006439208984375, |
| "learning_rate": 5.205013969644342e-06, |
| "loss": 0.0, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.4466260437824419, |
| "grad_norm": 0.00030517578125, |
| "learning_rate": 5.1974628105414186e-06, |
| "loss": 0.0, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.4488828706838186, |
| "grad_norm": 0.003997802734375, |
| "learning_rate": 5.189911651438496e-06, |
| "loss": 0.0, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.4511396975851953, |
| "grad_norm": 0.000629425048828125, |
| "learning_rate": 5.182360492335574e-06, |
| "loss": 0.0002, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.4533965244865719, |
| "grad_norm": 0.00159454345703125, |
| "learning_rate": 5.174809333232652e-06, |
| "loss": 0.2868, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.4556533513879486, |
| "grad_norm": 0.00095367431640625, |
| "learning_rate": 5.167258174129729e-06, |
| "loss": 0.0, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.4579101782893251, |
| "grad_norm": 0.0003032684326171875, |
| "learning_rate": 5.1597070150268065e-06, |
| "loss": 0.0, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.4601670051907019, |
| "grad_norm": 0.00159454345703125, |
| "learning_rate": 5.152155855923885e-06, |
| "loss": 0.0, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.4624238320920786, |
| "grad_norm": 0.00213623046875, |
| "learning_rate": 5.144604696820963e-06, |
| "loss": 0.0653, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.4646806589934551, |
| "grad_norm": 0.0011749267578125, |
| "learning_rate": 5.13705353771804e-06, |
| "loss": 0.0, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.4669374858948319, |
| "grad_norm": 0.01177978515625, |
| "learning_rate": 5.1295023786151176e-06, |
| "loss": 0.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.4691943127962086, |
| "grad_norm": 0.0001354217529296875, |
| "learning_rate": 5.121951219512195e-06, |
| "loss": 0.0, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.4714511396975851, |
| "grad_norm": 0.00017833709716796875, |
| "learning_rate": 5.114400060409274e-06, |
| "loss": 0.0, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.4737079665989619, |
| "grad_norm": 0.0006561279296875, |
| "learning_rate": 5.106848901306351e-06, |
| "loss": 0.0, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.4759647935003386, |
| "grad_norm": 0.000286102294921875, |
| "learning_rate": 5.099297742203429e-06, |
| "loss": 0.0, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.4782216204017151, |
| "grad_norm": 0.00026702880859375, |
| "learning_rate": 5.091746583100506e-06, |
| "loss": 0.0, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.4804784473030919, |
| "grad_norm": 0.01361083984375, |
| "learning_rate": 5.084195423997584e-06, |
| "loss": 0.0, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.4827352742044684, |
| "grad_norm": 0.0001583099365234375, |
| "learning_rate": 5.076644264894661e-06, |
| "loss": 0.0592, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.4849921011058451, |
| "grad_norm": 0.00131988525390625, |
| "learning_rate": 5.069093105791739e-06, |
| "loss": 0.0, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.4872489280072219, |
| "grad_norm": 0.000667572021484375, |
| "learning_rate": 5.061541946688817e-06, |
| "loss": 0.0, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.4895057549085986, |
| "grad_norm": 0.000759124755859375, |
| "learning_rate": 5.053990787585895e-06, |
| "loss": 0.0, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.4917625818099751, |
| "grad_norm": 0.000255584716796875, |
| "learning_rate": 5.046439628482973e-06, |
| "loss": 0.0364, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.4940194087113519, |
| "grad_norm": 0.00023365020751953125, |
| "learning_rate": 5.03888846938005e-06, |
| "loss": 0.0, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.4962762356127284, |
| "grad_norm": 8.153915405273438e-05, |
| "learning_rate": 5.031337310277128e-06, |
| "loss": 0.0, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.4985330625141051, |
| "grad_norm": 0.0028228759765625, |
| "learning_rate": 5.023786151174206e-06, |
| "loss": 0.0, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.5007898894154819, |
| "grad_norm": 9.775161743164062e-05, |
| "learning_rate": 5.016234992071284e-06, |
| "loss": 0.0, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.5030467163168586, |
| "grad_norm": 0.000316619873046875, |
| "learning_rate": 5.008683832968361e-06, |
| "loss": 0.2175, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.5053035432182351, |
| "grad_norm": 0.0003604888916015625, |
| "learning_rate": 5.001132673865439e-06, |
| "loss": 0.0, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.5075603701196119, |
| "grad_norm": 0.000179290771484375, |
| "learning_rate": 4.993581514762516e-06, |
| "loss": 0.0, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.5098171970209884, |
| "grad_norm": 0.0028839111328125, |
| "learning_rate": 4.986030355659594e-06, |
| "loss": 0.0, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.5120740239223651, |
| "grad_norm": 0.00125885009765625, |
| "learning_rate": 4.978479196556672e-06, |
| "loss": 0.0, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.5143308508237419, |
| "grad_norm": 0.000331878662109375, |
| "learning_rate": 4.97092803745375e-06, |
| "loss": 0.0, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.5165876777251186, |
| "grad_norm": 0.000675201416015625, |
| "learning_rate": 4.9633768783508275e-06, |
| "loss": 0.0, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.5188445046264951, |
| "grad_norm": 0.00015735626220703125, |
| "learning_rate": 4.955825719247904e-06, |
| "loss": 0.0897, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.5211013315278719, |
| "grad_norm": 0.00061798095703125, |
| "learning_rate": 4.948274560144983e-06, |
| "loss": 0.0, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.5233581584292484, |
| "grad_norm": 0.0009765625, |
| "learning_rate": 4.94072340104206e-06, |
| "loss": 0.0, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.5256149853306251, |
| "grad_norm": 0.00010585784912109375, |
| "learning_rate": 4.9331722419391385e-06, |
| "loss": 0.0, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.5278718122320019, |
| "grad_norm": 0.0001087188720703125, |
| "learning_rate": 4.925621082836215e-06, |
| "loss": 0.0, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.5301286391333786, |
| "grad_norm": 0.0009307861328125, |
| "learning_rate": 4.918069923733293e-06, |
| "loss": 0.0, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.5323854660347551, |
| "grad_norm": 0.002593994140625, |
| "learning_rate": 4.910518764630371e-06, |
| "loss": 0.0, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.5346422929361316, |
| "grad_norm": 0.00048828125, |
| "learning_rate": 4.902967605527449e-06, |
| "loss": 0.0, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.5368991198375084, |
| "grad_norm": 0.00016021728515625, |
| "learning_rate": 4.8954164464245265e-06, |
| "loss": 0.0, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.5391559467388851, |
| "grad_norm": 0.00019168853759765625, |
| "learning_rate": 4.887865287321604e-06, |
| "loss": 0.6172, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.5414127736402619, |
| "grad_norm": 0.00167083740234375, |
| "learning_rate": 4.880314128218682e-06, |
| "loss": 0.0, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.5436696005416386, |
| "grad_norm": 0.0029754638671875, |
| "learning_rate": 4.87276296911576e-06, |
| "loss": 0.0, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.5459264274430151, |
| "grad_norm": 0.0010833740234375, |
| "learning_rate": 4.8652118100128375e-06, |
| "loss": 0.0, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.5481832543443916, |
| "grad_norm": 0.00012683868408203125, |
| "learning_rate": 4.857660650909915e-06, |
| "loss": 0.0, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.5504400812457684, |
| "grad_norm": 0.0010223388671875, |
| "learning_rate": 4.850109491806993e-06, |
| "loss": 0.2717, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.5526969081471451, |
| "grad_norm": 0.00341796875, |
| "learning_rate": 4.842558332704071e-06, |
| "loss": 0.0, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.5549537350485219, |
| "grad_norm": 0.002685546875, |
| "learning_rate": 4.8350071736011486e-06, |
| "loss": 0.0, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.5572105619498986, |
| "grad_norm": 9.5367431640625e-05, |
| "learning_rate": 4.8274560144982255e-06, |
| "loss": 0.0, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.5594673888512751, |
| "grad_norm": 0.0003795623779296875, |
| "learning_rate": 4.819904855395304e-06, |
| "loss": 0.0, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.5617242157526516, |
| "grad_norm": 0.00022220611572265625, |
| "learning_rate": 4.812353696292381e-06, |
| "loss": 0.0, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.5639810426540284, |
| "grad_norm": 0.00040435791015625, |
| "learning_rate": 4.804802537189459e-06, |
| "loss": 0.0, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.5662378695554051, |
| "grad_norm": 0.000690460205078125, |
| "learning_rate": 4.7972513780865365e-06, |
| "loss": 0.0, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.5684946964567819, |
| "grad_norm": 0.001983642578125, |
| "learning_rate": 4.789700218983614e-06, |
| "loss": 0.0, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.5707515233581584, |
| "grad_norm": 0.00018787384033203125, |
| "learning_rate": 4.782149059880692e-06, |
| "loss": 0.0, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.5730083502595351, |
| "grad_norm": 0.000152587890625, |
| "learning_rate": 4.77459790077777e-06, |
| "loss": 0.0, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.5752651771609116, |
| "grad_norm": 0.00014591217041015625, |
| "learning_rate": 4.7670467416748476e-06, |
| "loss": 0.0, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.5775220040622884, |
| "grad_norm": 0.00018978118896484375, |
| "learning_rate": 4.759495582571925e-06, |
| "loss": 0.0, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.5797788309636651, |
| "grad_norm": 0.000286102294921875, |
| "learning_rate": 4.751944423469002e-06, |
| "loss": 0.0, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.5820356578650419, |
| "grad_norm": 0.000293731689453125, |
| "learning_rate": 4.744393264366081e-06, |
| "loss": 0.2482, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.5842924847664184, |
| "grad_norm": 0.0005035400390625, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.3, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.5865493116677951, |
| "grad_norm": 0.00110626220703125, |
| "learning_rate": 4.729290946160236e-06, |
| "loss": 0.0, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.5888061385691716, |
| "grad_norm": 0.00040435791015625, |
| "learning_rate": 4.721739787057314e-06, |
| "loss": 0.0, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.5910629654705484, |
| "grad_norm": 0.00555419921875, |
| "learning_rate": 4.714188627954391e-06, |
| "loss": 0.0, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.5933197923719251, |
| "grad_norm": 0.00018215179443359375, |
| "learning_rate": 4.70663746885147e-06, |
| "loss": 0.0, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.5955766192733019, |
| "grad_norm": 10.125, |
| "learning_rate": 4.6990863097485466e-06, |
| "loss": 0.0016, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.5978334461746784, |
| "grad_norm": 0.000713348388671875, |
| "learning_rate": 4.691535150645624e-06, |
| "loss": 0.0, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.6000902730760551, |
| "grad_norm": 0.0004825592041015625, |
| "learning_rate": 4.683983991542702e-06, |
| "loss": 0.0002, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.6023470999774316, |
| "grad_norm": 0.00099945068359375, |
| "learning_rate": 4.67643283243978e-06, |
| "loss": 0.3182, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.6046039268788084, |
| "grad_norm": 0.004852294921875, |
| "learning_rate": 4.668881673336858e-06, |
| "loss": 0.0, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.6068607537801851, |
| "grad_norm": 0.000957489013671875, |
| "learning_rate": 4.661330514233935e-06, |
| "loss": 0.0001, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.6091175806815619, |
| "grad_norm": 0.0024566650390625, |
| "learning_rate": 4.653779355131013e-06, |
| "loss": 0.2413, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.6113744075829384, |
| "grad_norm": 44.25, |
| "learning_rate": 4.646228196028091e-06, |
| "loss": 0.287, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.613631234484315, |
| "grad_norm": 0.00049591064453125, |
| "learning_rate": 4.638677036925169e-06, |
| "loss": 0.2296, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.6158880613856916, |
| "grad_norm": 0.000514984130859375, |
| "learning_rate": 4.631125877822246e-06, |
| "loss": 0.0, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.6181448882870684, |
| "grad_norm": 0.0059814453125, |
| "learning_rate": 4.623574718719323e-06, |
| "loss": 0.0, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.6204017151884451, |
| "grad_norm": 0.00185394287109375, |
| "learning_rate": 4.616023559616402e-06, |
| "loss": 0.0002, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.6226585420898219, |
| "grad_norm": 0.00064849853515625, |
| "learning_rate": 4.608472400513479e-06, |
| "loss": 0.0, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.6249153689911984, |
| "grad_norm": 0.00125885009765625, |
| "learning_rate": 4.600921241410557e-06, |
| "loss": 0.0, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.627172195892575, |
| "grad_norm": 0.0007171630859375, |
| "learning_rate": 4.593370082307634e-06, |
| "loss": 0.0, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.6294290227939516, |
| "grad_norm": 0.00335693359375, |
| "learning_rate": 4.585818923204712e-06, |
| "loss": 0.0, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.6316858496953284, |
| "grad_norm": 0.0038909912109375, |
| "learning_rate": 4.57826776410179e-06, |
| "loss": 0.2573, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.6339426765967051, |
| "grad_norm": 0.0015869140625, |
| "learning_rate": 4.570716604998868e-06, |
| "loss": 0.0, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.6361995034980819, |
| "grad_norm": 0.0031585693359375, |
| "learning_rate": 4.563165445895945e-06, |
| "loss": 0.2589, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.6384563303994584, |
| "grad_norm": 0.0035552978515625, |
| "learning_rate": 4.555614286793023e-06, |
| "loss": 0.0, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.640713157300835, |
| "grad_norm": 0.020751953125, |
| "learning_rate": 4.548063127690101e-06, |
| "loss": 0.2702, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.6429699842022116, |
| "grad_norm": 0.00457763671875, |
| "learning_rate": 4.540511968587179e-06, |
| "loss": 0.0, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.6452268111035884, |
| "grad_norm": 0.0078125, |
| "learning_rate": 4.5329608094842564e-06, |
| "loss": 0.0, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.6474836380049651, |
| "grad_norm": 0.00579833984375, |
| "learning_rate": 4.525409650381334e-06, |
| "loss": 0.0, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.6497404649063416, |
| "grad_norm": 0.0003147125244140625, |
| "learning_rate": 4.517858491278412e-06, |
| "loss": 0.0, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.6519972918077184, |
| "grad_norm": 0.00286865234375, |
| "learning_rate": 4.510307332175489e-06, |
| "loss": 0.1944, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.654254118709095, |
| "grad_norm": 0.004638671875, |
| "learning_rate": 4.5027561730725675e-06, |
| "loss": 0.0, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.6565109456104716, |
| "grad_norm": 0.002685546875, |
| "learning_rate": 4.495205013969644e-06, |
| "loss": 0.0, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.6587677725118484, |
| "grad_norm": 0.00156402587890625, |
| "learning_rate": 4.487653854866722e-06, |
| "loss": 0.0, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.6610245994132251, |
| "grad_norm": 0.000568389892578125, |
| "learning_rate": 4.4801026957638e-06, |
| "loss": 0.0, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.6632814263146016, |
| "grad_norm": 0.01422119140625, |
| "learning_rate": 4.472551536660878e-06, |
| "loss": 0.0, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.6655382532159784, |
| "grad_norm": 0.0009613037109375, |
| "learning_rate": 4.4650003775579554e-06, |
| "loss": 0.0, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.6677950801173549, |
| "grad_norm": 0.00055694580078125, |
| "learning_rate": 4.457449218455033e-06, |
| "loss": 0.2759, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.6700519070187316, |
| "grad_norm": 28.0, |
| "learning_rate": 4.449898059352111e-06, |
| "loss": 0.0055, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.6723087339201084, |
| "grad_norm": 0.0010528564453125, |
| "learning_rate": 4.442346900249189e-06, |
| "loss": 0.0, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.674565560821485, |
| "grad_norm": 0.00034332275390625, |
| "learning_rate": 4.4347957411462665e-06, |
| "loss": 0.0, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.6768223877228616, |
| "grad_norm": 0.0086669921875, |
| "learning_rate": 4.427244582043344e-06, |
| "loss": 0.0757, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.6790792146242384, |
| "grad_norm": 0.00023651123046875, |
| "learning_rate": 4.419693422940421e-06, |
| "loss": 0.0, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.6813360415256149, |
| "grad_norm": 0.0011138916015625, |
| "learning_rate": 4.4121422638375e-06, |
| "loss": 0.0, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.6835928684269916, |
| "grad_norm": 0.0016021728515625, |
| "learning_rate": 4.404591104734577e-06, |
| "loss": 0.0, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.6858496953283684, |
| "grad_norm": 0.004730224609375, |
| "learning_rate": 4.3970399456316544e-06, |
| "loss": 0.2891, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.688106522229745, |
| "grad_norm": 0.00030517578125, |
| "learning_rate": 4.389488786528733e-06, |
| "loss": 0.0, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.6903633491311216, |
| "grad_norm": 0.000568389892578125, |
| "learning_rate": 4.38193762742581e-06, |
| "loss": 0.0004, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.6926201760324981, |
| "grad_norm": 0.0004596710205078125, |
| "learning_rate": 4.3743864683228886e-06, |
| "loss": 0.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.6948770029338749, |
| "grad_norm": 0.0003757476806640625, |
| "learning_rate": 4.3668353092199655e-06, |
| "loss": 0.6468, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.6971338298352516, |
| "grad_norm": 0.00109100341796875, |
| "learning_rate": 4.359284150117043e-06, |
| "loss": 0.0, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.6993906567366284, |
| "grad_norm": 0.000308990478515625, |
| "learning_rate": 4.351732991014121e-06, |
| "loss": 0.0, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.701647483638005, |
| "grad_norm": 0.000705718994140625, |
| "learning_rate": 4.344181831911199e-06, |
| "loss": 0.0, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.7039043105393816, |
| "grad_norm": 0.00109100341796875, |
| "learning_rate": 4.3366306728082765e-06, |
| "loss": 0.195, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.7061611374407581, |
| "grad_norm": 0.00023937225341796875, |
| "learning_rate": 4.329079513705354e-06, |
| "loss": 0.2883, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.7084179643421349, |
| "grad_norm": 0.005950927734375, |
| "learning_rate": 4.321528354602432e-06, |
| "loss": 0.0, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.7106747912435116, |
| "grad_norm": 0.001373291015625, |
| "learning_rate": 4.31397719549951e-06, |
| "loss": 0.0, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.7129316181448884, |
| "grad_norm": 0.00046539306640625, |
| "learning_rate": 4.306426036396587e-06, |
| "loss": 0.0, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.715188445046265, |
| "grad_norm": 8.6875, |
| "learning_rate": 4.298874877293665e-06, |
| "loss": 0.0005, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.7174452719476416, |
| "grad_norm": 0.004058837890625, |
| "learning_rate": 4.291323718190742e-06, |
| "loss": 0.2448, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.7197020988490181, |
| "grad_norm": 0.001678466796875, |
| "learning_rate": 4.28377255908782e-06, |
| "loss": 0.0, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.7219589257503949, |
| "grad_norm": 9.012222290039062e-05, |
| "learning_rate": 4.276221399984898e-06, |
| "loss": 0.0, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.7242157526517716, |
| "grad_norm": 0.007354736328125, |
| "learning_rate": 4.2686702408819755e-06, |
| "loss": 0.0, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.7264725795531484, |
| "grad_norm": 0.00131988525390625, |
| "learning_rate": 4.261119081779053e-06, |
| "loss": 0.0, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.7287294064545249, |
| "grad_norm": 0.0031890869140625, |
| "learning_rate": 4.253567922676131e-06, |
| "loss": 0.0, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.7309862333559016, |
| "grad_norm": 0.00174713134765625, |
| "learning_rate": 4.246016763573209e-06, |
| "loss": 0.0, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.7332430602572781, |
| "grad_norm": 0.00029754638671875, |
| "learning_rate": 4.2384656044702866e-06, |
| "loss": 0.0, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.7354998871586549, |
| "grad_norm": 0.0001964569091796875, |
| "learning_rate": 4.230914445367364e-06, |
| "loss": 0.0, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.7377567140600316, |
| "grad_norm": 0.0036163330078125, |
| "learning_rate": 4.223363286264442e-06, |
| "loss": 0.1878, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.7400135409614084, |
| "grad_norm": 0.0152587890625, |
| "learning_rate": 4.21581212716152e-06, |
| "loss": 0.0, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.7422703678627849, |
| "grad_norm": 0.041015625, |
| "learning_rate": 4.208260968058598e-06, |
| "loss": 0.0, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.7445271947641616, |
| "grad_norm": 0.000701904296875, |
| "learning_rate": 4.200709808955675e-06, |
| "loss": 0.0, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.7467840216655381, |
| "grad_norm": 0.000568389892578125, |
| "learning_rate": 4.193158649852752e-06, |
| "loss": 0.24, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.7490408485669149, |
| "grad_norm": 0.01068115234375, |
| "learning_rate": 4.185607490749831e-06, |
| "loss": 0.0, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.7512976754682916, |
| "grad_norm": 0.001220703125, |
| "learning_rate": 4.178056331646908e-06, |
| "loss": 0.2857, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.7535545023696684, |
| "grad_norm": 0.0022430419921875, |
| "learning_rate": 4.170505172543986e-06, |
| "loss": 0.0, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.7558113292710449, |
| "grad_norm": 0.00173187255859375, |
| "learning_rate": 4.162954013441063e-06, |
| "loss": 0.0001, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.7580681561724216, |
| "grad_norm": 0.002044677734375, |
| "learning_rate": 4.155402854338141e-06, |
| "loss": 0.0, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.7603249830737981, |
| "grad_norm": 0.005859375, |
| "learning_rate": 4.147851695235219e-06, |
| "loss": 0.0, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.7625818099751749, |
| "grad_norm": 0.000335693359375, |
| "learning_rate": 4.140300536132297e-06, |
| "loss": 0.0, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.7648386368765516, |
| "grad_norm": 0.003814697265625, |
| "learning_rate": 4.132749377029374e-06, |
| "loss": 0.0, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.7670954637779284, |
| "grad_norm": 0.002349853515625, |
| "learning_rate": 4.125198217926452e-06, |
| "loss": 0.0, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.7693522906793049, |
| "grad_norm": 0.00189208984375, |
| "learning_rate": 4.11764705882353e-06, |
| "loss": 0.0, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.7716091175806814, |
| "grad_norm": 0.0025482177734375, |
| "learning_rate": 4.110095899720608e-06, |
| "loss": 0.2085, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.7738659444820581, |
| "grad_norm": 0.001220703125, |
| "learning_rate": 4.1025447406176846e-06, |
| "loss": 0.0, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.7761227713834349, |
| "grad_norm": 0.000881195068359375, |
| "learning_rate": 4.094993581514763e-06, |
| "loss": 0.0, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.7783795982848116, |
| "grad_norm": 0.000736236572265625, |
| "learning_rate": 4.08744242241184e-06, |
| "loss": 0.0, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.7806364251861884, |
| "grad_norm": 0.0113525390625, |
| "learning_rate": 4.079891263308918e-06, |
| "loss": 0.0, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.7828932520875649, |
| "grad_norm": 0.00067901611328125, |
| "learning_rate": 4.072340104205996e-06, |
| "loss": 0.0, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.7851500789889414, |
| "grad_norm": 0.00138092041015625, |
| "learning_rate": 4.064788945103073e-06, |
| "loss": 0.0, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.7874069058903181, |
| "grad_norm": 0.0015106201171875, |
| "learning_rate": 4.057237786000152e-06, |
| "loss": 0.0, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.7896637327916949, |
| "grad_norm": 0.00390625, |
| "learning_rate": 4.049686626897229e-06, |
| "loss": 0.0, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.7919205596930716, |
| "grad_norm": 0.004547119140625, |
| "learning_rate": 4.042135467794307e-06, |
| "loss": 0.0, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.7941773865944484, |
| "grad_norm": 0.000423431396484375, |
| "learning_rate": 4.034584308691384e-06, |
| "loss": 0.0, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.7964342134958249, |
| "grad_norm": 0.0036468505859375, |
| "learning_rate": 4.027033149588462e-06, |
| "loss": 0.0, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.7986910403972014, |
| "grad_norm": 0.00157928466796875, |
| "learning_rate": 4.01948199048554e-06, |
| "loss": 0.051, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.8009478672985781, |
| "grad_norm": 0.0004138946533203125, |
| "learning_rate": 4.011930831382618e-06, |
| "loss": 0.0, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.8032046941999549, |
| "grad_norm": 0.0005035400390625, |
| "learning_rate": 4.0043796722796955e-06, |
| "loss": 0.0, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.8054615211013316, |
| "grad_norm": 0.0003814697265625, |
| "learning_rate": 3.996828513176773e-06, |
| "loss": 0.1119, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.8077183480027081, |
| "grad_norm": 0.0019073486328125, |
| "learning_rate": 3.98927735407385e-06, |
| "loss": 0.0, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.8099751749040849, |
| "grad_norm": 0.001129150390625, |
| "learning_rate": 3.981726194970929e-06, |
| "loss": 0.0, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.8122320018054614, |
| "grad_norm": 0.00016307830810546875, |
| "learning_rate": 3.974175035868006e-06, |
| "loss": 0.0, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.8144888287068381, |
| "grad_norm": 0.00106048583984375, |
| "learning_rate": 3.966623876765084e-06, |
| "loss": 0.0, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.8167456556082149, |
| "grad_norm": 0.001220703125, |
| "learning_rate": 3.959072717662161e-06, |
| "loss": 0.0, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.8190024825095916, |
| "grad_norm": 0.0002155303955078125, |
| "learning_rate": 3.951521558559239e-06, |
| "loss": 0.0332, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.8212593094109681, |
| "grad_norm": 0.00019741058349609375, |
| "learning_rate": 3.943970399456317e-06, |
| "loss": 0.0, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.8235161363123449, |
| "grad_norm": 0.00012063980102539062, |
| "learning_rate": 3.9364192403533945e-06, |
| "loss": 0.0, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.8257729632137214, |
| "grad_norm": 0.004180908203125, |
| "learning_rate": 3.928868081250472e-06, |
| "loss": 0.0, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.8280297901150981, |
| "grad_norm": 0.0003299713134765625, |
| "learning_rate": 3.92131692214755e-06, |
| "loss": 0.0, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.8302866170164749, |
| "grad_norm": 0.000904083251953125, |
| "learning_rate": 3.913765763044628e-06, |
| "loss": 0.0, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.8325434439178516, |
| "grad_norm": 0.0008544921875, |
| "learning_rate": 3.9062146039417055e-06, |
| "loss": 0.0, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.8348002708192281, |
| "grad_norm": 0.00022792816162109375, |
| "learning_rate": 3.898663444838783e-06, |
| "loss": 0.0, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.8370570977206049, |
| "grad_norm": 0.000637054443359375, |
| "learning_rate": 3.891112285735861e-06, |
| "loss": 0.3226, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.8393139246219814, |
| "grad_norm": 0.00665283203125, |
| "learning_rate": 3.883561126632939e-06, |
| "loss": 0.0, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.8415707515233581, |
| "grad_norm": 0.0030975341796875, |
| "learning_rate": 3.8760099675300165e-06, |
| "loss": 0.303, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.8438275784247349, |
| "grad_norm": 0.0003681182861328125, |
| "learning_rate": 3.868458808427094e-06, |
| "loss": 0.0, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.8460844053261116, |
| "grad_norm": 0.0133056640625, |
| "learning_rate": 3.860907649324171e-06, |
| "loss": 0.6187, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.8483412322274881, |
| "grad_norm": 0.0015411376953125, |
| "learning_rate": 3.85335649022125e-06, |
| "loss": 0.0, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.8505980591288647, |
| "grad_norm": 0.001708984375, |
| "learning_rate": 3.845805331118327e-06, |
| "loss": 0.0025, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.8528548860302414, |
| "grad_norm": 0.00830078125, |
| "learning_rate": 3.8382541720154045e-06, |
| "loss": 0.0, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.8551117129316181, |
| "grad_norm": 0.00390625, |
| "learning_rate": 3.830703012912482e-06, |
| "loss": 0.0, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.8573685398329949, |
| "grad_norm": 0.00098419189453125, |
| "learning_rate": 3.82315185380956e-06, |
| "loss": 0.3487, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.8596253667343716, |
| "grad_norm": 0.007049560546875, |
| "learning_rate": 3.815600694706638e-06, |
| "loss": 0.2516, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.8618821936357481, |
| "grad_norm": 0.00165557861328125, |
| "learning_rate": 3.8080495356037155e-06, |
| "loss": 0.0, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.8641390205371247, |
| "grad_norm": 0.005218505859375, |
| "learning_rate": 3.800498376500793e-06, |
| "loss": 0.0, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.8663958474385014, |
| "grad_norm": 0.00144195556640625, |
| "learning_rate": 3.792947217397871e-06, |
| "loss": 0.0305, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.8686526743398781, |
| "grad_norm": 0.00064849853515625, |
| "learning_rate": 3.7853960582949484e-06, |
| "loss": 0.1983, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.8709095012412549, |
| "grad_norm": 0.00897216796875, |
| "learning_rate": 3.777844899192026e-06, |
| "loss": 0.0, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.8731663281426316, |
| "grad_norm": 0.0009918212890625, |
| "learning_rate": 3.770293740089104e-06, |
| "loss": 0.0, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.8754231550440081, |
| "grad_norm": 0.002105712890625, |
| "learning_rate": 3.7627425809861817e-06, |
| "loss": 0.0, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.8776799819453847, |
| "grad_norm": 0.01226806640625, |
| "learning_rate": 3.755191421883259e-06, |
| "loss": 0.0, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.8799368088467614, |
| "grad_norm": 0.00830078125, |
| "learning_rate": 3.7476402627803372e-06, |
| "loss": 0.0, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.8821936357481381, |
| "grad_norm": 0.01190185546875, |
| "learning_rate": 3.7400891036774146e-06, |
| "loss": 0.0, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.8844504626495149, |
| "grad_norm": 0.0004444122314453125, |
| "learning_rate": 3.7325379445744923e-06, |
| "loss": 0.1238, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.8867072895508914, |
| "grad_norm": 0.00946044921875, |
| "learning_rate": 3.7249867854715705e-06, |
| "loss": 0.0, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.8889641164522681, |
| "grad_norm": 0.004608154296875, |
| "learning_rate": 3.717435626368648e-06, |
| "loss": 0.0, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.8912209433536447, |
| "grad_norm": 0.000576019287109375, |
| "learning_rate": 3.709884467265726e-06, |
| "loss": 0.0, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.8934777702550214, |
| "grad_norm": 0.019775390625, |
| "learning_rate": 3.7023333081628034e-06, |
| "loss": 0.0, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.8957345971563981, |
| "grad_norm": 0.001556396484375, |
| "learning_rate": 3.694782149059881e-06, |
| "loss": 0.1019, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.8979914240577749, |
| "grad_norm": 0.00089263916015625, |
| "learning_rate": 3.6872309899569585e-06, |
| "loss": 0.6805, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.9002482509591514, |
| "grad_norm": 0.0027618408203125, |
| "learning_rate": 3.6796798308540366e-06, |
| "loss": 0.2843, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.9025050778605281, |
| "grad_norm": 0.002288818359375, |
| "learning_rate": 3.672128671751114e-06, |
| "loss": 0.0, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.0033111572265625, |
| "learning_rate": 3.664577512648192e-06, |
| "loss": 0.0191, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.9070187316632814, |
| "grad_norm": 0.001129150390625, |
| "learning_rate": 3.6570263535452695e-06, |
| "loss": 0.3397, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.9092755585646581, |
| "grad_norm": 0.001190185546875, |
| "learning_rate": 3.6494751944423473e-06, |
| "loss": 0.0, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.9115323854660349, |
| "grad_norm": 0.0184326171875, |
| "learning_rate": 3.6419240353394246e-06, |
| "loss": 0.0, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.9137892123674114, |
| "grad_norm": 0.0087890625, |
| "learning_rate": 3.6343728762365028e-06, |
| "loss": 0.0, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.9160460392687881, |
| "grad_norm": 0.00193023681640625, |
| "learning_rate": 3.62682171713358e-06, |
| "loss": 0.0, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.9183028661701647, |
| "grad_norm": 0.006805419921875, |
| "learning_rate": 3.619270558030658e-06, |
| "loss": 0.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.9205596930715414, |
| "grad_norm": 0.0019683837890625, |
| "learning_rate": 3.6117193989277356e-06, |
| "loss": 0.0, |
| "step": 8510 |
| }, |
| { |
| "epoch": 1.9228165199729181, |
| "grad_norm": 0.0028533935546875, |
| "learning_rate": 3.6041682398248134e-06, |
| "loss": 0.0, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.9250733468742949, |
| "grad_norm": 37.5, |
| "learning_rate": 3.5966170807218907e-06, |
| "loss": 0.0852, |
| "step": 8530 |
| }, |
| { |
| "epoch": 1.9273301737756714, |
| "grad_norm": 0.00604248046875, |
| "learning_rate": 3.589065921618969e-06, |
| "loss": 0.0012, |
| "step": 8540 |
| }, |
| { |
| "epoch": 1.929587000677048, |
| "grad_norm": 0.0025787353515625, |
| "learning_rate": 3.5815147625160463e-06, |
| "loss": 0.0, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.9318438275784247, |
| "grad_norm": 0.00112152099609375, |
| "learning_rate": 3.573963603413124e-06, |
| "loss": 0.0, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.9341006544798014, |
| "grad_norm": 0.0019683837890625, |
| "learning_rate": 3.566412444310202e-06, |
| "loss": 0.0, |
| "step": 8570 |
| }, |
| { |
| "epoch": 1.9363574813811781, |
| "grad_norm": 0.0019683837890625, |
| "learning_rate": 3.5588612852072795e-06, |
| "loss": 0.3208, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.9386143082825549, |
| "grad_norm": 0.00160980224609375, |
| "learning_rate": 3.5513101261043577e-06, |
| "loss": 0.0, |
| "step": 8590 |
| }, |
| { |
| "epoch": 1.9408711351839314, |
| "grad_norm": 0.0021514892578125, |
| "learning_rate": 3.543758967001435e-06, |
| "loss": 0.0, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.943127962085308, |
| "grad_norm": 0.0024261474609375, |
| "learning_rate": 3.536207807898513e-06, |
| "loss": 0.0, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.9453847889866847, |
| "grad_norm": 0.00347900390625, |
| "learning_rate": 3.52865664879559e-06, |
| "loss": 0.0, |
| "step": 8620 |
| }, |
| { |
| "epoch": 1.9476416158880614, |
| "grad_norm": 0.002838134765625, |
| "learning_rate": 3.5211054896926683e-06, |
| "loss": 0.0, |
| "step": 8630 |
| }, |
| { |
| "epoch": 1.9498984427894381, |
| "grad_norm": 0.000637054443359375, |
| "learning_rate": 3.5135543305897457e-06, |
| "loss": 0.0343, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.9521552696908149, |
| "grad_norm": 0.006072998046875, |
| "learning_rate": 3.506003171486824e-06, |
| "loss": 0.0, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.9544120965921914, |
| "grad_norm": 0.0037994384765625, |
| "learning_rate": 3.498452012383901e-06, |
| "loss": 0.0, |
| "step": 8660 |
| }, |
| { |
| "epoch": 1.956668923493568, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 3.490900853280979e-06, |
| "loss": 0.2184, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.9589257503949447, |
| "grad_norm": 0.000858306884765625, |
| "learning_rate": 3.4833496941780563e-06, |
| "loss": 0.0, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.9611825772963214, |
| "grad_norm": 0.0023040771484375, |
| "learning_rate": 3.4757985350751345e-06, |
| "loss": 0.0, |
| "step": 8690 |
| }, |
| { |
| "epoch": 1.9634394041976981, |
| "grad_norm": 0.0012054443359375, |
| "learning_rate": 3.468247375972212e-06, |
| "loss": 0.0, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.9656962310990747, |
| "grad_norm": 0.0004787445068359375, |
| "learning_rate": 3.46069621686929e-06, |
| "loss": 0.0, |
| "step": 8710 |
| }, |
| { |
| "epoch": 1.9679530580004514, |
| "grad_norm": 0.00020313262939453125, |
| "learning_rate": 3.4531450577663673e-06, |
| "loss": 0.0, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.970209884901828, |
| "grad_norm": 131.0, |
| "learning_rate": 3.445593898663445e-06, |
| "loss": 0.2685, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.9724667118032047, |
| "grad_norm": 0.0003910064697265625, |
| "learning_rate": 3.4380427395605224e-06, |
| "loss": 0.0, |
| "step": 8740 |
| }, |
| { |
| "epoch": 1.9747235387045814, |
| "grad_norm": 0.0023345947265625, |
| "learning_rate": 3.4304915804576006e-06, |
| "loss": 0.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.9769803656059581, |
| "grad_norm": 0.00064849853515625, |
| "learning_rate": 3.422940421354678e-06, |
| "loss": 0.2698, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.9792371925073347, |
| "grad_norm": 0.000713348388671875, |
| "learning_rate": 3.4153892622517557e-06, |
| "loss": 0.2504, |
| "step": 8770 |
| }, |
| { |
| "epoch": 1.9814940194087114, |
| "grad_norm": 0.000518798828125, |
| "learning_rate": 3.407838103148834e-06, |
| "loss": 0.0, |
| "step": 8780 |
| }, |
| { |
| "epoch": 1.983750846310088, |
| "grad_norm": 0.004425048828125, |
| "learning_rate": 3.4002869440459112e-06, |
| "loss": 0.0555, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.9860076732114647, |
| "grad_norm": 0.000774383544921875, |
| "learning_rate": 3.3927357849429894e-06, |
| "loss": 0.1858, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.9882645001128414, |
| "grad_norm": 0.0018768310546875, |
| "learning_rate": 3.3851846258400668e-06, |
| "loss": 0.0, |
| "step": 8810 |
| }, |
| { |
| "epoch": 1.9905213270142181, |
| "grad_norm": 0.01324462890625, |
| "learning_rate": 3.3776334667371445e-06, |
| "loss": 0.1943, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.9927781539155947, |
| "grad_norm": 49.0, |
| "learning_rate": 3.370082307634222e-06, |
| "loss": 0.1281, |
| "step": 8830 |
| }, |
| { |
| "epoch": 1.9950349808169714, |
| "grad_norm": 0.00034332275390625, |
| "learning_rate": 3.3625311485313e-06, |
| "loss": 0.3255, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.997291807718348, |
| "grad_norm": 0.00057220458984375, |
| "learning_rate": 3.3549799894283774e-06, |
| "loss": 0.0, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.9995486346197247, |
| "grad_norm": 0.00177001953125, |
| "learning_rate": 3.3474288303254556e-06, |
| "loss": 0.0, |
| "step": 8860 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 13293, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.394507171136717e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|